diff --git a/.github/workflows/dockerhub-main-push.yml b/.github/workflows/dockerhub-main-push.yml index 8f8dc0d..dbec67d 100644 --- a/.github/workflows/dockerhub-main-push.yml +++ b/.github/workflows/dockerhub-main-push.yml @@ -1,8 +1,10 @@ name: DockerHub Main Push on: - push: - branches: [ "main" ] + workflow_run: + workflows: ["Unittest Main Push"] + types: + - completed jobs: build: diff --git a/.github/workflows/unittest-dev.yml b/.github/workflows/unittest-dev.yml new file mode 100644 index 0000000..918ddd3 --- /dev/null +++ b/.github/workflows/unittest-dev.yml @@ -0,0 +1,30 @@ +name: Unittest Dev + +on: + pull_request: + branches: [ "dev" ] + push: + branches: [ "dev" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.12"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install python dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Test with unittest + run: | + python -m unittest discover ./webscrapers/unittests/utilities diff --git a/.github/workflows/unittest-main-pr.yml b/.github/workflows/unittest-main-pr.yml new file mode 100644 index 0000000..73fe9d1 --- /dev/null +++ b/.github/workflows/unittest-main-pr.yml @@ -0,0 +1,28 @@ +name: Unittest Main Pull Request + +on: + pull_request: + branches: [ "main" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.12"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install python dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Test with unittest + run: | + python -m unittest discover ./webscrapers/unittests/utilities diff --git a/.github/workflows/unittest-main-push.yml b/.github/workflows/unittest-main-push.yml new file mode 100644 index 0000000..5fe6263 --- /dev/null +++ b/.github/workflows/unittest-main-push.yml @@ -0,0 +1,28 @@ +name: Unittest Main Push + +on: + push: + branches: [ "main" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.12"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install python dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Test with unittest + run: | + python -m unittest discover ./webscrapers/unittests/utilities diff --git a/model/arch/crop_image.py b/model/arch/crop_image.py new file mode 100644 index 0000000..03960a4 --- /dev/null +++ b/model/arch/crop_image.py @@ -0,0 +1,39 @@ +import numpy as np +from beartype import beartype +from typing import Union + +@beartype +def crop_image( + image_array:np.array, + pt1_wh:Union[tuple,list], + pt2_wh:Union[tuple,list] + ) -> np.array: + """Crops an image array to specified combination of two diagonal points + + Parameters + --------- + image : numpy.array + The numpy image array to crop + pt1_wh : list, tuple + Diagonal point 1 coordinates for cropping the image + pt2_wh : list, tuple + 
Diagonal point 2 coordinates for cropping the image + + Returns + ------- + numpy.array + The cropped image array + """ + # extract out diagonal cropping points + (pt1_w, pt1_h) = pt1_wh + (pt2_w, pt2_h) = pt2_wh + # group height and width points + wpts = (pt1_h, pt2_h) + hpts = (pt1_w, pt2_w) + # extract out image shape + n_image_dims = len(image_array.shape) + if n_image_dims == 3: + crop_image_array = image_array[min(hpts):max(hpts), min(wpts):max(wpts), :] + else: + crop_image_array = image_array[min(hpts):max(hpts), min(wpts):max(wpts)] + return crop_image_array \ No newline at end of file diff --git a/model/data_prep.py b/model/arch/data_prep.py similarity index 89% rename from model/data_prep.py rename to model/arch/data_prep.py index 1d66338..f76a54f 100644 --- a/model/data_prep.py +++ b/model/arch/data_prep.py @@ -2,6 +2,7 @@ import os import sys import pickle +import logging import numpy as np import pandas as pd from sklearn.model_selection import train_test_split @@ -9,17 +10,23 @@ # load custom modules sys.path.append(os.getcwd()) import cons -from utilities.load_image import load_image -from utilities.greyscale_image import greyscale_image -from utilities.pad_image import pad_image -from utilities.plot_image import plot_image -from utilities.resize_image import resize_image +from arch.load_image import load_image +from arch.greyscale_image import greyscale_image +from arch.pad_image import pad_image +from arch.resize_image import resize_image -def data_prep(cons): +def data_prep(): + """ + Data preparation pipeline for generating the model training, testing and validation data. - """""" + Parameters + ---------- - print("Generating image file paths and classes ...") + Returns + ------- + """ + + logging.info("Generating image file paths and classes ...") if False: @@ -55,7 +62,7 @@ def data_prep(cons): # combine train and test files image_fpaths = {**train_image_fpaths, **test_image_fpaths} - print("Creating image dataframe ...") + logging.info("Creating image dataframe ...") # create list to hold image data image_data = [] @@ -78,7 +85,7 @@ def data_prep(cons): # convert image data object into a pandas dataframe image_dataframe = pd.DataFrame(image_data) - print("Padding images ...") + logging.info("Padding images ...") # find the largest image dimensions max_height = image_dataframe['image_shape'].apply(lambda x: x[0]).max() # height @@ -90,7 +97,7 @@ def data_prep(cons): # apply padding to standardize all images shapes image_dataframe['pad_image_array'] = image_dataframe['image_array'].apply(lambda x: pad_image(x, pad_shape_wh = pad_shape)) - print('Down sizing image ...') + logging.info('Down sizing image ...') # set down size shape downsize_shape = tuple([round(dim * 1/3) for dim in pad_shape]) @@ -98,7 +105,7 @@ def data_prep(cons): # apply resizing to downsize image shapes image_dataframe['pad_image_array'] = image_dataframe['pad_image_array'].apply(lambda x: resize_image(x, reshape_wh = downsize_shape)) - print('Splitting train set ...') + logging.info('Splitting train set ...') # subset the output image data sub_cols = ['image_fpath', 'pad_image_array', 'target', 'dataset'] @@ -136,8 +143,6 @@ def data_prep(cons): with open(cons.test_data_pickle_fpath, 'wb') as handle: pickle.dump(test_data_dict, handle, protocol=pickle.HIGHEST_PROTOCOL) - return 0 - # if running as main programme if __name__ == "__main__": diff --git a/model/arch/greyscale_image.py b/model/arch/greyscale_image.py new file mode 100644 index 0000000..d6a6a73 --- /dev/null +++ 
b/model/arch/greyscale_image.py @@ -0,0 +1,31 @@ +import numpy as np +from beartype import beartype + +@beartype +def greyscale_image( + rgb_image_array:np.array, + keep_3dim:bool=True + ) -> np.array: + """ + Transforms an image array to greyscale + + Parameters + ---------- + rgb_image_array : numpy.array + The coloured numpy image array to transform to greyscale + keep_3dim : bool + Whether to keep the third dimension of the image array, default is True + + Returns + ------- + numpy.array + The transformed greyscale numpy image + """ + # apply grey scale transformation + grey_image_array = np.dot(rgb_image_array[:, :, :3], [0.2125, 0.7154, 0.0721]) + # floor transformed pixel floats + gray_img = grey_image_array.astype(np.uint8) + # if keeping third dimension + if keep_3dim: + gray_img = gray_img[..., np.newaxis] + return gray_img diff --git a/model/arch/keras_model.py b/model/arch/keras_model.py index 566f4ea..c4c04ef 100644 --- a/model/arch/keras_model.py +++ b/model/arch/keras_model.py @@ -12,9 +12,9 @@ sys.path.append(os.getcwd()) import cons from utilities.plot_image import plot_image -from arch.LeNet5 import LeNet5 +from keras.LeNet5 import LeNet5 from fit_model import fit_model -from plot_model import plot_model_fit +from utilities.plot_model import plot_model_fit print('Loading data ...') diff --git a/model/arch/load_image.py b/model/arch/load_image.py new file mode 100644 index 0000000..23d2e16 --- /dev/null +++ b/model/arch/load_image.py @@ -0,0 +1,24 @@ +import numpy as np +import cv2 +from beartype import beartype + +@beartype +def load_image(image_fpath:str) -> np.array: + """ + Loads an image file as an image array from disk + + Parameters + ---------- + image_fpath : str + The file path to the image file to load as an image array + + Returns + ------- + numpy.array + The loaded image array + """ + # load image from file path + image_imread = cv2.imread(image_fpath) + # convert to numpy array + rgb_image_array = np.array(image_imread) + return rgb_image_array \ No newline at end of file diff --git a/model/utilities/pad_image.py b/model/arch/pad_image.py similarity index 50% rename from model/utilities/pad_image.py rename to model/arch/pad_image.py index 21dbbcf..d471c9c 100644 --- a/model/utilities/pad_image.py +++ b/model/arch/pad_image.py @@ -1,7 +1,27 @@ import numpy as np +from beartype import beartype +from typing import Union -def pad_image(image_array, pad_shape_wh): - """""" +@beartype +def pad_image( + image_array:np.array, + pad_shape_wh:Union[list,tuple] + ) -> np.array: + """ + Pads an image array to a desired width and height + + Parameters + ---------- + image_array: np.array + The image array to pad to a specified dimension + pad_shape_wh : list, tuple + The desired dimensions to pad the input image array to + + Returns + ------- + numpy.array + The padded image array + """ image_array_shape = image_array.shape (img_h, img_w) = image_array_shape[0:2] (pad_img_w, pad_img_h) = pad_shape_wh diff --git a/model/arch/resize_image.py b/model/arch/resize_image.py new file mode 100644 index 0000000..e5f80b3 --- /dev/null +++ b/model/arch/resize_image.py @@ -0,0 +1,38 @@ +import numpy as np +import cv2 +from beartype import beartype +from typing import Union + +@beartype +def resize_image( + image_array:np.array, + reshape_wh:Union[list,tuple], + interpolation=cv2.INTER_LINEAR, + keep_3dim:bool=True + ) -> np.array: + """ + Resizes a numpy image array to a specified shape width and height + + Parameters + ---------- + image_array : numpy.array + The image array to 
reshape to new dimensions + reshape_wh : list, tuple + The dimensions to reshape the image array to + interpolation : cv2.INTER_LINEAR + The interpolation function for reshaping the image array, default is cv2.INTER_LINEAR + keep_3dim : bool + Whether to maintain the third dimension of the input numpy image array, default is True + + Returns + ------- + numpy.array + The reshaped numpy image array + """ + # rescale the image either by shrinking or expanding + res_image_array = cv2.resize(image_array, dsize = reshape_wh, interpolation=interpolation) + # keep 3dim; when applying resizing to (:, :, 1) shape images + if keep_3dim and len(res_image_array.shape) == 2: + res_image_array = res_image_array[..., np.newaxis] + + return res_image_array \ No newline at end of file diff --git a/model/exeKerasModel.cmd b/model/exeKerasModel.cmd new file mode 100644 index 0000000..adaca20 --- /dev/null +++ b/model/exeKerasModel.cmd @@ -0,0 +1 @@ +call python -m pdb prg_keras_model.py --run_model_training --run_testset_prediction \ No newline at end of file diff --git a/model/exeKerasModel.sh b/model/exeKerasModel.sh new file mode 100644 index 0000000..4dd185d --- /dev/null +++ b/model/exeKerasModel.sh @@ -0,0 +1 @@ +python -m pdb prg_keras_model.py --run_model_training --run_testset_prediction \ No newline at end of file diff --git a/model/exeUnittests.cmd b/model/exeUnittests.cmd new file mode 100644 index 0000000..780f944 --- /dev/null +++ b/model/exeUnittests.cmd @@ -0,0 +1 @@ +call python -m unittest discover unittests\utilities \ No newline at end of file diff --git a/model/keras/AlexNet8.py b/model/keras/AlexNet8.py index 1300584..40b92e9 100644 --- a/model/keras/AlexNet8.py +++ b/model/keras/AlexNet8.py @@ -1,14 +1,32 @@ # load in relevant libraries +from beartype import beartype +from typing import Union from keras.models import Model from keras.layers import Flatten, Dense, Input, Dropout from keras.layers import Conv2D, MaxPooling2D -def AlexNet8(input_shape = (227, 227, 3), - n_classes = 1000, - output_activation = 'softmax' - ): - +@beartype +def AlexNet8( + input_shape:Union[list,tuple]=(227,227,3), + n_classes:int=1000, + output_activation:str='softmax' + ) -> Model: """ + AlexNet8 Keras model + + Parameters + ---------- + input_shape : list,tuple + The dimensions of the input image arrays, default is (227.227,3) + n_classes : int + The number of output classes to classify for, default is 1000 + output_activation : str + The type of activation function to use, default is softmax + + Returns + ------- + Model + The keras AlexNet8 model """ # set input shapes diff --git a/model/keras/LeNet5.py b/model/keras/LeNet5.py index a3b7373..cf08cb4 100644 --- a/model/keras/LeNet5.py +++ b/model/keras/LeNet5.py @@ -1,90 +1,31 @@ -# -*- coding: utf-8 -*- -""" -Created on Sun Jan 31 15:22:57 2021 -@author: oislen -""" - -# load in relevant libraries +from beartype import beartype +from typing import Union from keras.models import Model from keras.layers import Flatten, Dense, Input, Dropout from keras.layers import Conv2D, AveragePooling2D -def LeNet5(input_shape = (28, 28, 1), - n_classes = 10, - output_activation = 'softmax' - ): - +@beartype +def LeNet5( + input_shape:Union[list,tuple]=(28,28,1), + n_classes:int=10, + output_activation:str='softmax' + ) -> Model: """ - - LeNet5 Documentation - - Function Overview - - This function generates a LeNet5 Model architecture: - - 1. Conv2D - - filters: 32 - - kernal: 5 x 5 - - activation: relu - - padding: same - - MaxPooling2D - - pool: 2 x 2 - - 2. 
Conv2D - - filters: 32 - - kernal: 5 x 5 - - activation: relu - - padding: same - - MaxPooling2D - - pool: 2 x 2 - - Flatten - - 3. Dense - - units: 128 - - activation: relu - - Dropout - - rate: 0.25 - - 4. Dense - - units: 64 - - activation: relu - - Dropout - - 0.25 rate - - 5. Dense - - units: n classes - - activation: output_activation - - Defaults - - LeNet5(input_shape = (28, 28, 1), - n_classes = 10, - output_activation = 'softmax', - name = 'LeNet5' - ) + LeNet5 keras model Parameters - - input_shape - the input image shape / dimensions - n_classes - the number of target classes + ---------- + input_shape : list,tuple + the input image shape / dimensions, default is (28,28,1) + n_classes : int + The number of target classes, default is 10 + output_activation : str + The type of activation function to use, default is softmax Returns - - model - keras.Model, the LeNet model - - Example - - LeNet5(input_shape = (28, 28, 1), - n_classes = 10, - output_activation = 'softmax', - name = 'LeNet5' - ) - + ------- + Model + Keras.Model, the LeNet model """ # set input shapes diff --git a/model/keras/ResNet50_pretrained.py b/model/keras/ResNet50_pretrained.py index 6d56720..e561bf4 100644 --- a/model/keras/ResNet50_pretrained.py +++ b/model/keras/ResNet50_pretrained.py @@ -1,13 +1,34 @@ +from beartype import beartype +from typing import Union from keras.layers import Dropout, Dense, Flatten from keras.applications import ResNet50 from keras.models import Model -def ResNet50_pretrained(input_shape = (224, 224, 3), - n_classes = 2, - output_activation = 'softmax' - ): - +@beartype +def ResNet50_pretrained( + input_shape:Union[list,tuple]=(224,224,3), + n_classes:int=2, + output_activation:str='softmax' + ) -> Model: """ + Pretrained ResNet50 keras model + + Parameters + ---------- + input_shape : list,tuple + the input image shape / dimensions, default is (224,224,3) + n_classes : int + The number of target classes, default is 2 + output_activation : str + The type of activation function to use, default is softmax + + Returns + ------- + Model + Keras.Model, the pretrained ResNet50 model + + Example + ------- model = ResNet50_pretrained(input_shape = (224, 224, 3), n_classes = 2, output_activation = 'softmax', @@ -41,7 +62,4 @@ def ResNet50_pretrained(input_shape = (224, 224, 3), model = Model(inputs = ResNet50_pretrained.input, outputs = x, name = 'ResNet50_pretrained') - return model - - - + return model \ No newline at end of file diff --git a/model/keras/UNet.py b/model/keras/UNet.py index e860445..75873d6 100644 --- a/model/keras/UNet.py +++ b/model/keras/UNet.py @@ -1,3 +1,8 @@ +from beartype import beartype +from typing import Union +from keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose +from keras.layers.merge import concatenate +import tensorflow as tf def conv_block(inputs=None, n_filters=32, dropout_prob=0, max_pooling=True): """ diff --git a/model/keras/VGG16.py b/model/keras/VGG16.py index 05ed56e..dc26bfe 100644 --- a/model/keras/VGG16.py +++ b/model/keras/VGG16.py @@ -1,14 +1,30 @@ -# load in relevant libraries +from beartype import beartype +from typing import Union from keras.models import Model from keras.layers import Flatten, Dense, Input, Dropout from keras.layers import Conv2D, MaxPooling2D -def VGG16(input_shape = (224, 224, 3), - n_classes = 1000, - output_activation = 'softmax' - ): - +@beartype +def VGG16(input_shape:Union[list,tuple]=(224,224,3), + n_classes:int=1000, + output_activation:str='softmax' + ) -> Model: """ + VGG16 keras model + + 
Parameters + ---------- + input_shape : list,tuple + the input image shape / dimensions, default is (224,224,3) + n_classes : int + The number of target classes, default is 1000 + output_activation : str + The type of activation function to use, default is softmax + + Returns + ------- + Model + Keras.Model, the VGG16 model """ # set input shapes diff --git a/model/keras/VGG16_pretrained.py b/model/keras/VGG16_pretrained.py index 85af564..c7a0729 100644 --- a/model/keras/VGG16_pretrained.py +++ b/model/keras/VGG16_pretrained.py @@ -1,12 +1,34 @@ +from beartype import beartype +from typing import Union from keras.layers import Dropout, Dense, GlobalMaxPooling2D from keras.applications import VGG16 from keras.models import Model -def VGG16_pretrained(input_shape = (224, 224, 3), - n_classes = 1, - output_activation = 'sigmoid' - ): +@beartype +def VGG16_pretrained( + input_shape:Union[list,tuple]=(224,224,3), + n_classes:int=1, + output_activation:str='sigmoid' + ) -> Model: """ + Pretrained VGG16 keras model + + Parameters + ---------- + input_shape : list,tuple + the input image shape / dimensions, default is (224,224,3) + n_classes : int + The number of target classes, default is 1 + output_activation : str + The type of activation function to use, default is sigmoid + + Returns + ------- + Model + Keras.Model, the pretrained VGG16 model + + Example + ------- model = VGG16_pretrained(input_shape = (224, 224, 3), n_classes = 1, output_activation = 'sigmoid', diff --git a/model/prg_keras_model.py b/model/prg_keras_model.py index 371f790..30570cd 100644 --- a/model/prg_keras_model.py +++ b/model/prg_keras_model.py @@ -147,7 +147,7 @@ logging.info("Plot example test set predictions...") # plot random sample predictions - plot_preds(data=test_df, cons=cons, output_fpath=cons.keras_pred_images_fpath, show_plot=False) + plot_preds(data=test_df, output_fpath=cons.keras_pred_images_fpath, show_plot=False) timeLogger.logTime(parentKey="Plots", subKey="TestSetPredictions") logging.info("Generate a sample submission file for kaggle...") diff --git a/model/prg_torch_model.py b/model/prg_torch_model.py index 1dc57b1..2cc37d8 100644 --- a/model/prg_torch_model.py +++ b/model/prg_torch_model.py @@ -165,7 +165,7 @@ logging.info("Plot example test set predictions...") # plot random sample predictions - plot_preds(data=test_df, cons=cons, output_fpath=cons.torch_pred_images_fpath, show_plot=False) + plot_preds(data=test_df, output_fpath=cons.torch_pred_images_fpath, show_plot=False) timeLogger.logTime(parentKey="Plots", subKey="TestSetPredictions") logging.info("Generate a sample submission file for kaggle...") diff --git a/model/unittests/utilities/test_TimeIt.py b/model/unittests/utilities/test_TimeIt.py new file mode 100644 index 0000000..11cfdfa --- /dev/null +++ b/model/unittests/utilities/test_TimeIt.py @@ -0,0 +1,34 @@ +import unittest +import os +import sys + +sys.path.append(os.path.join(os.getcwd(), "models")) + +import cons +from utilities.TimeIt import TimeIt + +parentKey="Test Parent Key" +subKey="Test Sub Key" +exp_log_keys = ["parentKey", "subKey", "stepTime", "cumulativeTime"] +obs_timeit = TimeIt() +obs_timeit.logTime(parentKey=parentKey, subKey=subKey) +obs_log_keys = list(obs_timeit.log[0].keys()) + +class Test_TimeIt(unittest.TestCase): + """""" + + def setUp(self): + self.exp_log_keys = exp_log_keys + self.obs_log_keys = obs_log_keys + + def test_type(self): + self.assertEqual(type(self.obs_log_keys), type(self.exp_log_keys)) + + def test_len(self): + 
self.assertEqual(len(self.obs_log_keys), len(self.exp_log_keys)) + + def test_values(self): + self.assertEqual(self.obs_log_keys, self.exp_log_keys) + +if __name__ == "__main__": + unittest.main() diff --git a/model/utilities/TimeIt.py b/model/utilities/TimeIt.py index 3be6215..c056c13 100644 --- a/model/utilities/TimeIt.py +++ b/model/utilities/TimeIt.py @@ -1,12 +1,12 @@ import logging +from beartype import beartype +from typing import Union from time import time class TimeIt(): - """ - """ def __init__(self): - """ + """An object for timing code execution times, records time step and cumulative time. """ self.log = [] self.currentTime = time() @@ -14,8 +14,20 @@ def __init__(self): self.stepTime = None self.cumulativeTime = 0.0 - def logTime(self, parentKey, subKey=None): - """ + @beartype + def logTime( + self, + parentKey:str, + subKey:Union[str,None]=None + ): + """Sets a timestamp for code execution step + + Parameters + ---------- + paraentKey : str + The parent key to label the code execution step + subKey : str + The sub key to label the code execution step, default is None """ self.previousTime = self.currentTime self.currentTime = time() @@ -24,4 +36,12 @@ def logTime(self, parentKey, subKey=None): logEntry = {"parentKey":parentKey, "subKey":subKey, "stepTime":self.stepTime, "cumulativeTime":self.cumulativeTime} logging.info(logEntry) self.log.append(logEntry) - \ No newline at end of file + + def reset(self): + """Resets the timing object + """ + self.log = [] + self.currentTime = time() + self.previousTime = None + self.stepTime = None + self.cumulativeTime = 0.0 \ No newline at end of file diff --git a/model/utilities/crop_image.py b/model/utilities/crop_image.py deleted file mode 100644 index e09a3c7..0000000 --- a/model/utilities/crop_image.py +++ /dev/null @@ -1,15 +0,0 @@ -def crop_image(image_array, pt1_wh, pt2_wh): - """""" - # extract out diagonal cropping points - (pt1_w, pt1_h) = pt1_wh - (pt2_w, pt2_h) = pt2_wh - # group height and width points - wpts = (pt1_h, pt2_h) - hpts = (pt1_w, pt2_w) - # extract out image shape - n_image_dims = len(image_array.shape) - if n_image_dims == 3: - crop_image_array = image_array[min(hpts):max(hpts), min(wpts):max(wpts), :] - else: - crop_image_array = image_array[min(hpts):max(hpts), min(wpts):max(wpts)] - return crop_image_array \ No newline at end of file diff --git a/model/utilities/greyscale_image.py b/model/utilities/greyscale_image.py deleted file mode 100644 index b7a30e4..0000000 --- a/model/utilities/greyscale_image.py +++ /dev/null @@ -1,12 +0,0 @@ -import numpy as np - -def greyscale_image(rgb_image_array, keep_3dim = True): - """""" - # apply grey scale transformation - grey_image_array = np.dot(rgb_image_array[:, :, :3], [0.2125, 0.7154, 0.0721]) - # floor transformed pixel floats - gray_img = grey_image_array.astype(np.uint8) - # if keeping third dimension - if keep_3dim: - gray_img = gray_img[..., np.newaxis] - return gray_img diff --git a/model/utilities/load_image.py b/model/utilities/load_image.py deleted file mode 100644 index a5db503..0000000 --- a/model/utilities/load_image.py +++ /dev/null @@ -1,10 +0,0 @@ -import numpy as np -import cv2 - -def load_image(image_fpath): - """""" - # load image from file path - image_imread = cv2.imread(image_fpath) - # convert to numpy array - rgb_image_array = np.array(image_imread) - return rgb_image_array \ No newline at end of file diff --git a/model/utilities/plot_generator.py b/model/utilities/plot_generator.py index bf76cca..5e0a7cd 100644 --- 
a/model/utilities/plot_generator.py +++ b/model/utilities/plot_generator.py @@ -3,8 +3,14 @@ from beartype import beartype @beartype -def plot_generator(generator, mode:str='keras', output_fpath:Union[str,None]=None, show_plot:bool=True): - """Plots multiple images from a generator. +def plot_generator( + generator, + mode:str='keras', + output_fpath:Union[str,None]=None, + show_plot:bool=True + ): + """ + Plots multiple images from a generator. Parameters ---------- diff --git a/model/utilities/plot_image.py b/model/utilities/plot_image.py index 187cae1..abdd2bf 100644 --- a/model/utilities/plot_image.py +++ b/model/utilities/plot_image.py @@ -1,10 +1,16 @@ import matplotlib.pyplot as plt +import numpy as np from beartype import beartype from typing import Union @beartype -def plot_image(image_array, output_fpath:Union[str,None]=None, show_plot:bool=True): - """Plots an image array. +def plot_image( + image_array, + output_fpath:Union[str,None]=None, + show_plot:bool=True + ): + """ + Plots an image array. Parameters ---------- diff --git a/model/utilities/plot_model.py b/model/utilities/plot_model.py index 179e280..316cdde 100644 --- a/model/utilities/plot_model.py +++ b/model/utilities/plot_model.py @@ -1,43 +1,34 @@ -# -*- coding: utf-8 -*- -""" -Created on Tue Feb 2 11:37:53 2021 -@author: oislen -""" - # load relevant libraries import os +from beartype import beartype +from typing import Union from matplotlib import pyplot as plt -def plot_model_fit(model_fit, output_fdir = None, show_plot:bool=True): - +@beartype +def plot_model_fit( + model_fit, + output_fdir:Union[str,None]=None, + show_plot:bool=True + ): """ - - Plot Model Fit Documentation - - Function Overview - - This function plots the model's fit during training in relation to a validation set. - - Defaults - - plot_model_fit(model_fit) + Plots the model's fit during training in relation to a validation set. 
Parameters + ---------- - model_fit - model.predict(), the Keras model predict object + model_fit : model.predict(), + the Keras model predict object + output_fdir : str + The file path to save the model fit plots to disk, default is None show_plot : bool Whether to show the generated plot, default is True Returns - - 0 for successful execution - - Exmaple - - plot_model_fit(model_fit = model_fit) - - Source - + ------- + + Reference + ---------- + https://github.com/jiadaizhao/Advanced-Machine-Learning-Specialization """ @@ -88,6 +79,4 @@ def plot_model_fit(model_fit, output_fdir = None, show_plot:bool=True): plt.savefig(loss_output_fpath) if show_plot: plt.show() - plt.close() - - return 0 \ No newline at end of file + plt.close() \ No newline at end of file diff --git a/model/utilities/plot_preds.py b/model/utilities/plot_preds.py index 615a726..7850dee 100644 --- a/model/utilities/plot_preds.py +++ b/model/utilities/plot_preds.py @@ -1,11 +1,30 @@ import os +from beartype import beartype import matplotlib.pyplot as plt +import pandas as pd from tensorflow.keras.preprocessing.image import load_img +import cons -def plot_preds(data, cons, output_fpath = None, show_plot:bool=True): +@beartype +def plot_preds( + data:pd.DataFrame, + output_fpath:str = None, + show_plot:bool=True + ): """ + Shows model predictions as a grid of images with labels + + Parameters + ---------- + data : pandas.DataFrame + A dataframe of image arrays and associated labels + output_fpath : str + The file path to save the plot of predictions, default is None show_plot : bool Whether to show the generated plot, default is True + + Returns + ------- """ sample_test = data.head(18) plt.figure(figsize=(12, 24)) @@ -23,5 +42,4 @@ def plot_preds(data, cons, output_fpath = None, show_plot:bool=True): plt.savefig(output_fpath) if show_plot: plt.show() - plt.close() - return 0 \ No newline at end of file + plt.close() \ No newline at end of file diff --git a/model/utilities/resize_image.py b/model/utilities/resize_image.py deleted file mode 100644 index 6c44240..0000000 --- a/model/utilities/resize_image.py +++ /dev/null @@ -1,12 +0,0 @@ -import numpy as np -import cv2 - -def resize_image(image_array, reshape_wh, interpolation = cv2.INTER_LINEAR, keep_3dim = True): - """""" - # rescale the image either by shrinking or expanding - res_image_array = cv2.resize(image_array, dsize = reshape_wh, interpolation=interpolation) - # keep 3dim; when applying resizing to (:, :, 1) shape images - if keep_3dim and len(res_image_array.shape) == 2: - res_image_array = res_image_array[..., np.newaxis] - - return res_image_array \ No newline at end of file diff --git a/webscrapers/playwright/cons.py b/webscrapers/arch/playwright/cons.py similarity index 100% rename from webscrapers/playwright/cons.py rename to webscrapers/arch/playwright/cons.py diff --git a/webscrapers/playwright/webscraper.py b/webscrapers/arch/playwright/webscraper.py similarity index 100% rename from webscrapers/playwright/webscraper.py rename to webscrapers/arch/playwright/webscraper.py diff --git a/webscrapers/scrapy/scrapy.cfg b/webscrapers/arch/scrapy/scrapy.cfg similarity index 100% rename from webscrapers/scrapy/scrapy.cfg rename to webscrapers/arch/scrapy/scrapy.cfg diff --git a/webscrapers/scrapy/scrapy_webscraper/__init__.py b/webscrapers/arch/scrapy/scrapy_webscraper/__init__.py similarity index 100% rename from webscrapers/scrapy/scrapy_webscraper/__init__.py rename to webscrapers/arch/scrapy/scrapy_webscraper/__init__.py diff --git 
a/webscrapers/scrapy/scrapy_webscraper/items.py b/webscrapers/arch/scrapy/scrapy_webscraper/items.py similarity index 100% rename from webscrapers/scrapy/scrapy_webscraper/items.py rename to webscrapers/arch/scrapy/scrapy_webscraper/items.py diff --git a/webscrapers/scrapy/scrapy_webscraper/middlewares.py b/webscrapers/arch/scrapy/scrapy_webscraper/middlewares.py similarity index 100% rename from webscrapers/scrapy/scrapy_webscraper/middlewares.py rename to webscrapers/arch/scrapy/scrapy_webscraper/middlewares.py diff --git a/webscrapers/scrapy/scrapy_webscraper/pipelines.py b/webscrapers/arch/scrapy/scrapy_webscraper/pipelines.py similarity index 100% rename from webscrapers/scrapy/scrapy_webscraper/pipelines.py rename to webscrapers/arch/scrapy/scrapy_webscraper/pipelines.py diff --git a/webscrapers/scrapy/scrapy_webscraper/settings.py b/webscrapers/arch/scrapy/scrapy_webscraper/settings.py similarity index 100% rename from webscrapers/scrapy/scrapy_webscraper/settings.py rename to webscrapers/arch/scrapy/scrapy_webscraper/settings.py diff --git a/webscrapers/scrapy/scrapy_webscraper/spiders/FreeImages.py b/webscrapers/arch/scrapy/scrapy_webscraper/spiders/FreeImages.py similarity index 100% rename from webscrapers/scrapy/scrapy_webscraper/spiders/FreeImages.py rename to webscrapers/arch/scrapy/scrapy_webscraper/spiders/FreeImages.py diff --git a/webscrapers/scrapy/scrapy_webscraper/spiders/__init__.py b/webscrapers/arch/scrapy/scrapy_webscraper/spiders/__init__.py similarity index 100% rename from webscrapers/scrapy/scrapy_webscraper/spiders/__init__.py rename to webscrapers/arch/scrapy/scrapy_webscraper/spiders/__init__.py diff --git a/webscrapers/exeUnittests.cmd b/webscrapers/exeUnittests.cmd new file mode 100644 index 0000000..780f944 --- /dev/null +++ b/webscrapers/exeUnittests.cmd @@ -0,0 +1 @@ +call python -m unittest discover unittests\utilities \ No newline at end of file diff --git a/webscrapers/prg_scrape_imgs.py b/webscrapers/prg_scrape_imgs.py index 27acfb0..7d45bc6 100644 --- a/webscrapers/prg_scrape_imgs.py +++ b/webscrapers/prg_scrape_imgs.py @@ -1,9 +1,9 @@ import logging -import cons -from commandline_interface import commandline_interface -from webscrapers.download_comp_data import download_comp_data -from webscrapers.beautifulsoup.webscraper import webscraper from beartype import beartype +import cons +from utilities.commandline_interface import commandline_interface +from utilities.download_comp_data import download_comp_data +from utilities.webscraper import webscraper @beartype def scrape_imags( diff --git a/webscrapers/unittests/utilities/test_gen_urls.py b/webscrapers/unittests/utilities/test_gen_urls.py new file mode 100644 index 0000000..dcf2cb6 --- /dev/null +++ b/webscrapers/unittests/utilities/test_gen_urls.py @@ -0,0 +1,30 @@ +import unittest +import os +import sys + +sys.path.append(os.path.join(os.getcwd(), "webscrapers")) + +import cons +from utilities.webscraper import gen_urls + +exp_urls = ['https://free-images.com/search/?q=cat&skip=0', 'https://free-images.com/search/?q=cat&skip=100', 'https://free-images.com/search/?q=cat&skip=200'] +obs_urls = gen_urls(search="cat", n_images=300, home_url=cons.home_url) + +class Test_gen_urls(unittest.TestCase): + """""" + + def setUp(self): + self.obs_urls = obs_urls + self.exp_urls = exp_urls + + def test_type(self): + self.assertEqual(type(self.obs_urls), type(self.exp_urls)) + + def test_len(self): + self.assertEqual(len(self.obs_urls), len(self.exp_urls)) + + def test_values(self): + 
self.assertEqual(self.obs_urls, self.exp_urls) + +if __name__ == "__main__": + unittest.main() diff --git a/webscrapers/unittests/utilities/test_scrape_srcs.py b/webscrapers/unittests/utilities/test_scrape_srcs.py new file mode 100644 index 0000000..7d283ff --- /dev/null +++ b/webscrapers/unittests/utilities/test_scrape_srcs.py @@ -0,0 +1,31 @@ +import unittest +import os +import sys + +sys.path.append(os.path.join(os.getcwd(), "webscrapers")) + +import cons +from utilities.webscraper import scrape_srcs + +exp_srcs = ['https://free-images.com/sm/d790/cat_home_cat_looking.jpg', 'https://free-images.com/sm/e396/cat_hangover_red_cute_51.jpg', 'https://free-images.com/sm/bc44/cat_hangover_siamese_cat_2.jpg'] +urls = ['https://free-images.com/search/?q=cat&skip=0'] +obs_srcs = scrape_srcs(urls=urls, n_images=3, home_url=cons.home_url) + +class Test_scrape_srcs(unittest.TestCase): + """""" + + def setUp(self): + self.obs_srcs = obs_srcs + self.exp_srcs = exp_srcs + + def test_type(self): + self.assertEqual(type(self.obs_srcs), type(self.exp_srcs)) + + def test_len(self): + self.assertEqual(len(self.obs_srcs), len(self.exp_srcs)) + + def test_values(self): + self.assertEqual(self.obs_srcs, self.exp_srcs) + +if __name__ == "__main__": + unittest.main() diff --git a/webscrapers/commandline_interface.py b/webscrapers/utilities/commandline_interface.py similarity index 100% rename from webscrapers/commandline_interface.py rename to webscrapers/utilities/commandline_interface.py diff --git a/webscrapers/download_comp_data.py b/webscrapers/utilities/download_comp_data.py similarity index 100% rename from webscrapers/download_comp_data.py rename to webscrapers/utilities/download_comp_data.py diff --git a/webscrapers/beautifulsoup/webscraper.py b/webscrapers/utilities/webscraper.py similarity index 100% rename from webscrapers/beautifulsoup/webscraper.py rename to webscrapers/utilities/webscraper.py
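
For reference, a minimal usage sketch of the image helpers this change relocates into model/arch/ (load_image, greyscale_image, pad_image, resize_image, crop_image), chained in the same order data_prep() applies them. It assumes the script runs from the model/ directory so the arch package resolves, that numpy and opencv-python are installed, and that the helpers' np.array annotations are intended as np.ndarray (beartype normally expects a type such as np.ndarray rather than the np.array factory function). The image path and sizes below are illustrative only, not part of the patch.

```python
# Minimal sketch of the relocated model/arch image helpers.
# Assumptions: run from model/, numpy + opencv-python installed,
# np.array annotations behave as np.ndarray; the file path is hypothetical.
import os
import sys

sys.path.append(os.getcwd())

from arch.load_image import load_image
from arch.greyscale_image import greyscale_image
from arch.pad_image import pad_image
from arch.resize_image import resize_image
from arch.crop_image import crop_image

# load a colour image from disk as a (H, W, 3) numpy array
rgb = load_image("data/train/cat.0.jpg")  # hypothetical path
# convert to single-channel greyscale, keeping the trailing dimension
grey = greyscale_image(rgb, keep_3dim=True)
# pad to a common (width, height), then downsize, as data_prep() does
padded = pad_image(grey, pad_shape_wh=(500, 500))
small = resize_image(padded, reshape_wh=(166, 166))
# crop a patch between two diagonal (width, height) points
patch = crop_image(small, pt1_wh=(10, 10), pt2_wh=(110, 110))
print(rgb.shape, grey.shape, padded.shape, small.shape, patch.shape)
```

The (width, height) ordering of pad_shape_wh and reshape_wh follows the tuple unpacking inside pad_image and the cv2.resize dsize convention.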
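
The TimeIt utility now carries docstrings, beartype checks and a reset() method. Below is a small sketch of the logging pattern used in prg_keras_model.py and prg_torch_model.py, assuming it runs from the model/ directory so that utilities.TimeIt is importable; the keys are illustrative.

```python
# Sketch of the TimeIt logging pattern; assumes cwd is model/ so that
# utilities.TimeIt resolves. Keys are illustrative only.
import logging
from utilities.TimeIt import TimeIt

logging.basicConfig(level=logging.INFO)

timeLogger = TimeIt()
# ... run a pipeline step here ...
timeLogger.logTime(parentKey="Plots", subKey="TestSetPredictions")
# every entry records parentKey, subKey, stepTime and cumulativeTime
print(timeLogger.log[-1]["cumulativeTime"])
# start a fresh timing run with the new reset() method
timeLogger.reset()
```

Each logTime() call appends a dict with the keys parentKey, subKey, stepTime and cumulativeTime, which is exactly what the new test_TimeIt.py asserts on.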
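
Similarly, a sketch of the utilities.webscraper helpers exercised by the new test_gen_urls.py and test_scrape_srcs.py, assuming the webscrapers directory is on sys.path (the tests append it relative to the repo root) and that free-images.com is reachable, since the expected values in test_scrape_srcs.py come from the live site. The search term and counts mirror the test fixtures.

```python
# Sketch of the utilities.webscraper helpers covered by the new unit tests.
# Assumptions: run from the repo root, webscrapers/ on sys.path, network access.
import os
import sys

sys.path.append(os.path.join(os.getcwd(), "webscrapers"))

import cons
from utilities.webscraper import gen_urls, scrape_srcs

# build paginated search URLs; the tests expect skip increments of 100
urls = gen_urls(search="cat", n_images=300, home_url=cons.home_url)
# pull the first few image source links from the first result page
srcs = scrape_srcs(urls=urls[:1], n_images=3, home_url=cons.home_url)
print(urls)
print(srcs)
```

gen_urls paginates in steps of 100 (skip=0, 100, 200 for n_images=300), matching the expected URLs in test_gen_urls.py.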