4 changes: 4 additions & 0 deletions README.md
@@ -24,6 +24,10 @@ See the analysis results notebook for further details on the analysis; includi

* https://nbviewer.org/github/oislen/CatClassifier/blob/main/report/torch_analysis_results.ipynb

Master serialised copies of the trained models are available on Kaggle:

* https://www.kaggle.com/models/oislen/cat-classifier-cnn-models

## Running the Application (Windows)

### Anaconda
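The hosted models can presumably be fetched with the kagglehub client; a minimal sketch, assuming the kagglehub package and the instance handle recorded later in webscrapers/cons.py:

import kagglehub

# download the serialised cat-classifier models locally;
# model_download returns the path to the downloaded files
model_path = kagglehub.model_download("oislen/cat-classifier-cnn-models/pyTorch/default/1")
print(model_path)
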
25 changes: 11 additions & 14 deletions aws/ref/create_fleet_config.json
@@ -12,40 +12,37 @@
 },
 "Overrides": [
 {
-"InstanceType": "c3.2xlarge"
+"InstanceType": "g4ad.xlarge"
 },
 {
-"InstanceType": "c3.4xlarge"
+"InstanceType": "g4ad.2xlarge"
 },
 {
-"InstanceType": "c4.2xlarge"
+"InstanceType": "g4ad.4xlarge"
 },
 {
-"InstanceType": "c4.4xlarge"
+"InstanceType": "g4dn.xlarge"
 },
 {
-"InstanceType": "c5.2xlarge"
+"InstanceType": "g4dn.2xlarge"
 },
 {
-"InstanceType": "c5.4xlarge"
+"InstanceType": "g4dn.4xlarge"
 },
 {
-"InstanceType": "c5a.2xlarge"
+"InstanceType": "g5.xlarge"
 },
 {
-"InstanceType": "c5a.4xlarge"
+"InstanceType": "g5.2xlarge"
 },
 {
-"InstanceType": "c5ad.2xlarge"
+"InstanceType": "g6.xlarge"
 },
 {
-"InstanceType": "c5ad.4xlarge"
+"InstanceType": "g6.2xlarge"
 },
 {
-"InstanceType": "c5d.2xlarge"
-},
-{
-"InstanceType": "c5d.4xlarge"
+"InstanceType": "g6.4xlarge"
 }
 ]
}
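The new overrides swap the CPU-bound c-family instances for GPU-backed g4/g5/g6 instances, a better fit for CNN training workloads. Assuming the file follows the CreateFleet request shape used by the AWS CLI's --cli-input-json option, it could also be submitted from Python with boto3; a hedged sketch:

import json
import boto3

# load the fleet request and pass its keys straight through as create_fleet kwargs
with open("aws/ref/create_fleet_config.json") as f:
    fleet_config = json.load(f)

ec2 = boto3.client("ec2")
response = ec2.create_fleet(**fleet_config)
print(response["FleetId"])
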
Binary file added doc/woof_meow.jpg
84 changes: 84 additions & 0 deletions model/arch/classify_image_keras.py
@@ -0,0 +1,84 @@
# python model/arch/classify_image_keras.py --image_fpath E:/GitHub/CatClassifier/data/train/cat.0.jpg --model_fpath E:/GitHub/CatClassifier/data/models/AlexNet8.keras

import logging
import argparse
import os
import pandas as pd
import numpy as np
import sys
import re
from beartype import beartype

# set root file directories
root_dir_re_match = re.findall(string=os.getcwd(), pattern="^.+CatClassifier")
root_fdir = root_dir_re_match[0] if len(root_dir_re_match) > 0 else os.path.join(".", "CatClassifier")
model_fdir = os.path.join(root_fdir, 'model')
sys.path.append(model_fdir)

# load custom scripts
import cons

# load tensorflow / keras modules
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model

@beartype
def classify_image_keras(image_fpath:str, model_fpath:str=cons.keras_model_pickle_fpath):
    """Classifies an input image using the keras model

    Parameters
    ----------
    image_fpath : str
        The full filepath to the image to classify using the keras model
    model_fpath : str
        The full filepath to the keras model to use for classification, default is cons.keras_model_pickle_fpath

    Returns
    -------
    list
        The image file classification results as a recordset
    """

    logging.info("Loading keras model...")
    # load model
    model = load_model(model_fpath)

    logging.info("Generating dataset...")
    # prepare test data
    dataframe = pd.DataFrame({'filepath': [image_fpath]})

    logging.info("Creating dataloader...")
    # set data generator
    imagedatagenerator = ImageDataGenerator(rescale=cons.rescale)
    generator = imagedatagenerator.flow_from_dataframe(dataframe=dataframe, directory=cons.test_fdir, x_col='filepath', y_col=None, class_mode=None, target_size=cons.IMAGE_SIZE, batch_size=cons.batch_size, shuffle=cons.shuffle)

    logging.info("Classifying image...")
    # make test set predictions
    predict = model.predict(generator, steps=int(np.ceil(dataframe.shape[0]/cons.batch_size)))
    dataframe['category'] = np.argmax(predict, axis=-1)
    dataframe['category'] = dataframe['category'].replace(cons.category_mapper)
    response = dataframe.to_dict(orient="records")
    logging.info(response)
    return response

if __name__ == "__main__":

    # set up logging
    lgr = logging.getLogger()
    lgr.setLevel(logging.INFO)

    # define argument parser object
    parser = argparse.ArgumentParser(description="Classify Image (Keras Model)")
    # add input arguments
    parser.add_argument("--image_fpath", action="store", dest="image_fpath", type=str, help="String, the full file path to the image to classify")
    parser.add_argument("--model_fpath", action="store", dest="model_fpath", type=str, default=cons.keras_model_pickle_fpath, help="String, the full file path to the model to use for classification")
    # extract input arguments
    args = parser.parse_args()
    # classify image using keras model
    response = classify_image_keras(image_fpath=args.image_fpath, model_fpath=args.model_fpath)
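
Beyond the command line, the function can be imported and called directly; a small usage sketch with hypothetical local paths:

# hypothetical paths; substitute your own image and serialised model
response = classify_image_keras(
    image_fpath="data/train/cat.0.jpg",
    model_fpath="data/models/AlexNet8.keras"
)
# e.g. [{'filepath': 'data/train/cat.0.jpg', 'category': 'cat'}]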
104 changes: 104 additions & 0 deletions model/arch/classify_image_torch.py
@@ -0,0 +1,104 @@
# python model/arch/classify_image_torch.py --image_fpath E:/GitHub/CatClassifier/data/train/cat.0.jpg --model_fpath E:/GitHub/CatClassifier/data/models/VGG16.pt

import logging
import argparse
import platform
import os
import pandas as pd
import numpy as np
import sys
import re
from beartype import beartype

# set root file directories
root_dir_re_match = re.findall(string=os.getcwd(), pattern="^.+CatClassifier")
root_fdir = root_dir_re_match[0] if len(root_dir_re_match) > 0 else os.path.join(".", "CatClassifier")
model_fdir = os.path.join(root_fdir, 'model')
sys.path.append(model_fdir)

# set huggingface hub directory
huggingface_hub_dir = 'E:\\huggingface'
if (platform.system() == 'Windows') and (os.path.exists(huggingface_hub_dir)):
    os.environ['TORCH_HOME'] = huggingface_hub_dir
    os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

import torch
from torch.utils.data import DataLoader
from torchvision import transforms

# load custom scripts
import cons
from model.torch.VGG16_pretrained import VGG16_pretrained
from model.torch.CustomDataset import CustomDataset

# device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

torch_transforms = transforms.Compose([
    transforms.Resize(size=[cons.IMAGE_WIDTH, cons.IMAGE_HEIGHT]) # resize the input image to a uniform size
    #,transforms.RandomRotation(30)
    #,transforms.RandomHorizontalFlip(p=0.05)
    #,transforms.RandomPerspective(distortion_scale=0.05, p=0.05)
    ,transforms.ToTensor() # convert PIL Image or numpy.ndarray to tensor and normalise to the range [0, 1]
    ,transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # standardise with ImageNet channel statistics
])

@beartype
def classify_image_torch(image_fpath:str, model_fpath:str=cons.torch_model_pt_fpath):
    """Classifies an input image using the torch model

    Parameters
    ----------
    image_fpath : str
        The full filepath to the image to classify using the torch model
    model_fpath : str
        The full filepath to the torch model to use for classification, default is cons.torch_model_pt_fpath

    Returns
    -------
    list
        The image file classification results as a recordset
    """

    logging.info("Loading torch model...")
    # load model
    #model = AlexNet8(num_classes=2).to(device)
    model = VGG16_pretrained(num_classes=2).to(device)
    model.load(input_fpath=model_fpath)

    logging.info("Generating dataset...")
    # prepare test data
    dataframe = pd.DataFrame({'filepath': [image_fpath]})

    logging.info("Creating dataloader...")
    # set test data loader
    dataset = CustomDataset(dataframe, transforms=torch_transforms, mode='test')
    loader = DataLoader(dataset, batch_size=cons.batch_size, shuffle=False, num_workers=cons.num_workers, pin_memory=True)

    logging.info("Classifying image...")
    # make test set predictions
    predict = model.predict(loader, device)
    dataframe['category'] = np.argmax(predict, axis=-1)
    dataframe['category'] = dataframe['category'].replace(cons.category_mapper)
    response = dataframe.to_dict(orient="records")
    logging.info(response)
    return response

if __name__ == "__main__":

    # set up logging
    lgr = logging.getLogger()
    lgr.setLevel(logging.INFO)

    # define argument parser object
    parser = argparse.ArgumentParser(description="Classify Image (Torch Model)")
    # add input arguments
    parser.add_argument("--image_fpath", action="store", dest="image_fpath", type=str, help="String, the full file path to the image to classify")
    parser.add_argument("--model_fpath", action="store", dest="model_fpath", type=str, default=cons.torch_model_pt_fpath, help="String, the full file path to the model to use for classification")
    # extract input arguments
    args = parser.parse_args()
    # classify image using torch model
    response = classify_image_torch(image_fpath=args.image_fpath, model_fpath=args.model_fpath)
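
The DataLoader route generalises to batches; for a single image the same pipeline reduces to a few lines. A sketch, assuming the loaded model is a standard nn.Module that returns class logits:

from PIL import Image

# preprocess one image into a [1, 3, H, W] batch and classify it
image = Image.open(image_fpath).convert("RGB")
batch = torch_transforms(image).unsqueeze(0).to(device)
model.eval()
with torch.no_grad():
    logits = model(batch)
predicted_class = logits.argmax(dim=-1).item()  # index into cons.category_mapper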
24 changes: 14 additions & 10 deletions webscrapers/cons.py
@@ -5,13 +5,14 @@
 # set root file directories
 root_dir_re_match = re.findall(string=os.getcwd(), pattern="^.+CatClassifier")
 root_fdir = root_dir_re_match[0] if len(root_dir_re_match) > 0 else os.path.join(".", "CatClassifier")
-data_fdir = os.path.join(root_fdir, 'data')
-creds_fdir = os.path.join(root_fdir, '.creds')
-dataprep_fdir = os.path.join(root_fdir, 'data_prep')
-report_fdir = os.path.join(root_fdir, 'report')
-test_fdir = os.path.join(data_fdir, 'test1')
-train_fdir = os.path.join(data_fdir, 'train')
-webscrapers_fdir = os.path.join(root_fdir, 'webscrapers')
+data_fdir = os.path.join(root_fdir, "data")
+creds_fdir = os.path.join(root_fdir, ".creds")
+dataprep_fdir = os.path.join(root_fdir, "data_prep")
+report_fdir = os.path.join(root_fdir, "report")
+test_fdir = os.path.join(data_fdir, "test1")
+train_fdir = os.path.join(data_fdir, "train")
+models_fdir = os.path.join(data_fdir, "models")
+webscrapers_fdir = os.path.join(root_fdir, "webscrapers")

 # set list containing all required directories
 root_fdirs = [root_fdir, data_fdir, dataprep_fdir, report_fdir, test_fdir, train_fdir, webscrapers_fdir]
@@ -22,12 +23,15 @@

 # set kaggle competition name
 os.environ["KAGGLE_CONFIG_DIR"] = creds_fdir
-comp_name = 'dogs-vs-cats'
+comp_name = "dogs-vs-cats"
 download_data = True
 unzip_data = True
 del_zip = True

+# set kaggle model details
+model_instance_url = "oislen/cat-classifier-cnn-models/pyTorch/default/1"
+
 # webscraping constants
 n_images = 6000
-home_url = 'https://free-images.com'
-output_dir = os.path.join(data_fdir, '{search}')
+home_url = "https://free-images.com"
+output_dir = os.path.join(data_fdir, "{search}")
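
Note that output_dir is a template string; the scrapers presumably substitute the search term to derive per-class folders, e.g.:

cat_dir = cons.output_dir.format(search="cat")  # <root>/data/cat
dog_dir = cons.output_dir.format(search="dog")  # <root>/data/dog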
30 changes: 21 additions & 9 deletions webscrapers/prg_scrape_imgs.py
@@ -2,18 +2,21 @@
 from beartype import beartype
 import cons
 from utilities.commandline_interface import commandline_interface
-from utilities.download_comp_data import download_comp_data
+from utilities.download_comp_data import download_comp_data, download_models
 from utilities.webscraper import webscraper

 @beartype
 def scrape_imags(
+    run_download_models:bool=False,
     run_download_comp_data:bool=False,
     run_webscraper:bool=False
 ):
     """Programme for running Kaggle comp data download and image web scrapers

     Parameters
     ----------
+    run_download_models : bool
+        Whether to download the master Kaggle models, default is False
     run_download_comp_data : bool
         Whether to run the download Kaggle competition data, default is False
     run_webscraper : bool
@@ -22,8 +25,16 @@ def scrape_imags(
     Returns
     -------
     """
+    if run_download_models:
+        logging.info("Downloading kaggle models ...")
+        # download master models
+        download_models(
+            model_instance_url=cons.model_instance_url,
+            model_dir=cons.models_fdir
+        )
+
     if run_download_comp_data:
-        logging.info('Downloading kaggle data ...')
+        logging.info("Downloading kaggle data ...")
         # download competition data
         download_comp_data(
             comp_name=cons.comp_name,
@@ -33,25 +44,25 @@ def scrape_imags(
             del_zip=cons.del_zip
         )
     if run_webscraper:
-        logging.info('Running cat image webscraper ...')
+        logging.info("Running cat image webscraper ...")
         # run cat webscraper
         webscraper(
-            search='cat',
+            search="cat",
             n_images=cons.n_images,
             home_url=cons.home_url,
             output_dir=cons.train_fdir
         )
-        logging.info('Running dog image webscraper ...')
+        logging.info("Running dog image webscraper ...")
         # run dog webscraper
         webscraper(
-            search='dog',
+            search="dog",
             n_images=cons.n_images,
             home_url=cons.home_url,
             output_dir=cons.train_fdir
         )

 # if running as main programme
-if __name__ == '__main__':
+if __name__ == "__main__":

     # set up logging
     lgr = logging.getLogger()
@@ -62,6 +73,7 @@ def scrape_imags(

     # run the scrape images programme
     scrape_imags(
-        run_download_comp_data=input_params_dict['run_download_comp_data'],
-        run_webscraper=input_params_dict['run_webscraper']
+        run_download_models=input_params_dict["run_download_models"],
+        run_download_comp_data=input_params_dict["run_download_comp_data"],
+        run_webscraper=input_params_dict["run_webscraper"]
     )
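
With the new flag wired through, a full end-to-end run might look like this (flag names taken from the parser in webscrapers/utilities/commandline_interface.py; all three default to False):

# python webscrapers/prg_scrape_imgs.py --run_download_models --run_download_comp_data --run_webscraper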
2 changes: 2 additions & 0 deletions webscrapers/utilities/commandline_interface.py
@@ -15,13 +15,15 @@ def commandline_interface():
     # define argument parser object
     parser = argparse.ArgumentParser(description="Execute Webscrapers.")
     # add input arguments
+    parser.add_argument("--run_download_models", action=argparse.BooleanOptionalAction, dest="run_download_models", type=bool, default=False, help="Boolean, whether to download the master Kaggle models, default is False",)
     parser.add_argument("--run_download_comp_data", action=argparse.BooleanOptionalAction, dest="run_download_comp_data", type=bool, default=False, help="Boolean, whether to run the download Kaggle competition data, default is False",)
     parser.add_argument("--run_webscraper", action=argparse.BooleanOptionalAction, dest="run_webscraper", type=bool, default=False, help="Boolean, whether to run the image webscraper, default is False",)
     # create an output dictionary to hold the results
     input_params_dict = {}
     # extract input arguments
     args = parser.parse_args()
     # map input arguments into output dictionary
+    input_params_dict["run_download_models"] = args.run_download_models
     input_params_dict["run_download_comp_data"] = args.run_download_comp_data
     input_params_dict["run_webscraper"] = args.run_webscraper
     return input_params_dict