4 changes: 4 additions & 0 deletions README.md
@@ -24,6 +24,10 @@ See the analysis results notebook for further details on the analysis; includi

* https://nbviewer.org/github/oislen/CatClassifier/blob/main/report/torch_analysis_results.ipynb

Master serialised copies of the trained models are available on Kaggle:

* https://www.kaggle.com/models/oislen/cat-classifier-cnn-models

## Running the Application (Windows)

### Anaconda
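The hosted models can presumably be fetched with the kagglehub client; a minimal sketch, assuming the kagglehub package and the instance handle recorded later in webscrapers/cons.py:

import kagglehub

# download the serialised cat-classifier models locally;
# model_download returns the path to the downloaded files
model_path = kagglehub.model_download("oislen/cat-classifier-cnn-models/pyTorch/default/1")
print(model_path)
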
25 changes: 11 additions & 14 deletions aws/ref/create_fleet_config.json
@@ -12,40 +12,37 @@
 },
 "Overrides": [
 {
-"InstanceType": "c3.2xlarge"
+"InstanceType": "g4ad.xlarge"
 },
 {
-"InstanceType": "c3.4xlarge"
+"InstanceType": "g4ad.2xlarge"
 },
 {
-"InstanceType": "c4.2xlarge"
+"InstanceType": "g4ad.4xlarge"
 },
 {
-"InstanceType": "c4.4xlarge"
+"InstanceType": "g4dn.xlarge"
 },
 {
-"InstanceType": "c5.2xlarge"
+"InstanceType": "g4dn.2xlarge"
 },
 {
-"InstanceType": "c5.4xlarge"
+"InstanceType": "g4dn.4xlarge"
 },
 {
-"InstanceType": "c5a.2xlarge"
+"InstanceType": "g5.xlarge"
 },
 {
-"InstanceType": "c5a.4xlarge"
+"InstanceType": "g5.2xlarge"
 },
 {
-"InstanceType": "c5ad.2xlarge"
+"InstanceType": "g6.xlarge"
 },
 {
-"InstanceType": "c5ad.4xlarge"
+"InstanceType": "g6.2xlarge"
 },
 {
-"InstanceType": "c5d.2xlarge"
-},
-{
-"InstanceType": "c5d.4xlarge"
+"InstanceType": "g6.4xlarge"
 }
 ]
}
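The new overrides swap the CPU-bound c-family instances for GPU-backed g4/g5/g6 instances, a better fit for CNN training workloads. Assuming the file follows the CreateFleet request shape used by the AWS CLI's --cli-input-json option, it could also be submitted from Python with boto3; a hedged sketch:

import json
import boto3

# load the fleet request and pass its keys straight through as create_fleet kwargs
with open("aws/ref/create_fleet_config.json") as f:
    fleet_config = json.load(f)

ec2 = boto3.client("ec2")
response = ec2.create_fleet(**fleet_config)
print(response["FleetId"])
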
Binary file added doc/woof_meow.jpg
84 changes: 84 additions & 0 deletions model/arch/classify_image_keras.py
@@ -0,0 +1,84 @@
# python model/arch/classify_image_keras.py --image_fpath E:/GitHub/CatClassifier/data/train/cat.0.jpg --model_fpath E:/GitHub/CatClassifier/data/models/AlexNet8.keras

import logging
import argparse
import os
import pandas as pd
import numpy as np
import sys
import re
from beartype import beartype

# set root file directories
root_dir_re_match = re.findall(string=os.getcwd(), pattern="^.+CatClassifier")
root_fdir = root_dir_re_match[0] if len(root_dir_re_match) > 0 else os.path.join(".", "CatClassifier")
model_fdir = os.path.join(root_fdir, 'model')
sys.path.append(model_fdir)

# load custom scripts
import cons

# load tensorflow / keras modules
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model

@beartype
def classify_image_keras(image_fpath:str, model_fpath:str=cons.keras_model_pickle_fpath):
    """Classifies an input image using the keras model

    Parameters
    ----------
    image_fpath : str
        The full filepath to the image to classify using the keras model
    model_fpath : str
        The full filepath to the keras model to use for classification, default is cons.keras_model_pickle_fpath

    Returns
    -------
    list
        The image file classification results as a recordset
    """

    logging.info("Loading keras model...")
    # load model
    model = load_model(model_fpath)

    logging.info("Generating dataset...")
    # prepare test data
    dataframe = pd.DataFrame({'filepath': [image_fpath]})

    logging.info("Creating dataloader...")
    # set data generator
    imagedatagenerator = ImageDataGenerator(rescale=cons.rescale)
    generator = imagedatagenerator.flow_from_dataframe(dataframe=dataframe, directory=cons.test_fdir, x_col='filepath', y_col=None, class_mode=None, target_size=cons.IMAGE_SIZE, batch_size=cons.batch_size, shuffle=cons.shuffle)

    logging.info("Classifying image...")
    # make test set predictions
    predict = model.predict(generator, steps=int(np.ceil(dataframe.shape[0]/cons.batch_size)))
    dataframe['category'] = np.argmax(predict, axis=-1)
    dataframe['category'] = dataframe['category'].replace(cons.category_mapper)
    response = dataframe.to_dict(orient="records")
    logging.info(response)
    return response

if __name__ == "__main__":

    # set up logging
    lgr = logging.getLogger()
    lgr.setLevel(logging.INFO)

    # define argument parser object
    parser = argparse.ArgumentParser(description="Classify Image (Keras Model)")
    # add input arguments
    parser.add_argument("--image_fpath", action="store", dest="image_fpath", type=str, help="String, the full file path to the image to classify")
    parser.add_argument("--model_fpath", action="store", dest="model_fpath", type=str, default=cons.keras_model_pickle_fpath, help="String, the full file path to the model to use for classification")
    # extract input arguments
    args = parser.parse_args()
    # classify image using keras model
    response = classify_image_keras(image_fpath=args.image_fpath, model_fpath=args.model_fpath)
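
Beyond the command line, the function can be imported and called directly; a small usage sketch with hypothetical local paths:

# hypothetical paths; substitute your own image and serialised model
response = classify_image_keras(
    image_fpath="data/train/cat.0.jpg",
    model_fpath="data/models/AlexNet8.keras"
)
# e.g. [{'filepath': 'data/train/cat.0.jpg', 'category': 'cat'}]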
104 changes: 104 additions & 0 deletions model/arch/classify_image_torch.py
@@ -0,0 +1,104 @@
# python model/arch/classify_image_torch.py --image_fpath E:/GitHub/CatClassifier/data/train/cat.0.jpg --model_fpath E:/GitHub/CatClassifier/data/models/VGG16.pt

import logging
import argparse
import platform
import os
import pandas as pd
import numpy as np
import sys
import re
from beartype import beartype

# set root file directories
root_dir_re_match = re.findall(string=os.getcwd(), pattern="^.+CatClassifier")
root_fdir = root_dir_re_match[0] if len(root_dir_re_match) > 0 else os.path.join(".", "CatClassifier")
model_fdir = os.path.join(root_fdir, 'model')
sys.path.append(model_fdir)

# set huggingface hub directory
huggingface_hub_dir = 'E:\\huggingface'
if (platform.system() == 'Windows') and (os.path.exists(huggingface_hub_dir)):
    os.environ['TORCH_HOME'] = huggingface_hub_dir
    os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

import torch
from torch.utils.data import DataLoader
from torchvision import transforms

# load custom scripts
import cons
from model.torch.VGG16_pretrained import VGG16_pretrained
from model.torch.CustomDataset import CustomDataset

# device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

torch_transforms = transforms.Compose([
    transforms.Resize(size=[cons.IMAGE_WIDTH, cons.IMAGE_HEIGHT]) # resize the input image to a uniform size
    #,transforms.RandomRotation(30)
    #,transforms.RandomHorizontalFlip(p=0.05)
    #,transforms.RandomPerspective(distortion_scale=0.05, p=0.05)
    ,transforms.ToTensor() # convert PIL Image or numpy.ndarray to tensor and normalise to the range [0, 1]
    ,transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # standardise with ImageNet channel statistics
])

@beartype
def classify_image_torch(image_fpath:str, model_fpath:str=cons.torch_model_pt_fpath):
    """Classifies an input image using the torch model

    Parameters
    ----------
    image_fpath : str
        The full filepath to the image to classify using the torch model
    model_fpath : str
        The full filepath to the torch model to use for classification, default is cons.torch_model_pt_fpath

    Returns
    -------
    list
        The image file classification results as a recordset
    """

    logging.info("Loading torch model...")
    # load model
    #model = AlexNet8(num_classes=2).to(device)
    model = VGG16_pretrained(num_classes=2).to(device)
    model.load(input_fpath=model_fpath)

    logging.info("Generating dataset...")
    # prepare test data
    dataframe = pd.DataFrame({'filepath': [image_fpath]})

    logging.info("Creating dataloader...")
    # set test data loader
    dataset = CustomDataset(dataframe, transforms=torch_transforms, mode='test')
    loader = DataLoader(dataset, batch_size=cons.batch_size, shuffle=False, num_workers=cons.num_workers, pin_memory=True)

    logging.info("Classifying image...")
    # make test set predictions
    predict = model.predict(loader, device)
    dataframe['category'] = np.argmax(predict, axis=-1)
    dataframe['category'] = dataframe['category'].replace(cons.category_mapper)
    response = dataframe.to_dict(orient="records")
    logging.info(response)
    return response

if __name__ == "__main__":

    # set up logging
    lgr = logging.getLogger()
    lgr.setLevel(logging.INFO)

    # define argument parser object
    parser = argparse.ArgumentParser(description="Classify Image (Torch Model)")
    # add input arguments
    parser.add_argument("--image_fpath", action="store", dest="image_fpath", type=str, help="String, the full file path to the image to classify")
    parser.add_argument("--model_fpath", action="store", dest="model_fpath", type=str, default=cons.torch_model_pt_fpath, help="String, the full file path to the model to use for classification")
    # extract input arguments
    args = parser.parse_args()
    # classify image using torch model
    response = classify_image_torch(image_fpath=args.image_fpath, model_fpath=args.model_fpath)
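
The DataLoader route generalises to batches; for a single image the same pipeline reduces to a few lines. A sketch, assuming the loaded model is a standard nn.Module that returns class logits:

from PIL import Image

# preprocess one image into a [1, 3, H, W] batch and classify it
image = Image.open(image_fpath).convert("RGB")
batch = torch_transforms(image).unsqueeze(0).to(device)
model.eval()
with torch.no_grad():
    logits = model(batch)
predicted_class = logits.argmax(dim=-1).item()  # index into cons.category_mapper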
24 changes: 14 additions & 10 deletions webscrapers/cons.py
@@ -5,13 +5,14 @@
 # set root file directories
 root_dir_re_match = re.findall(string=os.getcwd(), pattern="^.+CatClassifier")
 root_fdir = root_dir_re_match[0] if len(root_dir_re_match) > 0 else os.path.join(".", "CatClassifier")
-data_fdir = os.path.join(root_fdir, 'data')
-creds_fdir = os.path.join(root_fdir, '.creds')
-dataprep_fdir = os.path.join(root_fdir, 'data_prep')
-report_fdir = os.path.join(root_fdir, 'report')
-test_fdir = os.path.join(data_fdir, 'test1')
-train_fdir = os.path.join(data_fdir, 'train')
-webscrapers_fdir = os.path.join(root_fdir, 'webscrapers')
+data_fdir = os.path.join(root_fdir, "data")
+creds_fdir = os.path.join(root_fdir, ".creds")
+dataprep_fdir = os.path.join(root_fdir, "data_prep")
+report_fdir = os.path.join(root_fdir, "report")
+test_fdir = os.path.join(data_fdir, "test1")
+train_fdir = os.path.join(data_fdir, "train")
+models_fdir = os.path.join(data_fdir, "models")
+webscrapers_fdir = os.path.join(root_fdir, "webscrapers")

 # set list containing all required directories
 root_fdirs = [root_fdir, data_fdir, dataprep_fdir, report_fdir, test_fdir, train_fdir, webscrapers_fdir]
@@ -22,12 +23,15 @@

 # set kaggle competition name
 os.environ["KAGGLE_CONFIG_DIR"] = creds_fdir
-comp_name = 'dogs-vs-cats'
+comp_name = "dogs-vs-cats"
 download_data = True
 unzip_data = True
 del_zip = True

+# set kaggle model details
+model_instance_url = "oislen/cat-classifier-cnn-models/pyTorch/default/1"
+
 # webscraping constants
 n_images = 6000
-home_url = 'https://free-images.com'
-output_dir = os.path.join(data_fdir, '{search}')
+home_url = "https://free-images.com"
+output_dir = os.path.join(data_fdir, "{search}")
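
Note that output_dir is a template string; the scrapers presumably substitute the search term to derive per-class folders, e.g.:

cat_dir = cons.output_dir.format(search="cat")  # <root>/data/cat
dog_dir = cons.output_dir.format(search="dog")  # <root>/data/dog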
30 changes: 21 additions & 9 deletions webscrapers/prg_scrape_imgs.py
@@ -2,18 +2,21 @@
 from beartype import beartype
 import cons
 from utilities.commandline_interface import commandline_interface
-from utilities.download_comp_data import download_comp_data
+from utilities.download_comp_data import download_comp_data, download_models
 from utilities.webscraper import webscraper

 @beartype
 def scrape_imags(
+    run_download_models:bool=False,
     run_download_comp_data:bool=False,
     run_webscraper:bool=False
 ):
     """Programme for running Kaggle comp data download and image web scrapers

     Parameters
     ----------
+    run_download_models : bool
+        Whether to download the master Kaggle models, default is False
     run_download_comp_data : bool
         Whether to run the download Kaggle competition data, default is False
     run_webscraper : bool
@@ -22,8 +25,16 @@ def scrape_imags(
     Returns
     -------
     """
+    if run_download_models:
+        logging.info("Downloading kaggle models ...")
+        # download master models
+        download_models(
+            model_instance_url=cons.model_instance_url,
+            model_dir=cons.models_fdir
+        )
+
     if run_download_comp_data:
-        logging.info('Downloading kaggle data ...')
+        logging.info("Downloading kaggle data ...")
         # download competition data
         download_comp_data(
             comp_name=cons.comp_name,
@@ -33,25 +44,25 @@ def scrape_imags(
             del_zip=cons.del_zip
         )
     if run_webscraper:
-        logging.info('Running cat image webscraper ...')
+        logging.info("Running cat image webscraper ...")
         # run cat webscraper
         webscraper(
-            search='cat',
+            search="cat",
             n_images=cons.n_images,
             home_url=cons.home_url,
             output_dir=cons.train_fdir
         )
-        logging.info('Running dog image webscraper ...')
+        logging.info("Running dog image webscraper ...")
         # run dog webscraper
         webscraper(
-            search='dog',
+            search="dog",
             n_images=cons.n_images,
             home_url=cons.home_url,
             output_dir=cons.train_fdir
         )

 # if running as main programme
-if __name__ == '__main__':
+if __name__ == "__main__":

     # set up logging
     lgr = logging.getLogger()
@@ -62,6 +73,7 @@ def scrape_imags(

     # run the scrape images programme
     scrape_imags(
-        run_download_comp_data=input_params_dict['run_download_comp_data'],
-        run_webscraper=input_params_dict['run_webscraper']
+        run_download_models=input_params_dict["run_download_models"],
+        run_download_comp_data=input_params_dict["run_download_comp_data"],
+        run_webscraper=input_params_dict["run_webscraper"]
     )
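
With the new flag wired through, a full end-to-end run might look like this (flag names taken from the parser in webscrapers/utilities/commandline_interface.py; all three default to False):

# python webscrapers/prg_scrape_imgs.py --run_download_models --run_download_comp_data --run_webscraper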
2 changes: 2 additions & 0 deletions webscrapers/utilities/commandline_interface.py
@@ -15,13 +15,15 @@ def commandline_interface():
     # define argument parser object
     parser = argparse.ArgumentParser(description="Execute Webscrapers.")
     # add input arguments
+    parser.add_argument("--run_download_models", action=argparse.BooleanOptionalAction, dest="run_download_models", type=bool, default=False, help="Boolean, whether to download the master Kaggle models, default is False",)
     parser.add_argument("--run_download_comp_data", action=argparse.BooleanOptionalAction, dest="run_download_comp_data", type=bool, default=False, help="Boolean, whether to run the download Kaggle competition data, default is False",)
     parser.add_argument("--run_webscraper", action=argparse.BooleanOptionalAction, dest="run_webscraper", type=bool, default=False, help="Boolean, whether to run the image webscraper, default is False",)
     # create an output dictionary to hold the results
     input_params_dict = {}
     # extract input arguments
     args = parser.parse_args()
     # map input arguments into output dictionary
+    input_params_dict["run_download_models"] = args.run_download_models
     input_params_dict["run_download_comp_data"] = args.run_download_comp_data
     input_params_dict["run_webscraper"] = args.run_webscraper
     return input_params_dict