Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ RUN apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-venv
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
RUN /opt/venv/bin/python3 -m pip install -v -r /home/ubuntu/CatClassifier/requirements.txt
RUN /opt/venv/bin/python3 -m pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu121

WORKDIR /home/${user}
ENTRYPOINT ["jupyter", "lab", "--ip=0.0.0.0", "--allow-root"]
2 changes: 1 addition & 1 deletion aws/exeRunEC2.cmd
Original file line number Diff line number Diff line change
@@ -1 +1 @@
call python prg_run_ec2_instance.py --launch --terminate --describe --isFleet
call python prg_run_ec2_instance.py --isFleet --launch --terminate --describe
6 changes: 4 additions & 2 deletions aws/exeSetUpEC2.cmd
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
:: set EC2 login info
SET EC2_USER=ec2-user
::SET EC2_USER=ec2-user
SET EC2_USER=ubuntu
SET EC2_PEM_FPATH="C:\Users\oisin\.aws\kaggle.pem"
SET EC2_CREDS_FDIR=E:\GitHub\CatClassifier\.creds
SET EC2_SETUP_FPATH=E:\GitHub\CatClassifier\aws\linux_docker_setup.sh
Expand All @@ -17,5 +18,6 @@ call scp -i %EC2_PEM_FPATH% -r %EC2_CREDS_FDIR% %EC2_USER%@%EC2_DNS%:~/.
call scp -i %EC2_PEM_FPATH% %EC2_SETUP_FPATH% %EC2_USER%@%EC2_DNS%:~/linux_docker_setup.sh
call scp -i %EC2_PEM_FPATH% %EC2_EXTRACT_FPATH% %EC2_USER%@%EC2_DNS%:~/docker_extract_data.sh
:: ssh to EC2 and run linux setup
call ssh -v -i %EC2_PEM_FPATH% %EC2_USER%@%EC2_DNS% "sed -i 's/\r$//' ~/linux_docker_setup.sh; bash ~/linux_docker_setup.sh"
call ssh -v -i %EC2_PEM_FPATH% %EC2_USER%@%EC2_DNS%
::call ssh -v -i %EC2_PEM_FPATH% %EC2_USER%@%EC2_DNS% "sed -i 's/\r$//' ~/linux_docker_setup.sh; bash ~/linux_docker_setup.sh"
ENDLOCAL
11 changes: 7 additions & 4 deletions aws/linux_docker_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# 2. make sure to increase volume in /dev/nvme0n1 (/dev/xvda) e.g. 100gb

# linux file formatting
# sudo yum install -y dos2unix
# sudo apt-get install -y dos2unix
# dos2unix ./linux_docker_setup.sh

#-- EC2 Spot Instance Checks --#
Expand All @@ -14,6 +14,9 @@ df -h
lscpu
# calculate percentage of used memory
free -m | awk 'FNR == 2 {print $3/($3+$4)*100}'
# check gpu status
nvidia-smi
# watch -n 0.5 nvidia-smi

#-- Configure Permissions and Overcommit Settings --#

Expand Down Expand Up @@ -54,11 +57,11 @@ sudo umount /tmp
#-- Download Required Programmes --#

# update os
sudo yum update -y
sudo apt-get update -y
# install required base software
sudo yum install -y htop vim tmux dos2unix docker git
sudo apt-get install -y htop vim tmux dos2unix docker git
# remove unneeded dependencies
sudo yum autoremove
sudo apt-get autoremove

#-- Pull Git Repo --#

Expand Down
19 changes: 8 additions & 11 deletions aws/ref/create_fleet_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,19 @@
},
"Overrides": [
{
"InstanceType": "g4ad.xlarge"
},
{
"InstanceType": "g4ad.2xlarge"
"InstanceType": "g4dn.xlarge"
},
{
"InstanceType": "g4ad.4xlarge"
"InstanceType": "g4dn.2xlarge"
},
{
"InstanceType": "g4dn.xlarge"
"InstanceType": "g4dn.4xlarge"
},
{
"InstanceType": "g4dn.2xlarge"
"InstanceType": "g4dn.8xlarge"
},
{
"InstanceType": "g4dn.4xlarge"
"InstanceType": "g4dn.12xlarge"
},
{
"InstanceType": "g5.xlarge"
Expand All @@ -36,13 +33,13 @@
"InstanceType": "g5.2xlarge"
},
{
"InstanceType": "g6.xlarge"
"InstanceType": "g5.4xlarge"
},
{
"InstanceType": "g6.2xlarge"
"InstanceType": "g5.8xlarge"
},
{
"InstanceType": "g6.4xlarge"
"InstanceType": "g5.12xlarge"
}
]
}
Expand Down
8 changes: 4 additions & 4 deletions aws/ref/launch_template_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
"LaunchTemplateData": {
"BlockDeviceMappings": [
{
"DeviceName": "/dev/xvda",
"DeviceName": "/dev/sda1",
"Ebs": {
"VolumeSize": 50,
"VolumeSize": 100,
"VolumeType": "gp3"
}
}
Expand All @@ -20,8 +20,8 @@
"Groups":["sg-03864b806cd78ded3"]
}
],
"ImageId": "ami-00385a401487aefa4",
"InstanceType": "t2.micro",
"ImageId": "ami-000b13fcd5cd7b0f8",
"InstanceType": "g4ad.xlarge",
"KeyName": "kaggle",
"Placement": {
"AvailabilityZone": "eu-west-1a"
Expand Down
3 changes: 2 additions & 1 deletion conda/catclass.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ call conda activate catclass
call conda update -n base conda --yes

:: install relevant libraries
call pip install -v -r ..\requirements.txt
call pip install -v -r ..\requirements.txt
call pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu121
2 changes: 2 additions & 0 deletions exeDocker.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ call docker build --no-cache -t %DOCKER_IMAGE% .

:: run docker container
call docker run --name %DOCKER_CONTAINER_NAME% --shm-size=512m --publish 8888:8888 --volume E:\GitHub\CatClassifier\.creds:/home/ubuntu/CatClassifier/.creds --volume E:\GitHub\CatClassifier\report:/home/ubuntu/CatClassifier/report -it %DOCKER_IMAGE%
::call docker run --entrypoint sh --name %DOCKER_CONTAINER_NAME% --shm-size=512m --publish 8888:8888 --volume E:\GitHub\CatClassifier\.creds:/home/ubuntu/CatClassifier/.creds --volume E:\GitHub\CatClassifier\report:/home/ubuntu/CatClassifier/report -it %DOCKER_IMAGE%
::call docker run -it --entrypoint bash --name cc --shm-size=512m --volume /home/ec2-user/.creds:/home/ubuntu/CatClassifier/.creds --rm oislen/cat-classifier:latest

:: useful docker commands
:: docker images
Expand Down
2 changes: 1 addition & 1 deletion model/arch/classify_image_torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from model.torch.CustomDataset import CustomDataset

# device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cuda' if torch.cuda.is_available() and cons.check_gpu else 'cpu')

torch_transforms = transforms.Compose([
transforms.Resize(size=[cons.IMAGE_WIDTH, cons.IMAGE_HEIGHT]) # resize the input image to a uniform size
Expand Down
3 changes: 2 additions & 1 deletion model/cons.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,5 @@
shuffle = False

# multiprocessing
num_workers = os.cpu_count()
num_workers = os.environ.get("PARAM_NUM_WORKERS", os.cpu_count())
check_gpu = os.environ.get("PARAM_CHECK_GPU", False)
2 changes: 1 addition & 1 deletion model/prg_torch_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
num_epochs = cons.min_epochs if cons.FAST_RUN else cons.max_epochs

# device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cuda' if torch.cuda.is_available() and cons.check_gpu else 'cpu')

torch_transforms = transforms.Compose([
transforms.Resize(size=[cons.IMAGE_WIDTH, cons.IMAGE_HEIGHT]) # resize the input image to a uniform size
Expand Down
4 changes: 2 additions & 2 deletions report/torch_analysis_results.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "504c7d94",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -113,7 +113,7 @@
],
"source": [
"# device configuration\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"device = torch.device('cuda' if torch.cuda.is_available() and cons.check_gpu else 'cpu')\n",
"# load trained torch model\n",
"model = VGG16_pretrained(num_classes=2).to(device)\n",
"model.load(input_fpath=cons.torch_model_pt_fpath)\n",
Expand Down
2 changes: 1 addition & 1 deletion report/torch_analysis_results.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ A pre-trained VGG CNN model with 16 layers was trained using the processed image

```{python}
# device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cuda' if torch.cuda.is_available() and cons.check_gpu else 'cpu')
# load trained torch model
model = VGG16_pretrained(num_classes=2).to(device)
model.load(input_fpath=cons.torch_model_pt_fpath)
Expand Down
2 changes: 0 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,5 @@ matplotlib==3.10.0
seaborn==0.13.2
kaggle==1.6.17
jupyterlab==4.3.5
torch==2.6.0
torchvision==0.21.0
beartype==0.19.0
boto3==1.36.13