diff --git a/classification_model_training/files_from_makeathon/data/test.dvc b/classification_model_training/files_from_makeathon/data/test.dvc
index f6f370a..5cc43a9 100644
--- a/classification_model_training/files_from_makeathon/data/test.dvc
+++ b/classification_model_training/files_from_makeathon/data/test.dvc
@@ -1,6 +1,6 @@
outs:
-- md5: e03ff535d9f3da7f51ed642b70ec5d95.dir
- size: 549163
+- md5: c6f373f6997ea2bae313d494caa35b43.dir
+ size: 1434598
nfiles: 80
hash: md5
path: test
diff --git a/classification_model_training/files_from_makeathon/data/train.dvc b/classification_model_training/files_from_makeathon/data/train.dvc
index 5df3ee2..25b2feb 100644
--- a/classification_model_training/files_from_makeathon/data/train.dvc
+++ b/classification_model_training/files_from_makeathon/data/train.dvc
@@ -1,6 +1,6 @@
outs:
-- md5: 0263429cb9fefbc8b68b6a88c26d942f.dir
- size: 2506709
+- md5: c6c8f93cb47471d0133562e34c3f56d0.dir
+ size: 6053320
nfiles: 362
hash: md5
path: train
diff --git a/classification_model_training/files_from_makeathon/dvc.lock b/classification_model_training/files_from_makeathon/dvc.lock
index 70c6532..c34fb9b 100644
--- a/classification_model_training/files_from_makeathon/dvc.lock
+++ b/classification_model_training/files_from_makeathon/dvc.lock
@@ -97,3 +97,47 @@ stages:
hash: md5
md5: 49a445cb95476dbc8dc7e7e2530050f0
size: 74
+ evaluate-last:
+ cmd: python predict.py --model efficientnet-b2 --image_size 224 --num_classes
+ 4 --dataset ./data/test/images --gt ./data/test/gt.csv --single_model_path models/model.pt
+ deps:
+ - path: data/test
+ hash: md5
+ md5: e03ff535d9f3da7f51ed642b70ec5d95.dir
+ size: 549163
+ nfiles: 80
+ - path: models/model.pt
+ hash: md5
+ md5: b7e4e7f26989ba76d1009d1a78724b9c
+ size: 31270857
+ - path: predict.py
+ hash: md5
+ md5: 43fb163a6a3546d1bd6b47cf92fc6ff5
+ size: 6001
+ params:
+ params.yaml:
+ base:
+ random_seed: 1
+ evaluate:
+ dataset: ./data/test/images
+ gt: ./data/test/gt.csv
+ device: cuda
+ val_bs: 16
+ train:
+ fold: 1
+ dataset: ./data/train/images
+ gt: ./data/train/gt.csv
+ num_classes: 4
+ metric: f1_score
+ device: cuda
+ epochs: 30
+ train_batch: 16
+ val_batch: 16
+ outdir: models
+ lr: 0.001
+ dropout_rate: 0.3
+ drop_connect_rate: 0.2
+ batch_norm_momentum: 0.99
+ batch_norm_epsilon: 0.001
+ model: efficientnet-b2
+ image_size: 224
diff --git a/classification_model_training/files_from_makeathon/dvc.yaml b/classification_model_training/files_from_makeathon/dvc.yaml
index 97ba0b2..6560fb7 100644
--- a/classification_model_training/files_from_makeathon/dvc.yaml
+++ b/classification_model_training/files_from_makeathon/dvc.yaml
@@ -59,3 +59,20 @@ stages:
metrics:
- models/metrics-evaluate.json:
cache: false
+ evaluate-last:
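+    # evaluate the exported single model (models/model.pt) on the test set; reproduce with: dvc repro evaluate-last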
+ cmd: >-
+ python predict.py
+ --model ${train.model}
+ --image_size ${train.image_size}
+ --num_classes ${train.num_classes}
+ --dataset ${evaluate.dataset}
+ --gt ${evaluate.gt}
+ --single_model_path ${train.outdir}/model.pt
+ deps:
+ - data/test
+ - predict.py
+ - models/model.pt
+ params:
+ - base
+ - train
+ - evaluate
diff --git a/classification_model_training/files_from_makeathon/notebooks/crop.ipynb b/classification_model_training/files_from_makeathon/notebooks/crop.ipynb
new file mode 100644
index 0000000..bd2cc7b
--- /dev/null
+++ b/classification_model_training/files_from_makeathon/notebooks/crop.ipynb
@@ -0,0 +1,347 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "cc5ebda6-6120-47c3-8240-b74fe83b16b4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import seaborn as sns\n",
+ "\n",
+ "import cv2\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "7c6bf02f-8330-4509-b489-0f0f0eca787b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "csv_train_path = '../data/train/gt.csv'\n",
+ "csv_test_path = '../data/test/gt.csv'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "124bb387-ed02-461e-8e8e-080229fecc86",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_train = pd.read_csv(csv_train_path)\n",
+ "df_train['split'] = 'train'\n",
+ "df_test = pd.read_csv(csv_test_path)\n",
+ "df_test['split'] = 'test'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "cbe2f50e-2407-4aa4-b957-e3426681fde9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "label_map = {\n",
+ " 0:\"positive\",\n",
+ " 1:\"negative\",\n",
+ " 2:\"empty\",\n",
+ " 3:\"invalid\",\n",
+ "}\n",
+ "\n",
+ "split_map = {\n",
+ " 0:\"train\",\n",
+ " 1:\"val\",\n",
+ " -1:\"test\",\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "5739a88f-739b-4ffd-9ee1-843aac5b5559",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " image | \n",
+ " target | \n",
+ " fold | \n",
+ " split | \n",
+ " brand | \n",
+ " stem | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " safecare_negative_030_jpg.rf.7c617838cb5f06ef9... | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " train | \n",
+ " safecare | \n",
+ " safecare_negative_030 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " safecare_invalid_swap_038_jpg.rf.89441e6b07934... | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " train | \n",
+ " safecare | \n",
+ " safecare_invalid_swap_038 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " hygisun_negative_005_jpg.rf.ff1b59ca7eb97855c3... | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " train | \n",
+ " hygisun | \n",
+ " hygisun_negative_005 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " medicovid_negative_real_005_png.rf.8502b610e6f... | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " train | \n",
+ " medicovid | \n",
+ " medicovid_negative_real_005 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " safecare_invalid_swap_012_jpg.rf.9a46f6916090f... | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " train | \n",
+ " safecare | \n",
+ " safecare_invalid_swap_012 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " image target fold split \n",
+ "0 safecare_negative_030_jpg.rf.7c617838cb5f06ef9... 1 0 train \\\n",
+ "1 safecare_invalid_swap_038_jpg.rf.89441e6b07934... 3 0 train \n",
+ "2 hygisun_negative_005_jpg.rf.ff1b59ca7eb97855c3... 1 0 train \n",
+ "3 medicovid_negative_real_005_png.rf.8502b610e6f... 1 0 train \n",
+ "4 safecare_invalid_swap_012_jpg.rf.9a46f6916090f... 3 0 train \n",
+ "\n",
+ " brand stem \n",
+ "0 safecare safecare_negative_030 \n",
+ "1 safecare safecare_invalid_swap_038 \n",
+ "2 hygisun hygisun_negative_005 \n",
+ "3 medicovid medicovid_negative_real_005 \n",
+ "4 safecare safecare_invalid_swap_012 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_cls = pd.concat([df_train,df_test])\n",
+ "df_cls['brand'] = df_cls['image'].str.extract(r'([^_]+)')\n",
+ "df_cls['stem'] = df_cls.image.str.extract('(.*)_[jpg]')\n",
+ "df_cls['fold'] = df_cls['fold'].fillna(-1).astype('int')\n",
+ "df_cls.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "f7061f91-41f3-4de8-8aba-1dbd2a3a3036",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "raw_path = f\"../../../../covisionapp_image_upload/\"\n",
+ "stems = list((p.resolve(),p.resolve().stem) for p in Path(raw_path).glob(\"**/*\") if p.suffix in {\".jpg\", \".png\"})\n",
+ "df_raw = pd.DataFrame(stems)\n",
+ "df_raw.columns = ['raw_image', 'stem']\n",
+ "# df_raw.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "0510bfb9-6832-4b3f-9d48-33869be9263f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "txt_path = '../../../covision-training/data/raw/'\n",
+ "txts = list((p.resolve(),p.resolve().stem) for p in Path(txt_path).glob(\"**/labels/*\") if p.suffix in {\".txt\",})\n",
+ "df_txt = pd.DataFrame(txts)\n",
+ "df_txt.columns = ['raw_txt', 'stem']\n",
+ "df_txt['stem'] = df_txt['stem'].str.extract('(.*)_[jpg]')\n",
+ "# df_txt.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "e152d78a-438f-4854-927d-d94446d8f972",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# list(filter(lambda x: x=='safecare_negative_030', stems))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "bc70955f-cf23-42f9-87cf-f101c622588f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "assert len(df_cls[~df_cls['stem'].isin(df_raw['stem'])]) == 0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "4d2a0b0b-c865-45d4-95d2-1e04f58f8461",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "assert len(df_cls[~df_cls['stem'].isin(df_txt['stem'])]) == 0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "5ad5c687-0bd2-48bc-ab9f-4691f968fc9d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = df_cls.merge(df_raw, on='stem').merge(df_txt, on='stem')\n",
+ "# df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "b94e822a-f9dd-46d3-95a9-b38e308a67cf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "root = Path('../data/')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "b33d839d-c448-4506-80ed-f564db3a6fe1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df['new_image'] = df.raw_txt.map(lambda x: Path(x).stem + '.jpg')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "efae24b5-2811-482d-a081-71764306c2e7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df[df['split']=='train'][['new_image','target','fold']].to_csv(root / 'train' / 'gt.csv', header=['image','target','fold'], index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "4e8b239a-5a14-4ef7-ad03-ba3619bfce94",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df[df['split']=='test'][['new_image','target']].to_csv(root / 'test' / 'gt.csv', header=['image','target'], index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "077831bb-bb4d-4250-ae97-7d5b390ece67",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "maxwidth, maxheight = 300, 300\n",
+ "if True:\n",
+ " for row in df.itertuples():\n",
+ " img = cv2.imread(str(row.raw_image))\n",
+ " # plt.imshow(img)\n",
+ " with open(row.raw_txt) as f:\n",
+ " lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)\n",
+ " \n",
+ " dh, dw = img.shape[:2]\n",
+ " x, y, w, h = lb[0][1:]\n",
+ " top = int((y - h / 2.) * dh)\n",
+ " left = int((x - w / 2.) * dw)\n",
+ " height = int(dh*h)\n",
+ " width = int(dw*w)\n",
+ " \n",
+ " cropped_image = img[top:top+height, left:left+width]\n",
+ " f1 = maxwidth / cropped_image.shape[1]\n",
+ " f2 = maxheight / cropped_image.shape[0]\n",
+ " f = min(f1, f2) # resizing factor\n",
+ " dim = (int(cropped_image.shape[1] * f), int(cropped_image.shape[0] * f))\n",
+ " cropped_image = cv2.resize(cropped_image, dim)\n",
+ " # plt.imshow(cropped_image)\n",
+ " \n",
+ " ds = 'train' if row.split in ['train','val'] else 'test'\n",
+ " fname = root / ds / 'images' / (row.raw_txt.stem + '.jpg')\n",
+ " # print(fname)\n",
+ " fname.parent.mkdir(parents=True, exist_ok=True)\n",
+ " cv2.imwrite(str(fname), cropped_image)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.17"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}