diff --git a/README.md b/README.md index a2697be..d8312d0 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ Author: Kaicheng Yang, Jiankang Deng, Xiang An, Jiawei Li, Ziyong Feng, Jia Guo, 1、Download YFCC15M - The YFCC15M dataset we used is [YFCC15M-DeCLIP](https://arxiv.org/abs/2110.05208), we download it from the [repo](https://github.com/AdamRain/YFCC15M_downloader), finally we successful donwload 15061515 image-text pairs. + The YFCC15M dataset we used is [YFCC15M-DeCLIP](https://arxiv.org/abs/2110.05208), we download it from the [repo](https://github.com/AdamRain/YFCC15M_downloader), finally we successfully download 15061515 image-text pairs. 2、Generate synthetic caption @@ -40,7 +40,7 @@ Author: Kaicheng Yang, Jiankang Deng, Xiang An, Jiawei Li, Ziyong Feng, Jia Guo, - ### Pretrained Model Weight - You can download the pretrained model weight from [Google Drive](https://drive.google.com/file/d/1AqSHisCKZOZ16Q3sYguK6zIZIuwwEriE/view?usp=share_link) or [BaiduYun](https://pan.baidu.com/s/10dFfvGMWeaTXUyrZlZlCEw?pwd=xftg), and you can find the traning log in [Google Drive](https://drive.google.com/file/d/1I8gdSQCJAfFamDcVztwW8EQIc_OOK8Xh/view?usp=share_link) or [BaiduYun](https://pan.baidu.com/s/1oz0UVzX2N0Sri7MfwR-kog?pwd=7ki7) + You can download the pretrained model weight from [Google Drive](https://drive.google.com/file/d/1AqSHisCKZOZ16Q3sYguK6zIZIuwwEriE/view?usp=share_link) or [BaiduYun](https://pan.baidu.com/s/10dFfvGMWeaTXUyrZlZlCEw?pwd=xftg), and you can find the training log in [Google Drive](https://drive.google.com/file/d/1I8gdSQCJAfFamDcVztwW8EQIc_OOK8Xh/view?usp=share_link) or [BaiduYun](https://pan.baidu.com/s/1oz0UVzX2N0Sri7MfwR-kog?pwd=7ki7) - ### Training @@ -51,7 +51,7 @@ Author: Kaicheng Yang, Jiankang Deng, Xiang An, Jiawei Li, Ziyong Feng, Jia Guo, - ### Evaluation - Evaluate zero shot cross-modal retireval + Evaluate zero shot cross-modal retrieval ``` bash run_retrieval.sh diff --git a/text_image_retrieval.py b/text_image_retrieval.py 
index 22a5dde..6740df4 100644 --- a/text_image_retrieval.py +++ b/text_image_retrieval.py @@ -8,7 +8,7 @@ 'flickr': flickr30k} def compute_retrieval(similarity_scores, txt2img, img2txt): - # comput text -> image + # compute text -> image t2i_similarity_score = similarity_scores.t() t2i_ranks = torch.zeros(t2i_similarity_score.shape[0]) @@ -23,7 +23,7 @@ def compute_retrieval(similarity_scores, txt2img, img2txt): tr10 = 100.0 * len(torch.where(t2i_ranks < 10)[0]) / len(t2i_ranks) t2i_report_dict = {"r1": tr1, "r5": tr5, "r10": tr10} - #comput image -> text + # compute image -> text i2t_similarity_score = similarity_scores i2t_ranks = torch.zeros(i2t_similarity_score.shape[0]) for index, score in enumerate(i2t_similarity_score): @@ -136,4 +136,4 @@ def main(args): parser.add_argument("--model-weight", default= "/mnt/laion/clip/vit_b_16-laion400m_e32-55e67d44.pt") parser.add_argument("--input-size", default=224, type=int, help="Image resolution.") args = parser.parse_args() - main(args) \ No newline at end of file + main(args)