diff --git a/fastclass/fc_download.py b/fastclass/fc_download.py index 9431f10..19d8f15 100755 --- a/fastclass/fc_download.py +++ b/fastclass/fc_download.py @@ -66,27 +66,22 @@ def crawl( for c in crawlers: print(f" -> {c}") - if c == "GOOGLE": - google_crawler = GoogleImageCrawler( + if c == "BAIDU": + baidu_crawler = BaiduImageCrawler( downloader_cls=CustomDownloader, - parser_cls=GoogleParser, log_level=logging.CRITICAL, - feeder_threads=1, - parser_threads=1, - downloader_threads=4, storage={"root_dir": folder}, ) - - google_crawler.crawl( + baidu_crawler.crawl( keyword=search, offset=0, max_num=maxnum, min_size=(200, 200), max_size=None, - file_idx_offset=0, + file_idx_offset="auto", ) - if c == "BING": + elif c == "BING": bing_crawler = BingImageCrawler( downloader_cls=CustomDownloader, log_level=logging.CRITICAL, @@ -101,22 +96,7 @@ def crawl( file_idx_offset="auto", ) - if c == "BAIDU": - baidu_crawler = BaiduImageCrawler( - downloader_cls=CustomDownloader, - log_level=logging.CRITICAL, - storage={"root_dir": folder}, - ) - baidu_crawler.crawl( - keyword=search, - offset=0, - max_num=maxnum, - min_size=(200, 200), - max_size=None, - file_idx_offset="auto", - ) - - if c == "FLICKR": + elif c == "FLICKR": flick_api_key = os.environ.get("FLICKR_API_KEY") if not flick_api_key: print( @@ -140,6 +120,26 @@ def crawl( file_idx_offset="auto", ) + elif c == "GOOGLE": + google_crawler = GoogleImageCrawler( + downloader_cls=CustomDownloader, + parser_cls=GoogleParser, + log_level=logging.CRITICAL, + feeder_threads=1, + parser_threads=1, + downloader_threads=4, + storage={"root_dir": folder}, + ) + + google_crawler.crawl( + keyword=search, + offset=0, + max_num=maxnum, + min_size=(200, 200), + max_size=None, + file_idx_offset=0, + ) + return {k: v for k, v in CustomDownloader.registry.items() if k is not None}