-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Labels
enhancement (New feature or request), help wanted (Extra attention is needed)
Description
HowTimeFlies/scripts/01_getimages_google_imagesearch.py
Lines 59 to 117 in b7beeca
| print("wikipedia extraction for the year...") | |
| saved_folder = f'Wiki_images/{this_year}' | |
| if not os.path.exists(saved_folder): | |
| os.mkdir(saved_folder) | |
| try: | |
| iterations = download_wiki_images(this_year,saved_folder,iterations) | |
| except Exception as e: | |
| print("error with wikipedia data pull") | |
| print(e) | |
| """ | |
def download_wiki_images(this_year, saved_folder, iterations):
    """Download images for one year from a Wikimedia Commons category page.

    Fetches ``Category:<this_year>_photographs`` on commons.wikimedia.org,
    collects the ``data-src`` attribute of ``div.gallerytext`` elements
    (stopping once ``n_images`` links have been gathered) and writes each
    linked resource to ``<saved_folder>/<data><index>.jpg``, with the index
    starting at 1.

    Relies on names that are not defined in this function: ``user_agent``
    (passed as request headers), ``n_images`` (link limit) and ``data``
    (file-name prefix).
    NOTE(review): presumably these are module-level globals -- confirm that
    ``data`` in particular is defined before this function is called.

    Args:
        this_year: Year interpolated into the category URL.
        saved_folder: Directory the image files are written into; it is not
            created here.
        iterations: Running call counter. It is incremented on every call,
            and every 20th call sleeps for 15 seconds.

    Returns:
        The incremented ``iterations`` value.

    Raises:
        requests.RequestException: If the category page request itself
            fails (connection error, timeout). Failures of individual image
            downloads are reported and skipped instead.
    """
    iterations += 1
    if iterations % 20 == 0:
        # Pause on every 20th call (presumably to throttle requests).
        time.sleep(15)

    wikipedia_image = (
        f"https://commons.wikimedia.org/wiki/Category:{this_year}_photographs"
    )
    print("\n", wikipedia_image)

    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(wikipedia_image, headers=user_agent, timeout=30)
    if not response.ok:
        # Do not parse an error page; report and carry on with the next year.
        print(f"Category page request failed: HTTP {response.status_code}")
        return iterations

    soup = BeautifulSoup(response.text, "html.parser")

    # NOTE(review): this selects caption containers and reads "data-src" from
    # them. If no links are ever collected, the selector probably needs to
    # target the gallery thumbnails instead -- confirm against the live markup.
    links = []
    for result in soup.find_all("div", {"class": "gallerytext"}):
        link = result.get("data-src")
        if link is None:
            continue
        links.append(link)
        if len(links) >= n_images:
            break

    print(f"Downloading {len(links)} images...")
    for index, link in enumerate(links, start=1):
        try:
            # Send the same headers as the category request; the original
            # fetched images without them.
            image_response = requests.get(link, headers=user_agent, timeout=30)
            image_response.raise_for_status()
        except requests.RequestException as err:
            # Skip this image rather than saving an error body as a .jpg.
            print(f"Skipping {link}: {err}")
            continue
        image_name = f"{saved_folder}/{data}{index}.jpg"
        with open(image_name, "wb") as fh:
            fh.write(image_response.content)

    return iterations
| def download_images( |
Metadata
Metadata
Assignees
Labels
enhancement (New feature or request), help wanted (Extra attention is needed)