Skip to content

Use this function to get the wiki extract working. #2

@grahamwaters

Description

@grahamwaters

# Usage snippet: pull the Wikimedia Commons images for `this_year` into a
# per-year folder. `this_year`, `iterations` and `download_wiki_images` are
# expected to be defined by the surrounding script.
print("wikipedia extraction for the year...")
saved_folder = f'Wiki_images/{this_year}'
# makedirs (unlike mkdir) also creates the parent "Wiki_images" directory and,
# with exist_ok=True, does not fail when the folder is already there.
os.makedirs(saved_folder, exist_ok=True)
try:
    iterations = download_wiki_images(this_year, saved_folder, iterations)
except Exception as e:
    # Best-effort boundary: report the failure and let the caller carry on.
    print("error with wikipedia data pull")
    print(e)
"""
def download_wiki_images(this_year, saved_folder, iterations):
    """Download images listed on a year's Wikimedia Commons category page.

    Requests ``Category:<this_year>_photographs`` on commons.wikimedia.org,
    reads the ``data-src`` attribute of each ``div.gallerytext`` element,
    keeps at most ``n_images`` of those links and writes each downloaded
    file into ``saved_folder`` as ``<data><index>.jpg``.

    Relies on names defined outside this function: ``user_agent`` (request
    headers), ``n_images`` (link limit) and ``data`` (file-name prefix).

    Args:
        this_year: Year used to build the category URL.
        saved_folder: Existing directory the image files are written to.
        iterations: Running call counter maintained by the caller.

    Returns:
        ``iterations`` incremented by one.

    Raises:
        requests.RequestException: If the category page cannot be fetched
            (connection error, timeout or HTTP error status).
    """
    iterations += 1
    # Pause on every 20th call before hitting the site again.
    if iterations % 20 == 0:
        time.sleep(15)

    wikipedia_image = (
        f"https://commons.wikimedia.org/wiki/Category:{this_year}_photographs"
    )
    print("\n", wikipedia_image)
    # A timeout keeps a stalled connection from hanging the whole run.
    page = requests.get(wikipedia_image, headers=user_agent, timeout=30)
    # Fail loudly instead of parsing an error page as if it were the gallery.
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")

    # NOTE(review): this expects `div.gallerytext` elements to carry a
    # `data-src` attribute; elements without it are skipped. Confirm against
    # the live page markup if no links are ever collected.
    links = []
    for result in soup.find_all("div", {"class": "gallerytext"}):
        try:
            links.append(result["data-src"])
        except KeyError:
            continue
        # Stop once the limit is reached (checked after each stored link).
        if len(links) >= n_images:
            break

    print(f"Downloading {len(links)} images...")
    for i, link in enumerate(links):
        # Send the same headers as the category request for consistency.
        image = requests.get(link, headers=user_agent, timeout=30)
        if not image.ok:
            # Do not save an HTTP error body to disk under a .jpg name.
            print(f"skipping {link}: HTTP {image.status_code}")
            continue
        image_name = saved_folder + "/" + data + str(i + 1) + ".jpg"
        with open(image_name, "wb") as fh:
            fh.write(image.content)
    return iterations
def download_images(

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancement (New feature or request), help wanted (Extra attention is needed)

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions