use this function to get the wiki extract working. · Issue #2 · grahamwaters/HowTimeFlies

HowTimeFlies/scripts/01_getimages_google_imagesearch.py

Lines 59 to 117 in b7beeca

    
                   print("wikipedia extraction for the year...") 
        
                   saved_folder = f'Wiki_images/{this_year}' 
        
                   if not os.path.exists(saved_folder): 
        
                       os.mkdir(saved_folder) 
        
                   try: 
        
                       iterations = download_wiki_images(this_year,saved_folder,iterations) 
        
                   except Exception as e: 
        
                       print("error with wikipedia data pull") 
        
                       print(e) 
        
                   """ 
        
           def download_wiki_images(this_year, saved_folder, iterations):
               """Download gallery images from a Wikimedia Commons year category.

               Fetches ``Category:<this_year>_photographs`` on commons.wikimedia.org,
               collects image URLs from the gallery, and writes each image to
               ``<saved_folder>/<data><index>.jpg`` (index starts at 1).

               NOTE(review): this function relies on the names ``user_agent``,
               ``n_images`` and ``data`` which are not defined in this block --
               presumably module-level globals; verify against the rest of the file.

               Args:
                   this_year: Year used to build the category URL.
                   saved_folder: Existing directory the images are written into.
                   iterations: Running call counter used for throttling.

               Returns:
                   ``iterations`` incremented by one.
               """
               from urllib.parse import urljoin

               iterations = iterations + 1
               # Pause on every 20th call so repeated requests are spaced out.
               if iterations % 20 == 0:
                   time.sleep(15)

               wikipedia_image = (
                   f"https://commons.wikimedia.org/wiki/Category:{str(this_year)}_photographs"
               )
               print("\n", wikipedia_image)

               # A timeout keeps a stalled connection from hanging the whole run.
               response = requests.get(wikipedia_image, headers=user_agent, timeout=30)
               if not response.ok:
                   # Nothing to parse on an error page; keep the counter advancing.
                   print(f"category page returned HTTP {response.status_code}")
                   return iterations

               soup = BeautifulSoup(response.text, "html.parser")

               # The previous version read "data-src" from div.gallerytext, which is
               # the caption container and carries no image URL, so no link was ever
               # collected. Read the <img> inside each gallery box instead.
               # NOTE(review): selector based on standard MediaWiki gallery markup
               # (li.gallerybox > ... > img) -- confirm against the live page.
               links = []
               for box in soup.find_all("li", {"class": "gallerybox"}):
                   img = box.find("img")
                   if img is None:
                       continue
                   src = img.get("data-src") or img.get("src")
                   if not src:
                       continue
                   # urljoin resolves protocol-relative ("//host/...") and relative URLs.
                   links.append(urljoin(wikipedia_image, src))
                   # Same stopping rule as before: stop once n_images links are held.
                   if len(links) >= n_images:
                       break

               print(f"Downloading {len(links)} images...")

               for i, link in enumerate(links):
                   # Send the same headers as the page request, with a timeout.
                   response = requests.get(link, headers=user_agent, timeout=30)
                   if not response.ok:
                       # Do not save an HTTP error body under a .jpg name.
                       print(f"skipping {link}: HTTP {response.status_code}")
                       continue

                   image_name = saved_folder + "/" + data + str(i + 1) + ".jpg"
                   with open(image_name, "wb") as fh:
                       fh.write(response.content)

               return iterations
        
           def download_images(

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

use this function to get the wiki extract working. #2

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

	print("wikipedia extraction for the year...")
	saved_folder = f'Wiki_images/{this_year}'
	if not os.path.exists(saved_folder):
	os.mkdir(saved_folder)
	try:
	iterations = download_wiki_images(this_year,saved_folder,iterations)
	except Exception as e:
	print("error with wikipedia data pull")
	print(e)
	"""


	def download_wiki_images(this_year, saved_folder, iterations):
	    """Download gallery images from a Wikimedia Commons year category.

	    Fetches ``Category:<this_year>_photographs`` on commons.wikimedia.org,
	    collects image URLs from the gallery, and writes each image to
	    ``<saved_folder>/<data><index>.jpg`` (index starts at 1).

	    NOTE(review): this function relies on the names ``user_agent``,
	    ``n_images`` and ``data`` which are not defined in this block --
	    presumably module-level globals; verify against the rest of the file.

	    Args:
	        this_year: Year used to build the category URL.
	        saved_folder: Existing directory the images are written into.
	        iterations: Running call counter used for throttling.

	    Returns:
	        ``iterations`` incremented by one.
	    """
	    from urllib.parse import urljoin

	    iterations = iterations + 1
	    # Pause on every 20th call so repeated requests are spaced out.
	    if iterations % 20 == 0:
	        time.sleep(15)

	    wikipedia_image = (
	        f"https://commons.wikimedia.org/wiki/Category:{str(this_year)}_photographs"
	    )
	    print("\n", wikipedia_image)

	    # A timeout keeps a stalled connection from hanging the whole run.
	    response = requests.get(wikipedia_image, headers=user_agent, timeout=30)
	    if not response.ok:
	        # Nothing to parse on an error page; keep the counter advancing.
	        print(f"category page returned HTTP {response.status_code}")
	        return iterations

	    soup = BeautifulSoup(response.text, "html.parser")

	    # The previous version read "data-src" from div.gallerytext, which is
	    # the caption container and carries no image URL, so no link was ever
	    # collected. Read the <img> inside each gallery box instead.
	    # NOTE(review): selector based on standard MediaWiki gallery markup
	    # (li.gallerybox > ... > img) -- confirm against the live page.
	    links = []
	    for box in soup.find_all("li", {"class": "gallerybox"}):
	        img = box.find("img")
	        if img is None:
	            continue
	        src = img.get("data-src") or img.get("src")
	        if not src:
	            continue
	        # urljoin resolves protocol-relative ("//host/...") and relative URLs.
	        links.append(urljoin(wikipedia_image, src))
	        # Same stopping rule as before: stop once n_images links are held.
	        if len(links) >= n_images:
	            break

	    print(f"Downloading {len(links)} images...")

	    for i, link in enumerate(links):
	        # Send the same headers as the page request, with a timeout.
	        response = requests.get(link, headers=user_agent, timeout=30)
	        if not response.ok:
	            # Do not save an HTTP error body under a .jpg name.
	            print(f"skipping {link}: HTTP {response.status_code}")
	            continue

	        image_name = saved_folder + "/" + data + str(i + 1) + ".jpg"
	        with open(image_name, "wb") as fh:
	            fh.write(response.content)

	    return iterations


	def download_images(

use this function to get the wiki extract working. #2

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions