18 changes: 9 additions & 9 deletions redditdownload/gfycat.py
@@ -23,14 +23,14 @@ def __init__(self):
super(gfycat, self).__init__()

def __fetch(self, url, param):
import urllib2
import urllib.request
import urllib.error
import json
try:
# added a simple User-Agent string to avoid CloudFlare blocking this request
headers = {'User-Agent': 'Mozilla/5.0'}
req = urllib2.Request(url+param, None, headers)
connection = urllib2.urlopen(req).read()
except urllib2.HTTPError, err:
req = urllib.request.Request(url+param, None, headers)
connection = urllib.request.urlopen(req).read()
except urllib.error.HTTPError as err:
raise ValueError(err.read())
result = namedtuple("result", "raw json")
return result(raw=connection, json=json.loads(connection))
@@ -117,22 +117,22 @@ def get(self, what):
return ("Sorry, can't find %s" % error)

def download(self, location):
import urllib2
import urllib.request
import urllib.error
if not location.endswith(".mp4"):
location = location + self.get("gfyName") + ".mp4"
try:
# added a simple User-Agent string to avoid CloudFlare blocking this request
headers = {'User-Agent': 'Mozilla/5.0'}
req = urllib2.Request(self.get("mp4Url"), None, headers)
file = urllib2.urlopen(req)
req = urllib.request.Request(self.get("mp4Url"), None, headers)
file = urllib.request.urlopen(req)
# make sure that the status code is 200, and the content type is mp4
if int(file.code) is not 200 or file.headers["content-type"] != "video/mp4":
if int(file.code) != 200 or file.headers["content-type"] != "video/mp4":
raise ValueError("Problem downloading the file. Status code is %s or the content-type is not right %s"
% (file.code, file.headers["content-type"]))
data = file.read()
with open(location, "wb") as mp4:
mp4.write(data)
except urllib2.HTTPError, err:
except urllib.error.HTTPError as err:
raise ValueError(err.read())

def formated(self, ignoreNull=False):
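
For reference, the Python 3 shape that both hunks above migrate to, as a minimal standalone sketch (fetch_json is an illustrative name; the User-Agent header is the one used in the diff): urllib2.Request and urllib2.urlopen become urllib.request.Request and urllib.request.urlopen, and the Python 2 comma syntax except urllib2.HTTPError, err becomes except urllib.error.HTTPError as err.

import json
import urllib.request
import urllib.error

def fetch_json(url):
    # CloudFlare blocks requests without a User-Agent, hence the header
    req = urllib.request.Request(url, None, {'User-Agent': 'Mozilla/5.0'})
    try:
        raw = urllib.request.urlopen(req).read()
    except urllib.error.HTTPError as err:
        raise ValueError(err.read())
    return json.loads(raw)  # json.loads accepts bytes on Python 3.6+
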
14 changes: 8 additions & 6 deletions redditdownload/reddit.py
@@ -2,8 +2,9 @@
"""Return list of items from a sub-reddit of reddit.com."""

import sys
import HTMLParser
from urllib2 import urlopen, Request, HTTPError
import html
from urllib.request import urlopen, Request
from urllib.error import HTTPError
from json import JSONDecoder


@@ -33,15 +34,15 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
if '/m/' not in subreddit:
warning = ('That doesn\'t look like a multireddit. Are you sure '
'you need that multireddit flag?')
print warning
print(warning)
sys.exit(1)
url = 'http://www.reddit.com/user/%s.json' % subreddit
if not multireddit:
if '/m/' in subreddit:
warning = ('It looks like you are trying to fetch a multireddit. \n'
'Check the multireddit flag. '
'Call --help for more info')
print warning
print(warning)
sys.exit(1)
# no sorting needed
if reddit_sort is None:
@@ -96,6 +97,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
try:
req = Request(url, headers=hdr)
json = urlopen(req).read()
json = json.decode('utf-8')  # reddit serves UTF-8 JSON
data = JSONDecoder().decode(json)
if isinstance(data, dict):
items = [x['data'] for x in data['data']['children']]
@@ -119,9 +121,9 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
# returns `url` values html-escaped, whereas we normally need them
# in the way they are meant to be downloaded (i.e. urlquoted at
# most).
htmlparser = HTMLParser.HTMLParser()
for item in items:
if item.get('url'):
item['url'] = htmlparser.unescape(item['url'])
item['url'] = html.unescape(item['url'])

return items
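
The two behavioral changes in this file, as a small standalone sketch (the subreddit URL and User-Agent are illustrative, and the listing shape assumed is reddit's standard JSON layout): urlopen() now returns bytes, so the payload is decoded before JSONDecoder sees it, and the HTMLParser().unescape() method is replaced by the module-level html.unescape() function.

import html
from json import JSONDecoder
from urllib.request import urlopen, Request

req = Request('https://www.reddit.com/r/pics.json',
              headers={'User-Agent': 'redditdownload example'})
payload = urlopen(req).read().decode('utf-8')  # bytes -> str
data = JSONDecoder().decode(payload)
for child in data['data']['children']:
    url = child['data'].get('url')
    if url:
        print(html.unescape(url))  # undo reddit's HTML escaping of urls
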
133 changes: 124 additions & 9 deletions redditdownload/redditdownload.py
@@ -1,25 +1,33 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
"""Download images from a reddit.com subreddit."""

from __future__ import print_function

import os
import re
import StringIO
from io import StringIO
import sys
import logging
from urllib2 import urlopen, HTTPError, URLError
from httplib import InvalidURL
import praw
# from dotenv import load_dotenv
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
from http.client import InvalidURL
from argparse import ArgumentParser
from os.path import (
exists as pathexists, join as pathjoin, basename as pathbasename,
splitext as pathsplitext)
from os import mkdir, getcwd
import time
import nltk
import textwrap
# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')

from .gfycat import gfycat
from .reddit import getitems
from .deviantart import process_deviant_url
from PIL import Image, ImageDraw, ImageFont, ImageColor


_log = logging.getLogger('redditdownload')
@@ -29,7 +37,7 @@ def request(url, *ar, **kwa):
_retries = kwa.pop('_retries', 4)
_retry_pause = kwa.pop('_retry_pause', 0)
res = None
for _try in xrange(_retries):
for _try in range(_retries):
try:
res = urlopen(url, *ar, **kwa)
except Exception as exc:
@@ -99,6 +107,7 @@ def extract_imgur_album_urls(album_url):
return urls



def download_from_url(url, dest_file):
"""
Attempt to download file specified by url to 'dest_file'
@@ -232,8 +241,12 @@ def slugify(value):
# with some modification
import unicodedata
value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
value = unicode(re.sub(r'[^\w\s-]', '', value).strip())
# value = re.sub(r'[-\s]+', '-', value) # not replacing space with hypen
value = re.sub(r'[^\w\s-]', '', value.decode()).strip()
# value = re.sub(r'[-\s]+', '-', value)  # not replacing space with hyphen
return value
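
A quick check of what the rewritten slugify does (the sample title is illustrative): the value is NFKD-normalized and ASCII-folded to bytes, decoded back to str, and everything except word characters, whitespace, and hyphens is dropped; spaces are deliberately kept.

import re
import unicodedata

def slugify(value):
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
    return re.sub(r'[^\w\s-]', '', value.decode()).strip()

print(slugify('Café photos: "night mode"!'))  # -> 'Cafe photos night mode'
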


@@ -294,8 +307,102 @@ def parse_reddit_argument(reddit_args):
# print in one line but with nicer format
return 'Downloading images from "{}" subreddit'.format(', '.join(reddit_args.split('+')))

# Read an image file and write its own (cleaned-up) file name into the image.
# The text is placed at a fixed offset, using a font that is widely available.
# This is a first pass to get some output; it can be improved later.
# TODO: this overlay only works for still images, not for gifs.


def extract_nouns(text):
nouns = []
tokens = nltk.word_tokenize(text)
tagged_words = nltk.pos_tag(tokens)
for word, pos in tagged_words:
if pos.startswith('N'):
nouns.append(word)
return nouns
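
extract_nouns depends on two NLTK models, which is what the commented nltk.download lines near the imports are for. A usage sketch, run in the same module so it can call the extract_nouns defined above (the sample string is illustrative, and the exact output depends on the tagger model):

import nltk
nltk.download('punkt')                       # one-time tokenizer model
nltk.download('averaged_perceptron_tagger')  # one-time POS tagger model

print(extract_nouns('Sunset over the harbor OC 1920x1080'))
# typically something like ['Sunset', 'harbor', 'OC']
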

def writeTitleIntoImage(filename):
img = Image.open(filename)
draw = ImageDraw.Draw(img)
# keep only the nouns from the file name
textToWrite1 = ' '.join(extract_nouns(filename))
myFont = ImageFont.truetype('FreeMono.ttf', 55)  # assumes the FreeMono font is installed

pattern_order = ['x', 'OC', r'\.jpg', r'\.jpeg', r'[0-9]', r'\.png', r'\.webm', r'\.gifs']

# strip extensions, resolution markers, and other noise (re.sub is a no-op
# when the pattern does not match, so no re.search guard is needed)
for pattern in pattern_order:
    textToWrite1 = re.sub(pattern, '', textToWrite1)

textToWrite = textToWrite1.capitalize()

# ImageDraw.textsize was removed in Pillow 10; textbbox is the replacement
_, _, text_width, text_height = draw.textbbox((0, 0), textToWrite, font=myFont)
img_width, img_height = img.size

if img_width < img_height:
position = ((img_height - img_width) // 2, img_height - text_height - 200)
else:
position = ((img_width - img_height) // 2, img_height - text_height - 200)

draw.rectangle(
[(position[0] - 10, position[1] - 5), (position[0] + text_width + 10, position[1] + text_height + 20)],
fill='white')
draw.text(position, textToWrite, font=myFont, fill='blue')
# img.show()
img.save(filename) # Write to the same file!

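
On text measurement: ImageDraw.textsize was deprecated and then removed in Pillow 10, so the two overlay helpers here measure text with textbbox, which returns a (left, top, right, bottom) box. A minimal sketch (the font name matches the one the helpers assume is installed):

from PIL import Image, ImageDraw, ImageFont

img = Image.new('RGB', (800, 600), 'black')
draw = ImageDraw.Draw(img)
font = ImageFont.truetype('FreeMono.ttf', 55)  # assumes FreeMono is installed

left, top, right, bottom = draw.textbbox((0, 0), 'Hello', font=font)
text_width, text_height = right - left, bottom - top
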



# def configure():
# load_dotenv()

def get_first_comment_from_post(post_id):
    reddit = praw.Reddit(client_id='',  # fill in your own reddit API credentials
                         client_secret='',
                         user_agent='')

    post = reddit.submission(id=post_id)

    # comments[1] takes the second top-level comment; comments[0] is skipped,
    # presumably because it can be a stickied/bot comment
    first_comment = post.comments[1].body

    return first_comment


def writeCommentIntoImage(filename, comment_text):
    img = Image.open(filename)
    draw = ImageDraw.Draw(img)
    myFont = ImageFont.truetype('FreeMono.ttf', 55)
    # use the comment text passed in by the caller; wrap it so long
    # comments stay inside the image
    textToWrite = textwrap.fill(comment_text, width=40)
    # ImageDraw.textsize was removed in Pillow 10; textbbox is the replacement
    _, _, text_width, text_height = draw.textbbox((0, 0), textToWrite, font=myFont)
img_width, img_height = img.size

if img_width < img_height:
position = ((img_height - img_width) // 2, img_height - text_height - 100)
else:
position = ((img_width - img_height) // 2, img_height - text_height - 100)

draw.rectangle(
[(position[0] - 10, position[1] - 5), (position[0] + text_width + 10, position[1] + text_height + 20)],
fill='white')
draw.text(position, textToWrite, font=myFont, fill='blue')
# img.show()
img.save(filename) # Write to the same file!
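
For clarity, this is how the overlay helpers chain together after a successful download in main() (the file name is illustrative; the post id is the sample one from the original comments):

filename = 'some_downloaded_image.jpg'  # illustrative
post_id = '13vxtfl'                     # sample post id

writeTitleIntoImage(filename)                    # stamp the cleaned-up file name
comment = get_first_comment_from_post(post_id)   # needs praw credentials filled in
writeCommentIntoImage(filename, comment)         # stamp the comment text
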


def main():
# configure()
ARGS = parse_args(sys.argv[1:])

logging.basicConfig(level=logging.INFO)
@@ -332,22 +439,24 @@ def main():

# measure time and make the program wait 4 seconds between requests,
# as per reddit api guidelines
end_time = time.clock()
end_time = time.perf_counter()

if start_time is not None:
elapsed_time = end_time - start_time

if elapsed_time <= 4: # throttling
time.sleep(4 - elapsed_time)

start_time = time.clock()
start_time = time.perf_counter()

if not ITEMS:
# No more items to process
break

for ITEM in ITEMS:
TOTAL += 1
post_id = ITEM["id"]  # reddit post id, used later to fetch a comment

# not downloading if url is reddit comment
if ('reddit.com/r/' + ARGS.reddit + '/comments/' in ITEM['url'] or
@@ -439,6 +548,7 @@ def main():
raise URLError('Url is empty')
else:
text_templ = ' Attempting to download URL[{}] as [{}].'
print(text_templ.format(URL.encode('utf-8'), FILENAME.encode('utf-8')))

# Download the image
@@ -448,6 +558,11 @@ def main():
print(' Successfully downloaded URL [%s] as [%s].' % (URL, FILENAME))
DOWNLOADED += 1
FILECOUNT += 1
# Download successful. Now write the file name into the image itself.
# If an exception is thrown, it is caught and we move on to the next picture/gif.
writeTitleIntoImage(FILENAME)
comm = get_first_comment_from_post(post_id)
writeCommentIntoImage(FILENAME, comm)

except Exception as exc:
print(' %s' % (exc,))