18 changes: 9 additions & 9 deletions redditdownload/gfycat.py
@@ -23,14 +23,14 @@ def __init__(self):
super(gfycat, self).__init__()

def __fetch(self, url, param):
import urllib2
import urllib.request
import urllib.error
import json
try:
# added a simple User-Agent string to avoid CloudFlare blocking this request
headers = {'User-Agent': 'Mozilla/5.0'}
req = urllib2.Request(url+param, None, headers)
connection = urllib2.urlopen(req).read()
except urllib2.HTTPError, err:
req = urllib.request.Request(url+param, None, headers)
connection = urllib.request.urlopen(req).read()
except urllib.error.HTTPError as err:
raise ValueError(err.read())
result = namedtuple("result", "raw json")
return result(raw=connection, json=json.loads(connection))
@@ -117,22 +117,22 @@ def get(self, what):
return ("Sorry, can't find %s" % error)

def download(self, location):
import urllib2
import urllib.request
import urllib.error
if not location.endswith(".mp4"):
location = location + self.get("gfyName") + ".mp4"
try:
# added a simple User-Agent string to avoid CloudFlare blocking this request
headers = {'User-Agent': 'Mozilla/5.0'}
req = urllib2.Request(self.get("mp4Url"), None, headers)
file = urllib2.urlopen(req)
req = urllib.request.Request(self.get("mp4Url"), None, headers)
file = urllib.request.urlopen(req)
# make sure that the status code is 200, and the content type is mp4
if int(file.code) is not 200 or file.headers["content-type"] != "video/mp4":
if int(file.code) != 200 or file.headers["content-type"] != "video/mp4":
raise ValueError("Problem downloading the file. Status code is %s or the content-type is not right %s"
% (file.code, file.headers["content-type"]))
data = file.read()
with open(location, "wb") as mp4:
mp4.write(data)
except urllib2.HTTPError, err:
except urllib.error.HTTPError as err:
raise ValueError(err.read())

def formated(self, ignoreNull=False):
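
For reference, the Python 3 shape that both hunks above migrate to, as a minimal standalone sketch (fetch_json is an illustrative name; the User-Agent header is the one used in the diff): urllib2.Request and urllib2.urlopen become urllib.request.Request and urllib.request.urlopen, and the Python 2 comma syntax except urllib2.HTTPError, err becomes except urllib.error.HTTPError as err.

import json
import urllib.request
import urllib.error

def fetch_json(url):
    # CloudFlare blocks requests without a User-Agent, hence the header
    req = urllib.request.Request(url, None, {'User-Agent': 'Mozilla/5.0'})
    try:
        raw = urllib.request.urlopen(req).read()
    except urllib.error.HTTPError as err:
        raise ValueError(err.read())
    return json.loads(raw)  # json.loads accepts bytes on Python 3.6+
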
14 changes: 8 additions & 6 deletions redditdownload/reddit.py
@@ -2,8 +2,9 @@
"""Return list of items from a sub-reddit of reddit.com."""

import sys
import HTMLParser
from urllib2 import urlopen, Request, HTTPError
import html
from urllib.request import urlopen, Request
from urllib.error import HTTPError
from json import JSONDecoder


@@ -33,15 +34,15 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
if '/m/' not in subreddit:
warning = ('That doesn\'t look like a multireddit. Are you sure '
'you need that multireddit flag?')
print warning
print(warning)
sys.exit(1)
url = 'http://www.reddit.com/user/%s.json' % subreddit
if not multireddit:
if '/m/' in subreddit:
warning = ('It looks like you are trying to fetch a multireddit. \n'
'Check the multireddit flag. '
'Call --help for more info')
print warning
print(warning)
sys.exit(1)
# no sorting needed
if reddit_sort is None:
@@ -96,6 +97,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
try:
req = Request(url, headers=hdr)
json = urlopen(req).read()
json = json.decode('utf-8')  # reddit serves UTF-8 JSON
data = JSONDecoder().decode(json)
if isinstance(data, dict):
items = [x['data'] for x in data['data']['children']]
@@ -119,9 +121,9 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
# returns `url` values html-escaped, whereas we normally need them
# in the way they are meant to be downloaded (i.e. urlquoted at
# most).
htmlparser = HTMLParser.HTMLParser()
for item in items:
if item.get('url'):
item['url'] = htmlparser.unescape(item['url'])
item['url'] = html.unescape(item['url'])

return items
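
The two behavioral changes in this file, as a small standalone sketch (the subreddit URL and User-Agent are illustrative, and the listing shape assumed is reddit's standard JSON layout): urlopen() now returns bytes, so the payload is decoded before JSONDecoder sees it, and the HTMLParser().unescape() method is replaced by the module-level html.unescape() function.

import html
from json import JSONDecoder
from urllib.request import urlopen, Request

req = Request('https://www.reddit.com/r/pics.json',
              headers={'User-Agent': 'redditdownload example'})
payload = urlopen(req).read().decode('utf-8')  # bytes -> str
data = JSONDecoder().decode(payload)
for child in data['data']['children']:
    url = child['data'].get('url')
    if url:
        print(html.unescape(url))  # undo reddit's HTML escaping of urls
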
133 changes: 124 additions & 9 deletions redditdownload/redditdownload.py
@@ -1,25 +1,33 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
"""Download images from a reddit.com subreddit."""

from __future__ import print_function

import os
import re
import StringIO
from io import StringIO
import sys
import logging
from urllib2 import urlopen, HTTPError, URLError
from httplib import InvalidURL
import praw
# from dotenv import load_dotenv
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
from http.client import InvalidURL
from argparse import ArgumentParser
from os.path import (
exists as pathexists, join as pathjoin, basename as pathbasename,
splitext as pathsplitext)
from os import mkdir, getcwd
import time
import nltk
import textwrap
# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')

from .gfycat import gfycat
from .reddit import getitems
from .deviantart import process_deviant_url
from PIL import Image, ImageDraw, ImageFont, ImageColor


_log = logging.getLogger('redditdownload')
@@ -29,7 +37,7 @@ def request(url, *ar, **kwa):
_retries = kwa.pop('_retries', 4)
_retry_pause = kwa.pop('_retry_pause', 0)
res = None
for _try in xrange(_retries):
for _try in range(_retries):
try:
res = urlopen(url, *ar, **kwa)
except Exception as exc:
@@ -99,6 +107,7 @@ def extract_imgur_album_urls(album_url):
return urls



def download_from_url(url, dest_file):
"""
Attempt to download file specified by url to 'dest_file'
@@ -232,8 +241,12 @@ def slugify(value):
# with some modification
import unicodedata
value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
value = unicode(re.sub(r'[^\w\s-]', '', value).strip())
# value = re.sub(r'[-\s]+', '-', value) # not replacing space with hypen
value = re.sub(r'[^\w\s-]', '', value.decode()).strip()
# value = re.sub(r'[-\s]+', '-', value)  # not replacing space with hyphen
return value
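
A quick check of what the rewritten slugify does (the sample title is illustrative): the value is NFKD-normalized and ASCII-folded to bytes, decoded back to str, and everything except word characters, whitespace, and hyphens is dropped; spaces are deliberately kept.

import re
import unicodedata

def slugify(value):
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
    return re.sub(r'[^\w\s-]', '', value.decode()).strip()

print(slugify('Café photos: "night mode"!'))  # -> 'Cafe photos night mode'
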


@@ -294,8 +307,102 @@ def parse_reddit_argument(reddit_args):
# print in one line but with nicer format
return 'Downloading images from "{}" subreddit'.format(', '.join(reddit_args.split('+')))

# Read an image file and write its own (cleaned-up) file name into the image.
# The text is placed at a fixed offset, using a font that is widely available.
# This is a first pass to get some output; it can be improved later.
# TODO: this overlay only works for still images, not for gifs.


def extract_nouns(text):
nouns = []
tokens = nltk.word_tokenize(text)
tagged_words = nltk.pos_tag(tokens)
for word, pos in tagged_words:
if pos.startswith('N'):
nouns.append(word)
return nouns
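
extract_nouns depends on two NLTK models, which is what the commented nltk.download lines near the imports are for. A usage sketch, run in the same module so it can call the extract_nouns defined above (the sample string is illustrative, and the exact output depends on the tagger model):

import nltk
nltk.download('punkt')                       # one-time tokenizer model
nltk.download('averaged_perceptron_tagger')  # one-time POS tagger model

print(extract_nouns('Sunset over the harbor OC 1920x1080'))
# typically something like ['Sunset', 'harbor', 'OC']
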

def writeTitleIntoImage(filename):
img = Image.open(filename)
draw = ImageDraw.Draw(img)
# keep only the nouns from the file name
textToWrite1 = ' '.join(extract_nouns(filename))
myFont = ImageFont.truetype('FreeMono.ttf', 55)  # assumes the FreeMono font is installed

pattern_order = ['x', 'OC', r'\.jpg', r'\.jpeg', r'[0-9]', r'\.png', r'\.webm', r'\.gifs']

# strip extensions, resolution markers, and other noise (re.sub is a no-op
# when the pattern does not match, so no re.search guard is needed)
for pattern in pattern_order:
    textToWrite1 = re.sub(pattern, '', textToWrite1)

textToWrite = textToWrite1.capitalize()

# ImageDraw.textsize was removed in Pillow 10; textbbox is the replacement
_, _, text_width, text_height = draw.textbbox((0, 0), textToWrite, font=myFont)
img_width, img_height = img.size

if img_width < img_height:
position = ((img_height - img_width) // 2, img_height - text_height - 200)
else:
position = ((img_width - img_height) // 2, img_height - text_height - 200)

draw.rectangle(
[(position[0] - 10, position[1] - 5), (position[0] + text_width + 10, position[1] + text_height + 20)],
fill='white')
draw.text(position, textToWrite, font=myFont, fill='blue')
# img.show()
img.save(filename) # Write to the same file!

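
On text measurement: ImageDraw.textsize was deprecated and then removed in Pillow 10, so the two overlay helpers here measure text with textbbox, which returns a (left, top, right, bottom) box. A minimal sketch (the font name matches the one the helpers assume is installed):

from PIL import Image, ImageDraw, ImageFont

img = Image.new('RGB', (800, 600), 'black')
draw = ImageDraw.Draw(img)
font = ImageFont.truetype('FreeMono.ttf', 55)  # assumes FreeMono is installed

left, top, right, bottom = draw.textbbox((0, 0), 'Hello', font=font)
text_width, text_height = right - left, bottom - top
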



# def configure():
# load_dotenv()

def get_first_comment_from_post(post_id):
    reddit = praw.Reddit(client_id='',  # fill in your own reddit API credentials
                         client_secret='',
                         user_agent='')

    post = reddit.submission(id=post_id)

    # comments[1] takes the second top-level comment; comments[0] is skipped,
    # presumably because it can be a stickied/bot comment
    first_comment = post.comments[1].body

    return first_comment


def writeCommentIntoImage(filename, comment_text):
    img = Image.open(filename)
    draw = ImageDraw.Draw(img)
    myFont = ImageFont.truetype('FreeMono.ttf', 55)
    # use the comment text passed in by the caller; wrap it so long
    # comments stay inside the image
    textToWrite = textwrap.fill(comment_text, width=40)
    # ImageDraw.textsize was removed in Pillow 10; textbbox is the replacement
    _, _, text_width, text_height = draw.textbbox((0, 0), textToWrite, font=myFont)
img_width, img_height = img.size

if img_width < img_height:
position = ((img_height - img_width) // 2, img_height - text_height - 100)
else:
position = ((img_width - img_height) // 2, img_height - text_height - 100)

draw.rectangle(
[(position[0] - 10, position[1] - 5), (position[0] + text_width + 10, position[1] + text_height + 20)],
fill='white')
draw.text(position, textToWrite, font=myFont, fill='blue')
# img.show()
img.save(filename) # Write to the same file!
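
For clarity, this is how the overlay helpers chain together after a successful download in main() (the file name is illustrative; the post id is the sample one from the original comments):

filename = 'some_downloaded_image.jpg'  # illustrative
post_id = '13vxtfl'                     # sample post id

writeTitleIntoImage(filename)                    # stamp the cleaned-up file name
comment = get_first_comment_from_post(post_id)   # needs praw credentials filled in
writeCommentIntoImage(filename, comment)         # stamp the comment text
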


def main():
# configure()
ARGS = parse_args(sys.argv[1:])

logging.basicConfig(level=logging.INFO)
@@ -332,22 +439,24 @@ def main():

# measure time and make the program wait 4 seconds between requests,
# as per reddit api guidelines
end_time = time.clock()
end_time = time.perf_counter()

if start_time is not None:
elapsed_time = end_time - start_time

if elapsed_time <= 4: # throttling
time.sleep(4 - elapsed_time)

start_time = time.clock()
start_time = time.perf_counter()

if not ITEMS:
# No more items to process
break

for ITEM in ITEMS:
TOTAL += 1
post_id = ITEM["id"]  # reddit post id, used later to fetch a comment

# not downloading if url is reddit comment
if ('reddit.com/r/' + ARGS.reddit + '/comments/' in ITEM['url'] or
@@ -439,6 +548,7 @@ def main():
raise URLError('Url is empty')
else:
text_templ = ' Attempting to download URL[{}] as [{}].'
print(text_templ.format(URL.encode('utf-8'), FILENAME.encode('utf-8')))

# Download the image
@@ -448,6 +558,11 @@ def main():
print(' Successfully downloaded URL [%s] as [%s].' % (URL, FILENAME))
DOWNLOADED += 1
FILECOUNT += 1
# Download successful. Now write the file name into the image itself.
# If an exception is thrown, it is caught and we move on to the next picture/gif.
writeTitleIntoImage(FILENAME)
comm = get_first_comment_from_post(post_id)
writeCommentIntoImage(FILENAME, comm)

except Exception as exc:
print(' %s' % (exc,))