From ac6d255f856f674156f250524b33f725c6b9030b Mon Sep 17 00:00:00 2001
From: Michael Durso Jr
Date: Sun, 28 Oct 2018 10:22:16 +0000
Subject: [PATCH 1/3] Fixing a README typo.

---
 readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readme.md b/readme.md
index febc64f..3f59f7b 100644
--- a/readme.md
+++ b/readme.md
@@ -70,7 +70,7 @@ wallpaper would be as follows:
 And to run the same query but only get new images you don't already have, run
 the following:
 
-    python redditdl.py wallpaper wallpaper --score 50 -update
+    python redditdl.py wallpaper wallpaper --score 50 --update
 
 For getting some nice pictures of cats in your catsfolder (wich will be
 created if it doesn't exist yet) run:

From 33a04e75d1b52c2e984979e69d29336126d3248d Mon Sep 17 00:00:00 2001
From: Michael Durso Jr
Date: Sun, 28 Oct 2018 10:22:41 +0000
Subject: [PATCH 2/3] Adding a few file and directory patterns to the git ignore.

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 60c4470..d03caf9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 */*.jpg
+*/*.png
+*.egg-info
 *.swp
 *.bak
 *.DS_Store
@@ -12,4 +14,5 @@
 /*.webm
 /gfycat
 /build
+/dist
 /.pydevproject

From 49789035beb30d51c4e5227c8f56e2e66897355a Mon Sep 17 00:00:00 2001
From: Michael Durso Jr
Date: Sun, 28 Oct 2018 10:27:04 +0000
Subject: [PATCH 3/3] Added python3 support.

---
 redditdownload/__init__.py        |  5 +++++
 redditdownload/gfycat.py          |  4 ++--
 redditdownload/img_scrap_stuff.py | 22 ++++++++++++----------
 redditdownload/reddit.py          | 19 +++++++++++++------
 redditdownload/redditdownload.py  | 31 ++++++++++++++++++++-----------
 redditdownload/scrap_wrongies.py  |  2 +-
 6 files changed, 53 insertions(+), 30 deletions(-)

diff --git a/redditdownload/__init__.py b/redditdownload/__init__.py
index 8d43315..49634ab 100644
--- a/redditdownload/__init__.py
+++ b/redditdownload/__init__.py
@@ -1 +1,6 @@
 from redditdownload import *
+
+
+def running_python2():
+    from platform import python_version_tuple
+    return int(python_version_tuple()[0]) == 2
diff --git a/redditdownload/gfycat.py b/redditdownload/gfycat.py
index 41debf2..30ccff3 100644
--- a/redditdownload/gfycat.py
+++ b/redditdownload/gfycat.py
@@ -30,7 +30,7 @@ def __fetch(self, url, param):
             headers = {'User-Agent': 'Mozilla/5.0'}
             req = urllib2.Request(url+param, None, headers)
             connection = urllib2.urlopen(req).read()
-        except urllib2.HTTPError, err:
+        except urllib2.HTTPError as err:
             raise ValueError(err.read())
         result = namedtuple("result", "raw json")
         return result(raw=connection, json=json.loads(connection))
@@ -132,7 +132,7 @@ def download(self, location):
             data = file.read()
             with open(location, "wb") as mp4:
                 mp4.write(data)
-        except urllib2.HTTPError, err:
+        except urllib2.HTTPError as err:
             raise ValueError(err.read())
 
     def formated(self, ignoreNull=False):
diff --git a/redditdownload/img_scrap_stuff.py b/redditdownload/img_scrap_stuff.py
index 025656d..0c536bd 100755
--- a/redditdownload/img_scrap_stuff.py
+++ b/redditdownload/img_scrap_stuff.py
@@ -14,15 +14,18 @@
 import traceback
 
 from PIL import Image
-from cStringIO import StringIO
-import lxml
-import html5lib  # Heavily recommended for bs4 (apparently)
 import bs4
 import requests
 import magic  # python-magic
 import pyaux
 
+from . import running_python2
+
+if running_python2():
+    from cStringIO import StringIO
+else:
+    from io import StringIO
 
 # Config-ish
 _requests_params = dict(timeout=20, verify=False)
 ## Also global-ish stuff
@@ -52,7 +55,7 @@ def indexall_re(topstr, substr_re):
 def walker(text, opening='{', closing='}'):
     """ A near-useless experiment that was intended for `get_all_objects` """
     stack = []
-    for pos in xrange(len(text)):
+    for pos in range(len(text)):
         if text[pos:pos + len(opening)] == opening:
             stack.append(pos)
             continue
@@ -88,7 +91,7 @@ def get_all_objects(text, beginning=r'{', debug=False):
     """
 
     def _dbg_actual(st, *ar):
-        print "D: ", st % ar
+        print("D: ", st % ar)
 
     _dbg = _dbg_actual if debug else (lambda *ar: None)
 
@@ -106,7 +109,6 @@ def __getitem__(self, key):
 
     class TheLoader(yaml.SafeLoader):
         ESCAPE_REPLACEMENTS = ddd(yaml.SafeLoader.ESCAPE_REPLACEMENTS)
 
-    from cStringIO import StringIO  # optimised slicing
     if isinstance(text, unicode):
         _dbg("encoding")
@@ -205,7 +207,7 @@ def get_get_get(url, **kwa):
     params = dict(_requests_params)
     params.update(kwa)
     reqr = get_reqr()
-    
+
     try:
         return reqr.get(url, **params)
     except Exception as exc:
@@ -214,13 +216,13 @@
 
 
 def get_get(*ar, **kwa):
     retries = kwa.pop('_xretries', 5)
-    for retry in xrange(retries):
+    for retry in range(retries):
         try:
             return get_get_get(*ar, **kwa)
         except Exception as exc:
             traceback.print_exc()
             ee = exc
-            print "On retry #%r (%s)" % (retry, repr(exc)[:30])
+            print("On retry #%r (%s)" % (retry, repr(exc)[:30]))
     raise GetError(ee)
 
@@ -244,7 +246,7 @@ def get(url, cache_file=None, req_params=None, bs=True, response=False, undecode
     for chunk in resp.iter_content(chunk_size=16384):
         data += chunk
         if len(data) > _max_len:
-            print "Too large"
+            print("Too large")
             break
     data = bytes(data)  ## Have to, alas.
     data_bytes = data
diff --git a/redditdownload/reddit.py b/redditdownload/reddit.py
index 9130af7..b8a3ca3 100755
--- a/redditdownload/reddit.py
+++ b/redditdownload/reddit.py
@@ -2,9 +2,16 @@
 """Return list of items from a sub-reddit of reddit.com."""
 
 import sys
-import HTMLParser
-from urllib2 import urlopen, Request, HTTPError
 from json import JSONDecoder
+from . import running_python2
+
+if running_python2():
+    from HTMLParser import HTMLParser
+    from urllib2 import urlopen, Request, HTTPError
+else:
+    from html.parser import HTMLParser
+    from urllib.request import urlopen, Request
+    from urllib.error import HTTPError
 
 
 def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
@@ -33,7 +40,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
         if '/m/' not in subreddit:
             warning = ('That doesn\'t look like a multireddit. Are you sure'
                        'you need that multireddit flag?')
-            print warning
+            print(warning)
             sys.exit(1)
         url = 'http://www.reddit.com/user/%s.json' % subreddit
     if not multireddit:
@@ -41,7 +48,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
            warning = ('It looks like you are trying to fetch a multireddit. \n'
                       'Check the multireddit flag. '
                      'Call --help for more info')
-            print warning
+            print(warning)
             sys.exit(1)
         # no sorting needed
         if reddit_sort is None:
@@ -96,7 +103,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
     try:
         req = Request(url, headers=hdr)
         json = urlopen(req).read()
-        data = JSONDecoder().decode(json)
+        data = JSONDecoder().decode(json.decode('utf-8'))
         if isinstance(data, dict):
             items = [x['data'] for x in data['data']['children']]
         elif isinstance(data, list):
@@ -119,7 +126,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
     # returns `url` values html-escaped, whereas we normally need them
     # in the way they are meant to be downloaded (i.e. urlquoted at
     # most).
-    htmlparser = HTMLParser.HTMLParser()
+    htmlparser = HTMLParser()
     for item in items:
         if item.get('url'):
             item['url'] = htmlparser.unescape(item['url'])
diff --git a/redditdownload/redditdownload.py b/redditdownload/redditdownload.py
index 6c807c1..e6776ea 100755
--- a/redditdownload/redditdownload.py
+++ b/redditdownload/redditdownload.py
@@ -1,15 +1,10 @@
 #!/usr/bin/env python2
 """Download images from a reddit.com subreddit."""
 
-from __future__ import print_function
-
 import os
 import re
-import StringIO
 import sys
 import logging
-from urllib2 import urlopen, HTTPError, URLError
-from httplib import InvalidURL
 from argparse import ArgumentParser
 from os.path import (
     exists as pathexists, join as pathjoin, basename as pathbasename,
@@ -20,7 +15,17 @@
 from .gfycat import gfycat
 from .reddit import getitems
 from .deviantart import process_deviant_url
-
+from . import running_python2
+
+if running_python2():
+    from cStringIO import StringIO
+    from urllib2 import urlopen, HTTPError, URLError
+    from httplib import InvalidURL
+else:
+    from io import StringIO
+    import urllib
+    from urllib.request import urlopen, HTTPError, URLError
+    from http.client import InvalidURL
 
 _log = logging.getLogger('redditdownload')
 
@@ -29,7 +34,7 @@ def request(url, *ar, **kwa):
     _retries = kwa.pop('_retries', 4)
     _retry_pause = kwa.pop('_retry_pause', 0)
     res = None
-    for _try in xrange(_retries):
+    for _try in range(_retries):
         try:
             res = urlopen(url, *ar, **kwa)
         except Exception as exc:
@@ -83,7 +88,7 @@ def extract_imgur_album_urls(album_url):
 
     match = re.compile(r'\"hash\":\"(.[^\"]*)\",\"title\"')
     items = []
-    memfile = StringIO.StringIO(filedata)
+    memfile = StringIO(filedata)
 
     for line in memfile.readlines():
         results = re.findall(match, line)
@@ -326,9 +331,13 @@ def main():
     sort_type = sort_type.lower()
 
     while not FINISHED:
-        ITEMS = getitems(
-            ARGS.reddit, multireddit=ARGS.multireddit, previd=LAST,
-            reddit_sort=sort_type)
+        try:
+            ITEMS = getitems(
+                ARGS.reddit, multireddit=ARGS.multireddit, previd=LAST,
+                reddit_sort=sort_type)
+        except URLError as e:
+            print(' Error trying to get items: {}'.format(str(e)))
+            continue
 
         # measure time and set the program to wait 4 second between request
         # as per reddit api guidelines
diff --git a/redditdownload/scrap_wrongies.py b/redditdownload/scrap_wrongies.py
index 0df5dbd..ad939b5 100755
--- a/redditdownload/scrap_wrongies.py
+++ b/redditdownload/scrap_wrongies.py
@@ -108,7 +108,7 @@ def consecutive_filename(filename):
         filebase, fileext = fileparts[0], None
     else:
         filebase, fileext = fileparts
-    for i in xrange(1, 9000):
+    for i in range(1, 9000):
         filetry = '%s__%02d' % (filebase, i)
         if fileext is not None:
             filetry = '%s.%s' % (filetry, fileext)
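
For reference, the porting pattern used throughout PATCH 3/3 is a single version-detection helper (running_python2() in redditdownload/__init__.py) plus conditional imports at the top of each module. The sketch below restates that pattern in a standalone, illustrative form; it is not part of the patch series itself, and the sys.version_info comparison mentioned in the comment is simply the more common standard-library idiom for the same check.

    # Illustrative sketch of the PATCH 3/3 compatibility pattern (not part of the series).
    from platform import python_version_tuple


    def running_python2():
        # The patches use platform; sys.version_info[0] == 2 is an equivalent check.
        return int(python_version_tuple()[0]) == 2


    if running_python2():
        from urllib2 import urlopen            # Python 2 module layout
    else:
        from urllib.request import urlopen     # Python 3 module layout

    # Either way, callers can use urlopen() without caring about the interpreter version:
    print(urlopen('http://www.reddit.com/r/pics/.json').read()[:60])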