3 changes: 3 additions & 0 deletions .gitignore
@@ -1,4 +1,6 @@
*/*.jpg
*/*.png
*.egg-info
*.swp
*.bak
*.DS_Store
@@ -12,4 +14,5 @@
/*.webm
/gfycat
/build
/dist
/.pydevproject
2 changes: 1 addition & 1 deletion readme.md
@@ -70,7 +70,7 @@ wallpaper would be as follows:
And to run the same query but only get new images you don't already
have, run the following:

python redditdl.py wallpaper wallpaper --score 50 -update
python redditdl.py wallpaper wallpaper --score 50 --update

For getting some nice pictures of cats in your catsfolder (which will be created if it
doesn't exist yet) run:
5 changes: 5 additions & 0 deletions redditdownload/__init__.py
@@ -1 +1,6 @@
from redditdownload import *


def running_python2():
from platform import python_version_tuple
return int(python_version_tuple()[0]) == 2
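
Note: this new running_python2() helper is what the rest of the package keys its version-specific imports on. A minimal usage sketch of the pattern (urlopen is just an example name; the real call sites appear in the files below):

    from redditdownload import running_python2

    if running_python2():
        from urllib2 import urlopen          # Python 2 stdlib location
    else:
        from urllib.request import urlopen   # Python 3 stdlib location
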
4 changes: 2 additions & 2 deletions redditdownload/gfycat.py
@@ -30,7 +30,7 @@ def __fetch(self, url, param):
headers = {'User-Agent': 'Mozilla/5.0'}
req = urllib2.Request(url+param, None, headers)
connection = urllib2.urlopen(req).read()
except urllib2.HTTPError, err:
except urllib2.HTTPError as err:
raise ValueError(err.read())
result = namedtuple("result", "raw json")
return result(raw=connection, json=json.loads(connection))
@@ -132,7 +132,7 @@ def download(self, location):
data = file.read()
with open(location, "wb") as mp4:
mp4.write(data)
except urllib2.HTTPError, err:
except urllib2.HTTPError as err:
raise ValueError(err.read())

def formated(self, ignoreNull=False):
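
The only change in gfycat.py is the exception syntax: the comma form ("except urllib2.HTTPError, err:") is a SyntaxError on Python 3, while the "as" form is accepted by Python 2.6+ and Python 3 alike. A tiny self-contained illustration (ValueError is used here purely for the demo):

    try:
        int('not a number')
    except ValueError as err:  # this spelling parses on Python 2.6+ and Python 3
        print(repr(err))
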
22 changes: 12 additions & 10 deletions redditdownload/img_scrap_stuff.py
@@ -14,15 +14,18 @@
import traceback

from PIL import Image
from cStringIO import StringIO
import lxml
import html5lib # Heavily recommended for bs4 (apparently)
import bs4
import requests
import magic # python-magic

import pyaux

from . import running_python2

if running_python2():
from cStringIO import StringIO
else:
from io import StringIO

# Config-ish
_requests_params = dict(timeout=20, verify=False) ## Also global-ish stuff
@@ -52,7 +55,7 @@ def indexall_re(topstr, substr_re):
def walker(text, opening='{', closing='}'):
""" A near-useless experiment that was intended for `get_all_objects` """
stack = []
for pos in xrange(len(text)):
for pos in range(len(text)):
if text[pos:pos + len(opening)] == opening:
stack.append(pos)
continue
@@ -88,7 +91,7 @@ def get_all_objects(text, beginning=r'{', debug=False):
"""

def _dbg_actual(st, *ar):
print "D: ", st % ar
print("D: ", st % ar)

_dbg = _dbg_actual if debug else (lambda *ar: None)

@@ -106,7 +109,6 @@ def __getitem__(self, key):
class TheLoader(yaml.SafeLoader):
ESCAPE_REPLACEMENTS = ddd(yaml.SafeLoader.ESCAPE_REPLACEMENTS)

from cStringIO import StringIO
# optimised slicing
if isinstance(text, unicode):
_dbg("encoding")
@@ -205,7 +207,7 @@ def get_get_get(url, **kwa):
params = dict(_requests_params)
params.update(kwa)
reqr = get_reqr()

try:
return reqr.get(url, **params)
except Exception as exc:
@@ -214,13 +216,13 @@ def get_get(*ar, **kwa):

def get_get(*ar, **kwa):
retries = kwa.pop('_xretries', 5)
for retry in xrange(retries):
for retry in range(retries):
try:
return get_get_get(*ar, **kwa)
except Exception as exc:
traceback.print_exc()
ee = exc
print "On retry #%r (%s)" % (retry, repr(exc)[:30])
print("On retry #%r (%s)" % (retry, repr(exc)[:30]))
raise GetError(ee)


@@ -244,7 +246,7 @@ def get(url, cache_file=None, req_params=None, bs=True, response=False, undecode
for chunk in resp.iter_content(chunk_size=16384):
data += chunk
if len(data) > _max_len:
print "Too large"
print("Too large")
break
data = bytes(data) ## Have to, alas.
data_bytes = data
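
One caveat alongside the StringIO switch above: io.StringIO accepts only text, whereas cStringIO.StringIO also took byte strings, so any call site that feeds downloaded image bytes to PIL would want io.BytesIO on Python 3. A minimal sketch under that assumption (not part of this diff):

    from io import BytesIO
    from PIL import Image

    def image_from_bytes(data):
        # data is assumed to be raw image bytes, e.g. the body of an HTTP response
        return Image.open(BytesIO(data))
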
19 changes: 13 additions & 6 deletions redditdownload/reddit.py
@@ -2,9 +2,16 @@
"""Return list of items from a sub-reddit of reddit.com."""

import sys
import HTMLParser
from urllib2 import urlopen, Request, HTTPError
from json import JSONDecoder
from . import running_python2

if running_python2():
from HTMLParser import HTMLParser  # import the class so HTMLParser() below works on Python 2 too
from urllib2 import urlopen, Request, HTTPError
else:
from html.parser import HTMLParser
from urllib.request import urlopen, Request
from urllib.error import HTTPError


def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
@@ -33,15 +40,15 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
if '/m/' not in subreddit:
warning = ('That doesn\'t look like a multireddit. Are you sure '
'you need that multireddit flag?')
print warning
print(warning)
sys.exit(1)
url = 'http://www.reddit.com/user/%s.json' % subreddit
if not multireddit:
if '/m/' in subreddit:
warning = ('It looks like you are trying to fetch a multireddit. \n'
'Check the multireddit flag. '
'Call --help for more info')
print warning
print(warning)
sys.exit(1)
# no sorting needed
if reddit_sort is None:
@@ -96,7 +103,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
try:
req = Request(url, headers=hdr)
json = urlopen(req).read()
data = JSONDecoder().decode(json)
data = JSONDecoder().decode(json.decode('utf-8'))
if isinstance(data, dict):
items = [x['data'] for x in data['data']['children']]
elif isinstance(data, list):
@@ -119,7 +126,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
# returns `url` values html-escaped, whereas we normally need them
# in the way they are meant to be downloaded (i.e. urlquoted at
# most).
htmlparser = HTMLParser.HTMLParser()
htmlparser = HTMLParser()
for item in items:
if item.get('url'):
item['url'] = htmlparser.unescape(item['url'])
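
A related note on the unescape() call kept at the end of this file: HTMLParser.unescape() was deprecated in Python 3.4 and removed in 3.9, so html.unescape() is the long-term spelling on Python 3. A minimal sketch, assuming the value is an HTML-escaped URL string:

    try:
        from html import unescape            # Python 3.4+
    except ImportError:
        from HTMLParser import HTMLParser    # Python 2 fallback
        unescape = HTMLParser().unescape

    print(unescape('http://example.com/?a=1&amp;b=2'))
    # http://example.com/?a=1&b=2
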
31 changes: 20 additions & 11 deletions redditdownload/redditdownload.py
@@ -1,15 +1,10 @@
#!/usr/bin/env python2
"""Download images from a reddit.com subreddit."""

from __future__ import print_function

import os
import re
import StringIO
import sys
import logging
from urllib2 import urlopen, HTTPError, URLError
from httplib import InvalidURL
from argparse import ArgumentParser
from os.path import (
exists as pathexists, join as pathjoin, basename as pathbasename,
@@ -20,7 +15,17 @@
from .gfycat import gfycat
from .reddit import getitems
from .deviantart import process_deviant_url

from . import running_python2

if running_python2():
from cStringIO import StringIO
from urllib2 import urlopen, HTTPError, URLError
from httplib import InvalidURL
else:
from io import StringIO
import urllib
from urllib.request import urlopen, HTTPError, URLError
from http.client import InvalidURL

_log = logging.getLogger('redditdownload')

@@ -29,7 +34,7 @@ def request(url, *ar, **kwa):
_retries = kwa.pop('_retries', 4)
_retry_pause = kwa.pop('_retry_pause', 0)
res = None
for _try in xrange(_retries):
for _try in range(_retries):
try:
res = urlopen(url, *ar, **kwa)
except Exception as exc:
@@ -83,7 +88,7 @@ def extract_imgur_album_urls(album_url):
match = re.compile(r'\"hash\":\"(.[^\"]*)\",\"title\"')
items = []

memfile = StringIO.StringIO(filedata)
memfile = StringIO(filedata)

for line in memfile.readlines():
results = re.findall(match, line)
@@ -326,9 +331,13 @@ def main():
sort_type = sort_type.lower()

while not FINISHED:
ITEMS = getitems(
ARGS.reddit, multireddit=ARGS.multireddit, previd=LAST,
reddit_sort=sort_type)
try:
ITEMS = getitems(
ARGS.reddit, multireddit=ARGS.multireddit, previd=LAST,
reddit_sort=sort_type)
except URLError as e:  # URLError is imported above on both Python 2 and 3; urllib.error is not
print(' Error trying to get items: {}'.format(str(e)))
continue

# measure time and set the program to wait 4 second between request
# as per reddit api guidelines
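
The new try/except around getitems() keeps the main loop alive when reddit is unreachable and simply retries on the next pass of the while loop. For reference, a small illustrative wrapper (not part of this diff; the retry cap and pauses are arbitrary) that adds a pause between attempts:

    import time
    from urllib.error import URLError          # Python 3 form shown for brevity
    from redditdownload.reddit import getitems

    def getitems_with_backoff(*args, **kwargs):
        last_exc = None
        for attempt in range(5):               # arbitrary retry cap
            try:
                return getitems(*args, **kwargs)
            except URLError as exc:
                last_exc = exc
                print(' Error trying to get items: {}'.format(exc))
                time.sleep(2 ** attempt)       # simple exponential pause
        raise last_exc
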
2 changes: 1 addition & 1 deletion redditdownload/scrap_wrongies.py
@@ -108,7 +108,7 @@ def consecutive_filename(filename):
filebase, fileext = fileparts[0], None
else:
filebase, fileext = fileparts
for i in xrange(1, 9000):
for i in range(1, 9000):
filetry = '%s__%02d' % (filebase, i)
if fileext is not None:
filetry = '%s.%s' % (filetry, fileext)
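
For reference on the xrange-to-range swaps made throughout this PR: Python 3's range() is a lazy sequence much like Python 2's xrange(), so nothing large is materialised, while on Python 2 the plain range() call builds a list, which is negligible at these sizes. A tiny illustration of the filename pattern this last loop generates:

    for i in range(1, 4):
        print('%s__%02d' % ('photo', i))   # photo__01, photo__02, photo__03
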