Changes from all commits
20 commits
f04f80e
Add Mangasee scraper
matoro Feb 8, 2019
01b3823
handle requests.exceptions.StreamConsumedError in page_download_task()
matoro Feb 9, 2019
9d382ee
mangasee: improve speed with persistent session
matoro Feb 11, 2019
9d4325c
scraper: do not sanitize download_dir when explicitly set
matoro Feb 13, 2019
c708c3a
add mangahere scraper; numerous misc fixes
matoro Feb 15, 2019
0f28bca
mangahere: fix chapter scraping for adult content warning
matoro Feb 18, 2019
6b49af6
mangahere: fix chapter regex pattern, add test
matoro Feb 24, 2019
c5eba73
allow downloading individual chapters via the get command
matoro Feb 24, 2019
61395ee
mangahere: print error message when bad status code received
matoro Nov 23, 2019
6774150
mangasee: allow multiple retries of failed page fetch
matoro Nov 27, 2019
b313be1
Merge remote-tracking branch 'upstream/master' into mangasee
matoro Nov 28, 2019
79115df
Add Manganelo scraper
matoro Mar 5, 2020
6b7480c
scraper: check if chapter already exists in database
matoro Mar 12, 2020
cb7ad09
scraper: add retries for entire chapter
matoro Mar 13, 2020
695771c
scraper: add proper support for retrying pages
matoro Mar 25, 2020
fb53267
mangadex: add resiliency to initial image request
matoro Mar 25, 2020
d4ce9b4
mangahere: overhaul for new anti-scraping system
matoro Apr 5, 2020
0c42254
mangahere: mobile site changed again, move completely to desktop site
matoro Apr 21, 2020
2c3fcb8
mangasee: move from elem.text to elem.contents
matoro Apr 22, 2020
81bbd5b
mangahere: update to use .contents instead of .text
matoro May 7, 2020
cum/cum.py: 8 changes (4 additions, 4 deletions)
@@ -265,21 +265,21 @@ def get(input, directory):
     """
     chapter_list = []
     for item in input:
+        series = None
         try:
             series = utility.series_by_url(item)
         except exceptions.ScrapingError:
-            output.warning('Scraping error ({})'.format(item))
-            continue
+            pass
         except exceptions.LoginError as e:
             output.warning('{} ({})'.format(e.message, item))
             continue
         if series:
             chapter_list += series.chapters
+        chapter = None
         try:
             chapter = utility.chapter_by_url(item)
         except exceptions.ScrapingError:
-            output.warning('Scraping error ({})'.format(item))
-            continue
+            pass
         except exceptions.LoginError as e:
             output.warning('{} ({})'.format(e.message, item))
             continue
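For reference, a hedged sketch of the lookup order this hunk enables: a bare chapter URL no longer gets skipped when the series lookup fails; it simply falls through to the chapter lookup. The URL below is illustrative only, and the imports mirror the modules cum/cum.py already uses.

```python
from cum import exceptions, utility  # same modules cum/cum.py relies on

item = 'https://manganelo.com/chapter/example_series/chapter_10'  # illustrative URL
series = None
try:
    series = utility.series_by_url(item)    # not a series page -> ScrapingError
except exceptions.ScrapingError:
    pass                                    # fall through instead of skipping the item
chapter = None
try:
    chapter = utility.chapter_by_url(item)  # resolved by a chapter scraper instead
except exceptions.ScrapingError:
    pass
```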
cum/db.py: 10 changes (9 additions, 1 deletion)
@@ -242,7 +242,15 @@ def to_object(self):
         if parse.netloc == 'www.yuri-ism.net':
             from cum.scrapers.yuriism import YuriismChapter
             return YuriismChapter(**kwargs)
 
+        if parse.netloc == 'mangaseeonline.us':
+            from cum.scrapers.mangasee import MangaseeChapter
+            return MangaseeChapter(**kwargs)
+        if parse.netloc in ('www.mangahere.cc', 'm.mangahere.cc'):
+            from cum.scrapers.mangahere import MangahereChapter
+            return MangahereChapter(**kwargs)
+        if parse.netloc == 'manganelo.com':
+            from cum.scrapers.manganelo import ManganeloChapter
+            return ManganeloChapter(**kwargs)
 
 class Group(Base):
     __tablename__ = 'groups'
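To make the dispatch above concrete: to_object() routes purely on the URL's netloc, so the three new hosts map to their chapter classes as follows. The hostnames come from the branches above; the paths are made up for illustration.

```python
from urllib.parse import urlparse

examples = [
    'https://mangaseeonline.us/read-online/Example-chapter-1.html',  # -> MangaseeChapter
    'https://www.mangahere.cc/manga/example/c001/1.html',            # -> MangahereChapter
    'https://m.mangahere.cc/manga/example/c001/1.html',              # -> MangahereChapter
    'https://manganelo.com/chapter/example/chapter_1',               # -> ManganeloChapter
]
for url in examples:
    print(urlparse(url).netloc)  # the value compared against parse.netloc above
```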
cum/scrapers/__init__.py: 9 changes (9 additions, 0 deletions)
@@ -2,19 +2,28 @@
 from cum.scrapers.dynastyscans import DynastyScansChapter, DynastyScansSeries
 from cum.scrapers.madokami import MadokamiChapter, MadokamiSeries
 from cum.scrapers.mangadex import MangadexSeries, MangadexChapter
+from cum.scrapers.manganelo import ManganeloSeries, ManganeloChapter
+from cum.scrapers.mangasee import MangaseeSeries, MangaseeChapter
+from cum.scrapers.mangahere import MangahereSeries, MangahereChapter
 from cum.scrapers.yuriism import YuriismChapter, YuriismSeries
 
 series_scrapers = [
     DokiReaderSeries,
     DynastyScansSeries,
     MadokamiSeries,
     MangadexSeries,
+    ManganeloSeries,
+    MangaseeSeries,
+    MangahereSeries,
     YuriismSeries,
 ]
 chapter_scrapers = [
     DokiReaderChapter,
     DynastyScansChapter,
     MadokamiChapter,
     MangadexChapter,
+    ManganeloChapter,
+    MangaseeChapter,
+    MangahereChapter,
     YuriismChapter,
 ]
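For orientation, a minimal sketch of how a registry like series_scrapers is typically consumed. It assumes each scraper class exposes a compiled url_re to match candidate URLs against; that attribute is not shown in this diff, and cum's real lookup lives in its utility helpers rather than here.

```python
# Hypothetical helper, not part of this PR: return the first scraper whose
# (assumed) url_re matches the given URL, or None if nothing matches.
def find_scraper(url, scrapers):
    for scraper in scrapers:
        if scraper.url_re.match(url):
            return scraper
    return None
```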
cum/scrapers/base.py: 73 changes (59 additions, 14 deletions)
@@ -1,6 +1,6 @@
 from abc import ABCMeta, abstractmethod
 from concurrent.futures import ThreadPoolExecutor
-from cum import config, db, output
+from cum import config, db, exceptions, output
 from mimetypes import guess_extension
 from re import match, sub
 from sqlalchemy.exc import IntegrityError, SQLAlchemyError
@@ -195,6 +195,11 @@ def filename(self):
         elif match(r'[0-9]*\.[0-9]*$', self.chapter):
             number, decimal = self.chapter.split('.')
             chapter = 'c{:0>3} x{}'.format(number, decimal)
+        # Individually numbered chapter with double-decimal (e.g. '2.164.5').
+        # Used by titles with multiple volumes/seasons and special chapters.
+        elif match(r'[0-9]*(\.[0-9]*){2}$', self.chapter):
+            volume, number, decimal = self.chapter.split('.')
+            chapter = 'c{:0>3} x{:0>3}.{}'.format(volume, number, decimal)
         # Failing all else, e.g. 'Special'. Becomes 'c000 [Special]'.
         else:
             chapter = 'c000 [{}]'.format(self.chapter)
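A worked example of the new branch, using the '2.164.5' value from its comment: the split yields volume '2', number '164' and decimal '5', and the format string below reproduces the resulting name.

```python
# Reproduces the new elif body in isolation.
volume, number, decimal = '2.164.5'.split('.')
print('c{:0>3} x{:0>3}.{}'.format(volume, number, decimal))  # -> c002 x164.5
```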
@@ -209,13 +214,20 @@ def filename(self):
         else:
             ext = 'zip'
 
+        directory_set = False
         if self.directory:
             directory = os.path.expanduser(self.directory)
+            directory_set = True
         else:
             directory = name
         download_dir = os.path.expanduser(config.get().download_directory)
         download_dir = os.path.join(download_dir, directory)
-        download_dir = self._strip_unwanted_characters(download_dir)
+        # only sanitize download_dir if the user did not explicitly set it
+        # assume that if it is set, the user wanted it exactly as set
+        # if they include bad characters and it breaks things, that's their
+        # fault.
+        if not directory_set:
+            download_dir = self._strip_unwanted_characters(download_dir)
         download_dir = self.create_directory(download_dir)
 
         # Format the filename somewhat based on Daiz's manga naming scheme.
@@ -292,16 +304,45 @@ def page_download_finish(bar, files, fs):
         bar.update(1)
 
     @staticmethod
-    def page_download_task(page_num, r):
+    def page_download_task(page_num, r, page_url = None):
         """Saves the response body of a single request, returning the file
         handle and the passed through number of the page to allow for non-
         sequential downloads in parallel.
         """
         ext = BaseChapter.guess_extension(r.headers.get('content-type'))
         f = NamedTemporaryFile(suffix=ext, delete=False)
-        for chunk in r.iter_content(chunk_size=4096):
-            if chunk:
-                f.write(chunk)
+        retries = 20
+        while retries > 0:
+            try:
+                for chunk in r.iter_content(chunk_size=4096):
+                    if chunk:
+                        f.write(chunk)
+                retries = 0
+            # basically ignores this exception that requests throws. my
+            # understanding is that it is raised when you attempt to iter_content()
+            # over the same content twice. don't understand how that situation
+            # arises with the current code but it did somehow.
+            # https://stackoverflow.com/questions/45379903/
+            except requests.exceptions.StreamConsumedError:
+                pass
+            # when under heavy load, Mangadex will often kill the connection in
+            # the middle of an image download. in the original architecture,
+            # the requests are all opened in the scrapers in stream mode, then
+            # the actual image payloads are downloaded in the asynchronous
+            # callbacks. when this occurs we have no choice but to re-request
+            # the image from the beginning (easier than playing around with range
+            # headers). this means each thread may issue multiple new requests.
+            # I have found the performance overhead to be mostly negligible.
+            except requests.exceptions.ChunkedEncodingError:
+                if not page_url:
+                    output.error("Connection killed on page {} but scraper does not support retries".format(str(page_num)))
+                    raise exceptions.ScrapingError
+                output.warning("Connection killed on page {}, {} retries remaining".format(str(page_num), str(retries)))
+                retries = retries - 1
+                if retries <= 0:
+                    output.error("Connection killed on page {}, no retries remaining - aborting chapter".format(str(page_num)))
+                    raise exceptions.ScrapingError
+                r = requests.get(page_url, stream = True)
         f.flush()
         f.close()
         r.close()
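Scrapers opt in to this retry path by passing the page's original URL as the new page_url argument; without it, a killed connection still aborts the chapter as before. A fragment showing the call shape (it mirrors the mangadex.py hunk further down, where i, r, image, bar and files are defined by the surrounding download loop):

```python
# Fragment, not standalone: names come from a scraper's download() loop.
fut = download_pool.submit(self.page_download_task, i, r, page_url=image)
fut.add_done_callback(partial(self.page_download_finish, bar, files))
futures.append(fut)
```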
@@ -326,15 +367,19 @@ def progress_bar(self, arg):
 
     def save(self, series, ignore=False):
         """Save a chapter to database."""
+        # check if chapter already exists in database
         try:
-            c = db.Chapter(self, series)
-        except IntegrityError:
-            db.session.rollback()
-        else:
-            if ignore:
-                c.downloaded = -1
-            db.session.add(c)
+            c = db.session.query(db.Chapter).filter_by(url=self.url).one()
+        except NoResultFound:
             try:
-                db.session.commit()
+                c = db.Chapter(self, series)
             except IntegrityError:
                 db.session.rollback()
+            else:
+                if ignore:
+                    c.downloaded = -1
+                db.session.add(c)
+                try:
+                    db.session.commit()
+                except IntegrityError:
+                    db.session.rollback()
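One note on the new lookup: Query.one() raises NoResultFound when no row matches, which is what the added except branch relies on. The corresponding import is outside the displayed hunk; in SQLAlchemy it would be:

```python
from sqlalchemy.orm.exc import NoResultFound  # assumed import, not visible in this hunk
```

An alternative would be Query.one_or_none(), which returns None instead of raising, but the exception-based form matches the surrounding try/except style.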
cum/scrapers/mangadex.py: 22 changes (17 additions, 5 deletions)
@@ -124,13 +124,25 @@ def download(self):
             if guess_type(page)[0]:
                 image = server + chapter_hash + '/' + page
             else:
-                print('Unkown image type for url {}'.format(page))
-                raise ValueError
-            r = requests.get(image, stream=True)
+                print('Unknown image type for url {}'.format(page))
+                raise exceptions.ScrapingError
+            retries = 3
+            r = None
+            while retries > 0:
+                try:
+                    r = requests.get(image, stream=True)
+                    break
+                except requests.exceptions.ConnectionError:
+                    output.warning("Initial request for page {} failed, {} retries remaining".format(str(i), str(retries)))
+                    retries = retries - 1
+            if not r:
+                output.error("Failed to request page {}".format(str(i)))
+                raise exceptions.ScrapingError
             if r.status_code == 404:
                 r.close()
-                raise ValueError
-            fut = download_pool.submit(self.page_download_task, i, r)
+                raise exceptions.ScrapingError
+            fut = download_pool.submit(self.page_download_task,
+                                       i, r, page_url = image)
             fut.add_done_callback(partial(self.page_download_finish,
                                           bar, files))
             futures.append(fut)
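The initial-request handling above amounts to a small bounded-retry loop; pulled out as a standalone sketch (the retry count and ConnectionError handling mirror the hunk, the URL is a placeholder):

```python
import requests

def get_with_retries(url, attempts=3):
    """Hedged sketch of the pattern above: retry the initial request only on
    ConnectionError and give up after a fixed number of attempts."""
    for attempt in range(attempts):
        try:
            return requests.get(url, stream=True)
        except requests.exceptions.ConnectionError:
            print('request for {} failed ({}/{})'.format(url, attempt + 1, attempts))
    raise RuntimeError('all {} attempts failed for {}'.format(attempts, url))
```

In the hunk itself the failure case raises exceptions.ScrapingError instead, so the chapter-level retry logic added elsewhere in this PR can take over.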