diff --git a/cum/cum.py b/cum/cum.py index 689c205..041a961 100755 --- a/cum/cum.py +++ b/cum/cum.py @@ -265,21 +265,21 @@ def get(input, directory): """ chapter_list = [] for item in input: + series = None try: series = utility.series_by_url(item) except exceptions.ScrapingError: - output.warning('Scraping error ({})'.format(item)) - continue + pass except exceptions.LoginError as e: output.warning('{} ({})'.format(e.message, item)) continue if series: chapter_list += series.chapters + chapter = None try: chapter = utility.chapter_by_url(item) except exceptions.ScrapingError: - output.warning('Scraping error ({})'.format(item)) - continue + pass except exceptions.LoginError as e: output.warning('{} ({})'.format(e.message, item)) continue diff --git a/cum/db.py b/cum/db.py index 8f05241..9b1bf66 100644 --- a/cum/db.py +++ b/cum/db.py @@ -242,7 +242,15 @@ def to_object(self): if parse.netloc == 'www.yuri-ism.net': from cum.scrapers.yuriism import YuriismChapter return YuriismChapter(**kwargs) - + if parse.netloc == 'mangaseeonline.us': + from cum.scrapers.mangasee import MangaseeChapter + return MangaseeChapter(**kwargs) + if parse.netloc in ('www.mangahere.cc', 'm.mangahere.cc'): + from cum.scrapers.mangahere import MangahereChapter + return MangahereChapter(**kwargs) + if parse.netloc == 'manganelo.com': + from cum.scrapers.manganelo import ManganeloChapter + return ManganeloChapter(**kwargs) class Group(Base): __tablename__ = 'groups' diff --git a/cum/scrapers/__init__.py b/cum/scrapers/__init__.py index cb05f04..4823bf9 100644 --- a/cum/scrapers/__init__.py +++ b/cum/scrapers/__init__.py @@ -2,6 +2,9 @@ from cum.scrapers.dynastyscans import DynastyScansChapter, DynastyScansSeries from cum.scrapers.madokami import MadokamiChapter, MadokamiSeries from cum.scrapers.mangadex import MangadexSeries, MangadexChapter +from cum.scrapers.manganelo import ManganeloSeries, ManganeloChapter +from cum.scrapers.mangasee import MangaseeSeries, MangaseeChapter +from cum.scrapers.mangahere import MangahereSeries, MangahereChapter from cum.scrapers.yuriism import YuriismChapter, YuriismSeries series_scrapers = [ @@ -9,6 +12,9 @@ DynastyScansSeries, MadokamiSeries, MangadexSeries, + ManganeloSeries, + MangaseeSeries, + MangahereSeries, YuriismSeries, ] chapter_scrapers = [ @@ -16,5 +22,8 @@ DynastyScansChapter, MadokamiChapter, MangadexChapter, + ManganeloChapter, + MangaseeChapter, + MangahereChapter, YuriismChapter, ] diff --git a/cum/scrapers/base.py b/cum/scrapers/base.py index b074658..b7bc448 100644 --- a/cum/scrapers/base.py +++ b/cum/scrapers/base.py @@ -1,6 +1,6 @@ from abc import ABCMeta, abstractmethod from concurrent.futures import ThreadPoolExecutor -from cum import config, db, output +from cum import config, db, exceptions, output from mimetypes import guess_extension from re import match, sub from sqlalchemy.exc import IntegrityError, SQLAlchemyError @@ -195,6 +195,11 @@ def filename(self): elif match(r'[0-9]*\.[0-9]*$', self.chapter): number, decimal = self.chapter.split('.') chapter = 'c{:0>3} x{}'.format(number, decimal) + # Individually numbered chapter with double-decimal (e.g. '2.164.5'). + # Used by titles with multiple volumes/seasons and special chapters. + elif match(r'[0-9]*(\.[0-9]*){2}$', self.chapter): + volume, number, decimal = self.chapter.split('.') + chapter = 'c{:0>3} x{:0>3}.{}'.format(volume, number, decimal) # Failing all else, e.g. 'Special'. Becomes 'c000 [Special]'. 
         else:
             chapter = 'c000 [{}]'.format(self.chapter)
@@ -209,13 +214,20 @@ def filename(self):
         else:
             ext = 'zip'
 
+        directory_set = False
         if self.directory:
             directory = os.path.expanduser(self.directory)
+            directory_set = True
         else:
             directory = name
         download_dir = os.path.expanduser(config.get().download_directory)
         download_dir = os.path.join(download_dir, directory)
-        download_dir = self._strip_unwanted_characters(download_dir)
+        # only sanitize download_dir if the user did not explicitly set it
+        # assume that if it is set, the user wanted it exactly as set
+        # if they include bad characters and it breaks things, that's their
+        # fault.
+        if not directory_set:
+            download_dir = self._strip_unwanted_characters(download_dir)
         download_dir = self.create_directory(download_dir)
 
         # Format the filename somewhat based on Daiz's manga naming scheme.
@@ -292,16 +304,45 @@ def page_download_finish(bar, files, fs):
         bar.update(1)
 
     @staticmethod
-    def page_download_task(page_num, r):
+    def page_download_task(page_num, r, page_url=None):
         """Saves the response body of a single request, returning the file
         handle and the passed through number of the page to allow for non-
         sequential downloads in parallel.
         """
         ext = BaseChapter.guess_extension(r.headers.get('content-type'))
         f = NamedTemporaryFile(suffix=ext, delete=False)
-        for chunk in r.iter_content(chunk_size=4096):
-            if chunk:
-                f.write(chunk)
+        retries = 20
+        while retries > 0:
+            try:
+                for chunk in r.iter_content(chunk_size=4096):
+                    if chunk:
+                        f.write(chunk)
+                retries = 0
+            # basically ignores this exception that requests throws. my
+            # understanding is that it is raised when you attempt to iter_content()
+            # over the same content twice. don't understand how that situation
+            # arises with the current code but it did somehow.
+            # https://stackoverflow.com/questions/45379903/
+            except requests.exceptions.StreamConsumedError:
+                retries = 0
+            # when under heavy load, Mangadex will often kill the connection in
+            # the middle of an image download. in the original architecture,
+            # the requests are all opened in the scrapers in stream mode, then
+            # the actual image payloads are downloaded in the asynchronous
+            # callbacks. when this occurs we have no choice but to re-request
+            # the image from the beginning (easier than playing around with range
+            # headers). this means each thread may issue multiple new requests.
+            # I have found the performance overhead to be mostly negligible.
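+            # note: a retry needs page_url so the request can be re-issued
+            # from scratch; of the bundled scrapers only the Mangadex
+            # downloader passes it, so the others will still abort the
+            # chapter if the connection dies mid-image.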
+ except requests.exceptions.ChunkedEncodingError: + if not page_url: + output.error("Connection killed on page {} but scraper does not support retries".format(str(page_num))) + raise exceptions.ScrapingError + output.warning("Connection killed on page {}, {} retries remaining".format(str(page_num), str(retries))) + retries = retries - 1 + if retries <= 0: + output.error("Connection killed on page {}, no retries remaining - aborting chapter".format(str(page_num))) + raise exceptions.ScrapingError + r = requests.get(page_url, stream = True) f.flush() f.close() r.close() @@ -326,15 +367,19 @@ def progress_bar(self, arg): def save(self, series, ignore=False): """Save a chapter to database.""" + # check if chapter already exists in database try: - c = db.Chapter(self, series) - except IntegrityError: - db.session.rollback() - else: - if ignore: - c.downloaded = -1 - db.session.add(c) + c = db.session.query(db.Chapter).filter_by(url=self.url).one() + except NoResultFound: try: - db.session.commit() + c = db.Chapter(self, series) except IntegrityError: db.session.rollback() + else: + if ignore: + c.downloaded = -1 + db.session.add(c) + try: + db.session.commit() + except IntegrityError: + db.session.rollback() diff --git a/cum/scrapers/mangadex.py b/cum/scrapers/mangadex.py index 1d7c0c7..0bdbc18 100644 --- a/cum/scrapers/mangadex.py +++ b/cum/scrapers/mangadex.py @@ -124,13 +124,25 @@ def download(self): if guess_type(page)[0]: image = server + chapter_hash + '/' + page else: - print('Unkown image type for url {}'.format(page)) - raise ValueError - r = requests.get(image, stream=True) + print('Unknown image type for url {}'.format(page)) + raise exceptions.ScrapingError + retries = 3 + r = None + while retries > 0: + try: + r = requests.get(image, stream=True) + break + except requests.exceptions.ConnectionError: + output.warning("Initial request for page {} failed, {} retries remaining".format(str(i), str(retries))) + retries = retries - 1 + if not r: + output.error("Failed to request page {}".format(str(i))) + raise exceptions.ScrapingError if r.status_code == 404: r.close() - raise ValueError - fut = download_pool.submit(self.page_download_task, i, r) + raise exceptions.ScrapingError + fut = download_pool.submit(self.page_download_task, + i, r, page_url = image) fut.add_done_callback(partial(self.page_download_finish, bar, files)) futures.append(fut) diff --git a/cum/scrapers/mangahere.py b/cum/scrapers/mangahere.py new file mode 100644 index 0000000..07e0073 --- /dev/null +++ b/cum/scrapers/mangahere.py @@ -0,0 +1,234 @@ +from bs4 import BeautifulSoup +from cum import config, exceptions, output +from cum.scrapers.base import BaseChapter, BaseSeries, download_pool +from functools import partial +from jsbeautifier import beautify +from json import loads +import concurrent.futures +import re +import requests + +# as of 2020/04/04, the old mobile interface which allowed easy scraping +# has been removed, and mobile now copies desktop which is protected +# by Cloudflare's Bot Management +# https://www.cloudflare.com/products/bot-management/ +# in my personal testing, the following heuristic headers do reliably bypass +# it, at least to the extent necessary to hit their new progressive page load system +# however, because it is a heuristic-based system, there is no guarantee +# that just because it works from one machine and/or network location +# that it will work for others. +# feedback is appreciated. 
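+# note: this dict is shared module state; MangahereChapter._request_pages
+# adds request-specific fields ("accept", "referer", "x-requested-with")
+# to it at download time before calling the chapterfun.ashx endpoint.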
+chrome_headers = { + "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", + "accept-language": "en-US,en;q=0.9", + "upgrade-insecure-requests": "1", + "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36" + } + +class MangahereSeries(BaseSeries): + url_re = re.compile(r'https?://((www|m)\.)?mangahere\.cc/manga/.+') + + def __init__(self, url, **kwargs): + super().__init__(url, **kwargs) + # convert desktop link to mobile + # bypasses adult content warning js + spage = requests.get(url.replace("m.", "www."), cookies = { "isAdult": "1" }) + if spage.status_code == 404: + raise exceptions.ScrapingError + self.soup = BeautifulSoup(spage.text, config.get().html_parser) + self.chapters = self.get_chapters() + + def get_chapters(self): + try: + # broken 2020/04/04 + # rows = self.soup.find("div", class_="manga-chapters")\ + # .find("ul").find_all("li") + rows = self.soup.find("ul", class_="detail-main-list")\ + .find_all("a") + except AttributeError: + raise exceptions.ScrapingError() + chapters = [] + for i, row in enumerate(rows): + chap_num = re.match((r"/manga/[^/]+(?:(?:/v[0-9]+)?" + r"/c([0-9\.]+))/[0-9]+\.html$"), + row.get("href")).groups()[0]\ + .replace("/", "") + if "v" in chap_num: + chap_num = chap_num.replace("v", "").replace("c", ".") + else: + chap_num = chap_num.replace("c", "") + if chap_num == "000": + chap_num = "0" + else: + chap_num = chap_num.lstrip("0") + # convert mobile link to desktop + chap_url = "https://www.mangahere.cc" + row.get("href")\ + .replace("/roll_manga/", "/manga/") + chap_name = row.find("p").text + result = MangahereChapter(name=self.name, + alias=self.alias, + chapter=chap_num, + url=chap_url, + title=chap_name, + groups=[]) + chapters.append(result) + return chapters + + @property + def name(self): + try: + # so I'm not sure if this is an anti-scraping measure or not, but + # sometimes the name of the series returned in the raw page text + # has a space as the first character. my measurements put it + # occurring ~30% of the time. 
if that's the case, then we need
+            # to replace the first letter with the capitalized first letter
+            # of the series name from the url
+            tentative_name = re.match(r"(.+) Manga - Read .+ Online at MangaHere",
+                                      self.soup.find("title").text).groups()[0]
+            if tentative_name.startswith(" "):
+                first_letter = self.url.replace("m.", "www.")[31].upper()
+                tentative_name = first_letter + tentative_name[1:]
+            return tentative_name
+        except AttributeError:
+            raise exceptions.ScrapingError
+
+
+class MangahereChapter(BaseChapter):
+    url_re = re.compile((r'https?://((www|m)\.)?mangahere\.cc'
+                         r'/(roll_)?manga/[^/]+(/v[0-9]+)?/c[0-9\.]+/[0-9]+\.html$'))
+    upload_date = None
+    uses_pages = True
+
+    def _request_pages(self, mid, cid, pages):
+        base_url = re.search(r"(.+/)[0-9]\.html", self.url.replace("m.", "www.")).groups()[0]
+        data_url = base_url + "chapterfun.ashx?cid=" + str(cid) + "&page=" + str(len(pages) + 1) + "&key="
+        chrome_headers["accept"] = "*/*"
+        chrome_headers["referer"] = self.url.replace("m.", "www.")
+        chrome_headers["x-requested-with"] = "XMLHttpRequest"
+        data = self.session.get(data_url, headers=chrome_headers)
+        if data.text == "":
+            raise exceptions.ScrapingError
+        try:
+            data_clean = beautify(data.text)
+            if not getattr(self, "pvalue", None):
+                self.pvalue = "https:" + re.search(r"pvalue\[i\] = \"(.+)\" \+ pvalue\[i\];", data_clean).groups()[0]
+            # formatted_chap_num = re.search(r".+/c([0-9\.]+)/[0-9]\.html", self.url).groups()[0]
+            # if "." not in formatted_chap_num:
+            #     formatted_chap_num += ".0"
+            for page in loads(re.search("var pvalue = (.+);", data_clean).groups()[0]):
+                full_page = self.pvalue + page
+                if full_page not in pages:
+                    pages.append(full_page)
+        except Exception:
+            raise exceptions.ScrapingError
+        return pages
+
+    def download(self):
+
+        self.session = requests.Session()
+
+        if not getattr(self, "cpage", None):
+            self.cpage = self.session.get(self.url.replace("m.", "www."), headers=chrome_headers)
+        if self.cpage.status_code == 404:
+            raise exceptions.ScrapingError
+
+        if not getattr(self, "soup", None):
+            self.soup = BeautifulSoup(self.cpage.text,
+                                      config.get().html_parser)
+
+        # broken 2020/04/04
+        # image_list = self.soup.find("div", class_="mangaread-img")\
+        #                       .find_all("img")
+        # pages = []
+        # for image in image_list:
+        #     pages.append(image["data-original"].replace("http://", "https://"))
+
+        pages = []
+        (mid, cid) = (None, None)
+        # index of script with ids may vary
+        # it may also change as ads are added/removed from the site
+        for f in range(0, len(self.soup.find_all("script"))):
+            try:
+                if len(self.soup.find_all("script")[f].contents):
+                    mid = re.search("var comicid = ([0-9]+)", self.soup.find_all("script")[f].contents[0]).groups()[0]
+                    cid = re.search("var chapterid =([0-9]+)", self.soup.find_all("script")[f].contents[0]).groups()[0]
+            except AttributeError:
+                pass
+        if mid and cid:
+            old_num_pages = -1
+            while old_num_pages != len(pages):
+                old_num_pages = len(pages)
+                pages = self._request_pages(mid, cid, pages)
+        else:
+            # some titles (seems to be ones with low page counts like webtoons)
+            # don't use progressively-loaded pages.
for these, the image list + # can be extracted directly off the main page + for g in range(0, len(self.soup.find_all("script"))): + try: + pages = loads(re.search("var newImgs = (.+);var newImginfos", beautify(self.soup.find_all("script")[g].text).replace("\\", "").replace("'", "\"")).groups()[0]) + except AttributeError: + pass + if not len(pages): + raise exceptions.ScrapingError + for i, page in enumerate(pages): + pages[i] = "https:" + page + + futures = [] + files = [None] * len(pages) + with self.progress_bar(pages) as bar: + for i, page in enumerate(pages): + retries = 0 + while retries < 10: + try: + r = self.session.get(page, stream=True) + break + except requests.exceptions.ConnectionError: + retries += 1 + # end of chapter detection in the web ui is done by issuing requests + # for nonexistent pages which return 404s (who comes up with this) + if r.status_code != 404: + if r.status_code != 200: + r.close() + output.error("Page download got status code {}".format(str(r.status_code))) + raise exceptions.ScrapingError + fut = download_pool.submit(self.page_download_task, i, r) + fut.add_done_callback(partial(self.page_download_finish, + bar, files)) + futures.append(fut) + else: + try: + del files[i] + except IndexError: + self.session.close() + raise exceptions.ScrapingError + concurrent.futures.wait(futures) + self.create_zip(files) + self.session.close() + + def from_url(url): + chap_num = re.match((r"https?://(?:(?:www|m)\.)?mangahere\.cc/(?:roll_)?" + r"manga/[^/]+(?:(?:/v[0-9]+)?/c([0-9\.]+))" + r"/[0-9]+\.html"), url)\ + .groups()[0] + if "v" in chap_num: + chap_num = chap_num.replace("v", "").replace("c", ".") + else: + chap_num = chap_num.replace("c", "") + if chap_num == "000": + chap_num = "0" + else: + chap_num = chap_num.lstrip("0") + parent_url = re.match((r"(https?://((www|m)\.)?mangahere\.cc/(roll_)?" 
+ r"manga/[^/]+)(/v[0-9]+)?/" + r"c[0-9\.]+/[0-9]+\.html"), + url).groups()[0] + series = MangahereSeries(parent_url) + for chapter in series.chapters: + if chapter.chapter == str(chap_num): + return chapter + return None + + def available(self): + if not getattr(self, "cpage", None): + self.cpage = requests.get(self.url.replace("m.", "www.")) + return self.cpage.status_code == 200 diff --git a/cum/scrapers/manganelo.py b/cum/scrapers/manganelo.py new file mode 100644 index 0000000..35f7ac9 --- /dev/null +++ b/cum/scrapers/manganelo.py @@ -0,0 +1,124 @@ +from bs4 import BeautifulSoup +from cum import config, exceptions, output +from cum.scrapers.base import BaseChapter, BaseSeries, download_pool +from functools import partial +from warnings import filterwarnings +import concurrent.futures +import json +import re +import requests + + +class ManganeloSeries(BaseSeries): + url_re = re.compile(r'https?://manganelo\.com/manga/.+') + + def __init__(self, url, **kwargs): + super().__init__(url, **kwargs) + filterwarnings(action = "ignore", message = "unclosed", category = ResourceWarning) + spage = requests.get(url) + if spage.status_code == 404: + raise exceptions.ScrapingError + self.soup = BeautifulSoup(spage.text, config.get().html_parser) + # 404 pages actually return HTTP 200 + if self.soup.find("title").text == "404 Not Found": + raise exceptions.ScrapingError + self.chapters = self.get_chapters() + + def get_chapters(self): + try: + rows = self.soup.find_all("li", class_="a-h") + except AttributeError: + raise exceptions.ScrapingError() + chapters = [] + for i, row in enumerate(rows): + chap_num = re.match(r"https?://manganelo\.com/chapter/.+/?chapter_([0-9\.]+)", + row.find("a")["href"]).groups()[0] + chap_url = row.find("a")["href"] + chap_name = row.find("a")["title"] + chap_date = row.find_all("span")[1]["title"] + result = ManganeloChapter(name=self.name, + alias=self.alias, + chapter=chap_num, + url=chap_url, + title=chap_name, + groups=[], + upload_date=chap_date) + chapters.append(result) + return chapters + + @property + def name(self): + try: + return re.match(r"(.+) Manga Online Free - Manganelo", + self.soup.find("title").text).groups()[0] + except AttributeError: + raise exceptions.ScrapingError + + +class ManganeloChapter(BaseChapter): + url_re = re.compile((r'https?://manganelo\.com/' + r'chapter/.+/chapter_[0-9\.]')) + upload_date = None + uses_pages = True + + # 404 pages actually return HTTP 200 + # thus this method override + def available(self): + if not getattr(self, "cpage", None): + self.cpage = requests.get(self.url) + if not getattr(self, "soup", None): + self.soup = BeautifulSoup(self.cpage.text, + config.get().html_parser) + return self.soup.find("title").text != "404 Not Found" + + def download(self): + if not getattr(self, "cpage", None): + self.cpage = requests.get(self.url) + if not getattr(self, "soup", None): + self.soup = BeautifulSoup(self.cpage.text, + config.get().html_parser) + + # 404 pages actually return HTTP 200 + if self.soup.find("title").text == "404 Not Found": + raise exceptions.ScrapingError + pages = [ image["src"] for image in self.soup.find("div", class_ = "container-chapter-reader").find_all("img") ] + + futures = [] + files = [None] * len(pages) + req_session = requests.Session() + with self.progress_bar(pages) as bar: + for i, page in enumerate(pages): + retries = 0 + while retries < 10: + try: + r = req_session.get(page, stream=True) + if r.status_code != 200: + output.warning('Failed to fetch page with status {}, retrying #{}' + 
.format(str(r.status_code), str(retries))) + retries += 1 + else: + break + except requests.exceptions.ConnectionError: + retries += 1 + if r.status_code != 200: + output.error('Failed to fetch page with status {}, giving up' + .format(str(r.status_code))) + raise ValueError + fut = download_pool.submit(self.page_download_task, i, r) + fut.add_done_callback(partial(self.page_download_finish, + bar, files)) + futures.append(fut) + concurrent.futures.wait(futures) + self.create_zip(files) + req_session.close() + + def from_url(url): + cpage = requests.get(url) + soup = BeautifulSoup(cpage.text, config.get().html_parser) + iname = re.match("https?://manganelo\.com/chapter/(.+)/chapter_[0-9\.]+", + url).groups()[0] + series = ManganeloSeries("https://manganelo.com/manga/" + iname) + for chapter in series.chapters: + if chapter.url == url: + return chapter + return None diff --git a/cum/scrapers/mangasee.py b/cum/scrapers/mangasee.py new file mode 100644 index 0000000..60e2d73 --- /dev/null +++ b/cum/scrapers/mangasee.py @@ -0,0 +1,148 @@ +from bs4 import BeautifulSoup +from cum import config, exceptions, output +from cum.scrapers.base import BaseChapter, BaseSeries, download_pool +from functools import partial +import concurrent.futures +import json +import re +import requests + + +class MangaseeSeries(BaseSeries): + url_re = re.compile(r'https?://mangaseeonline\.us/manga/.+') + multi_season_regex = re.compile((r"(https?://mangaseeonline\.us)" + r"?/read-online/" + r".+-chapter-[0-9\.]+-index-" + r"([0-9]+)-page-[0-9]+\.html")) + + def __init__(self, url, **kwargs): + super().__init__(url, **kwargs) + spage = requests.get(url) + if spage.status_code == 404: + raise exceptions.ScrapingError + self.soup = BeautifulSoup(spage.text, config.get().html_parser) + self.chapters = self.get_chapters() + + def _get_chapnum_multiseason_series(self, url, chap_num): + if not re.match(self.multi_season_regex, url): + # chapter is from season 1 + return "01." + chap_num.zfill(3) + else: + # chapter is from season >1 + season = re.match(self.multi_season_regex, url).groups()[1] + return season.zfill(2) + "." + chap_num.zfill(3) + + def get_chapters(self): + try: + rows = self.soup.find_all("a", class_="list-group-item") + except AttributeError: + raise exceptions.ScrapingError() + chapters = [] + for i, row in enumerate(rows): + chap_num = re.match(r"Read .+ Chapter ([0-9\.]+) For Free Online", + row["title"]).groups()[0] + if not hasattr(self, "is_multi_season"): + if re.match(self.multi_season_regex, row["href"]): + self.is_multi_season = True + chap_url = "https://mangaseeonline.us" + row["href"] + chap_name = row.find("span").text + chap_date = row.find("time").text + result = MangaseeChapter(name=self.name, + alias=self.alias, + chapter=chap_num, + url=chap_url, + title=chap_name, + groups=[], + upload_date=chap_date) + chapters.append(result) + # the chapters in the first season of a multi-season title + # are indistinguishable from a non-multi-season title. 
thus + # we must retroactively reanalyze all chapters and adjust + # chapter numbers if *any* are multi-season + if hasattr(self, "is_multi_season"): + for chapter in chapters: + chapter.chapter = self.\ + _get_chapnum_multiseason_series(chapter.url, + chapter.chapter) + + return chapters + + @property + def name(self): + try: + return re.match(r"Read (.+) Man[a-z]+ For Free \| MangaSee", + self.soup.find("title").text).groups()[0] + except AttributeError: + raise exceptions.ScrapingError + + +class MangaseeChapter(BaseChapter): + url_re = re.compile((r'https?://mangaseeonline\.us/' + r'read-online/.+-chapter-[0-9\.]+-page-[0-9]+\.html')) + upload_date = None + uses_pages = True + + def download(self): + if not getattr(self, "cpage", None): + self.cpage = requests.get(self.url) + if not getattr(self, "soup", None): + self.soup = BeautifulSoup(self.cpage.text, + config.get().html_parser) + + for script in self.soup.find_all("script"): + if len(script.contents) and re.match("\n\tChapterArr=.+", script.contents[0]): + image_list = script.contents[0] + continue + + image_list = re.sub("\n\tChapterArr=", "", image_list) + image_list = re.sub(";\n\t?", "", image_list) + image_list = re.sub("PageArr=", ",", image_list) + image_list = "[" + image_list + "]" + image_list = json.loads(image_list)[1] + pages = [] + for image in image_list: + if image != "CurPage": + if re.match(".+blogspot.+", image_list[image]): + image_list[image] = image_list[image].\ + replace("http://", "https://") + pages.append(image_list[image]) + + futures = [] + files = [None] * len(pages) + req_session = requests.Session() + with self.progress_bar(pages) as bar: + for i, page in enumerate(pages): + retries = 0 + while retries < 10: + try: + r = req_session.get(page, stream=True) + if r.status_code != 200: + output.warning('Failed to fetch page with status {}, retrying #{}' + .format(str(r.status_code), str(retries))) + retries += 1 + else: + break + except requests.exceptions.ConnectionError: + retries += 1 + if r.status_code != 200: + output.error('Failed to fetch page with status {}, giving up' + .format(str(r.status_code))) + raise exceptions.ScrapingError + fut = download_pool.submit(self.page_download_task, i, r) + fut.add_done_callback(partial(self.page_download_finish, + bar, files)) + futures.append(fut) + concurrent.futures.wait(futures) + self.create_zip(files) + req_session.close() + + def from_url(url): + cpage = requests.get(url) + soup = BeautifulSoup(cpage.text, config.get().html_parser) + # chap_num = soup.find("span", class_="CurChapter").text + iname = soup.find("a", class_="list-link")["href"] + series = MangaseeSeries("https://mangaseeonline.us" + iname) + for chapter in series.chapters: + if chapter.url == url: + return chapter + return None diff --git a/setup.py b/setup.py index f254766..9f4cbfb 100644 --- a/setup.py +++ b/setup.py @@ -119,7 +119,8 @@ def write_version_file(): 'Click', 'natsort', 'requests', - 'SQLAlchemy' + 'SQLAlchemy', + 'jsbeautifier' ], extras_require={ 'testing': ['codecov', 'cov-core', 'nose2', 'pycodestyle'] diff --git a/tests/test_scraper_mangahere.py b/tests/test_scraper_mangahere.py new file mode 100644 index 0000000..cba8dfd --- /dev/null +++ b/tests/test_scraper_mangahere.py @@ -0,0 +1,221 @@ +from bs4 import BeautifulSoup +from cum import config, exceptions +from nose.tools import nottest +from urllib.parse import urljoin +import cumtest +import os +import requests +import unittest +import zipfile + + +class TestMangahere(cumtest.CumTest): + MANGAHERE_URL = 
'https://www.mangahere.cc/' + + def setUp(self): + super().setUp() + global mangahere + from cum.scrapers import mangahere + + def tearDown(self): + self.directory.cleanup() + + def get_five_latest_releases(self): + r = requests.get(self.MANGAHERE_URL) + soup = BeautifulSoup(r.text, config.get().html_parser) + chapters = soup.find("ul", class_="manga-list-1-list").find_all("li") + links = [urljoin(self.MANGAHERE_URL, + x.find("p", class_="manga-list-1-item-subtitle") + .find("a").get("href")) for x in chapters] + return links[:5] + + @nottest + def series_information_tester(self, data): + series = mangahere.MangahereSeries(data['url']) + self.assertEqual(series.name, data['name']) + self.assertEqual(series.alias, data['alias']) + self.assertEqual(series.url, data['url']) + self.assertIs(series.directory, None) + self.assertEqual(len(series.chapters), len(data['chapters'])) + for chapter in series.chapters: + self.assertEqual(chapter.name, data['name']) + self.assertEqual(chapter.alias, data['alias']) + self.assertIn(chapter.chapter, data['chapters']) + data['chapters'].remove(chapter.chapter) + self.assertIs(chapter.directory, None) + self.assertEqual(len(data['chapters']), 0) + + # This test is disabled because I have discovered (via this test) + # that for some series, the mobile links for chapters return 404s, + # even the links on the actual mobile index page, making those + # chapters unavailable via mobile. Until I can get around to + # reverse-engineering the obfuscation on the desktop site, + # some series may not be able to be downloaded/followed. + @nottest + def test_chapter_download_latest(self): + latest_releases = self.get_five_latest_releases() + for release in latest_releases: + try: + chapter = mangahere.MangahereChapter.from_url(release) + except exceptions.ScrapingError as e: + print('scraping error for {} - {}'.format(release, e)) + continue + else: + chapter.get(use_db=False) + + def test_chapter_filename_decimal(self): + URL = "https://www.mangahere.cc/manga/citrus_saburouta/" + URL = "https://www.mangahere.cc/manga/citrus_saburouta/" + \ + "c020.5/1.html" + chapter = mangahere.MangahereChapter.from_url(URL) + path = os.path.join(self.directory.name, 'Citrus Saburouta', + 'Citrus Saburouta - c020 x5 [Unknown].zip') + self.assertEqual(chapter.chapter, '20.5') + self.assertEqual(chapter.filename, path) + + def test_chapter_information_normal(self): + URL = "https://www.mangahere.cc/manga/" + \ + "ramen_daisuki_koizumi_san/c018/1.html" + chapter = mangahere.MangahereChapter.from_url(URL) + self.assertEqual(chapter.alias, 'ramen-daisuki-koizumi-san') + self.assertTrue(chapter.available()) + self.assertEqual(chapter.chapter, '18') + self.assertEqual(chapter.name, 'Ramen Daisuki Koizumi san') + self.assertEqual(chapter.title, 'Ch.018 - Eighteenth Bowl: Strange-flavored Ramen') + path = os.path.join(self.directory.name, + 'Ramen Daisuki Koizumi san', + 'Ramen Daisuki Koizumi san - c018 [Unknown].zip') + self.assertEqual(chapter.filename, path) + chapter.download() + self.assertTrue(os.path.isfile(path)) + with zipfile.ZipFile(path) as chapter_zip: + files = chapter_zip.infolist() + self.assertEqual(len(files), 9) + + def test_chapter_information_multidigit(self): + URL = "https://www.mangahere.cc/manga/" + \ + "tsurezure_children/c192/1.html" + chapter = mangahere.MangahereChapter.from_url(URL) + self.assertEqual(chapter.alias, 'tsurezure-children') + self.assertTrue(chapter.available()) + self.assertEqual(chapter.chapter, '192') + self.assertEqual(chapter.name, 'Tsurezure 
Children') + self.assertEqual(chapter.title, 'Ch.192 - There\'s Nothing Tying Us Together (Shibasaki/Ubukata)') + path = os.path.join(self.directory.name, + 'Tsurezure Children', + 'Tsurezure Children - c192 [Unknown].zip') + self.assertEqual(chapter.filename, path) + chapter.download() + self.assertTrue(os.path.isfile(path)) + with zipfile.ZipFile(path) as chapter_zip: + files = chapter_zip.infolist() + self.assertEqual(len(files), 7) + + def test_chapter_information_chapterzero(self): + URL = "https://www.mangahere.cc/manga/" + \ + "inu_to_hasami_wa_tsukaiyou/c000/1.html" + chapter = mangahere.MangahereChapter.from_url(URL) + self.assertEqual(chapter.alias, 'inu-to-hasami-wa-tsukaiyou') + self.assertEqual(chapter.chapter, '0') + self.assertEqual(chapter.name, 'Inu to Hasami wa Tsukaiyou') + self.assertEqual(chapter.title, 'Ch.000') + path = os.path.join( + self.directory.name, 'Inu to Hasami wa Tsukaiyou', + 'Inu to Hasami wa Tsukaiyou - c000 [Unknown].zip') + self.assertEqual(chapter.filename, path) + chapter.download() + self.assertTrue(os.path.isfile(path)) + with zipfile.ZipFile(path) as chapter_zip: + files = chapter_zip.infolist() + self.assertEqual(len(files), 33) + + def test_chapter_information_volume(self): + URL = "https://www.mangahere.cc/manga/" + \ + "full_metal_alchemist/v026/c107/1.html" + chapter = mangahere.MangahereChapter.from_url(URL) + self.assertEqual(chapter.alias, 'full-metal-alchemist') + self.assertEqual(chapter.chapter, '107') + self.assertEqual(chapter.name, 'Full Metal Alchemist') + self.assertEqual(chapter.title, 'Vol.026 Ch.107 - The Final Battle') + path = os.path.join( + self.directory.name, 'Full Metal Alchemist', + 'Full Metal Alchemist - c107 [Unknown].zip') + self.assertEqual(chapter.filename, path) + chapter.download() + self.assertTrue(os.path.isfile(path)) + with zipfile.ZipFile(path) as chapter_zip: + files = chapter_zip.infolist() + self.assertEqual(len(files), 70) + + def test_chapter_information_volume_decimal(self): + URL = "https://www.mangahere.cc/manga/" + \ + "ai_yori_aoshi/v16/c133.5/1.html" + chapter = mangahere.MangahereChapter.from_url(URL) + self.assertEqual(chapter.alias, 'ai-yori-aoshi') + self.assertEqual(chapter.chapter, '133.5') + self.assertEqual(chapter.name, 'Ai Yori Aoshi') + self.assertEqual(chapter.title, 'Vol.16 Ch.133.5 - Special Chapter - Hanakotoba - Language of Flower') + path = os.path.join( + self.directory.name, 'Ai Yori Aoshi', + 'Ai Yori Aoshi - c133 x5 [Unknown].zip') + self.assertEqual(chapter.filename, path) + chapter.download() + self.assertTrue(os.path.isfile(path)) + with zipfile.ZipFile(path) as chapter_zip: + files = chapter_zip.infolist() + self.assertEqual(len(files), 15) + + def test_series_invalid(self): + URL = "https://www.mangahere.cc/manga/not_a_manga" + with self.assertRaises(exceptions.ScrapingError): + series = mangahere.MangahereSeries(url=URL) + + def test_chapter_unavailable_badvolume(self): + URL = "https://www.mangahere.cc/manga/oyasumi_punpun/v99/c147/1.html" + chapter = mangahere.MangahereChapter(url=URL) + self.assertFalse(chapter.available()) + + def test_chapter_unavailable_badchapter(self): + URL = "https://www.mangahere.cc/manga/oyasumi_punpun/v09/c999/1.html" + chapter = mangahere.MangahereChapter(url=URL) + self.assertFalse(chapter.available()) + + def test_chapter_unavailable_flatchapters(self): + URL = "https://www.mangahere.cc/manga/nikoniko_x_punpun/c999/1.html" + chapter = mangahere.MangahereChapter(url=URL) + self.assertFalse(chapter.available()) + + def 
test_series_flatchapters(self): + data = {'alias': 'aria', + 'chapters': ['1', '2', '3', '4', '5', '6', '7', '8', + '9', '10', '10.5', '11', '12', '13', '14', '15', + '16', '17', '18', '19', '20', '21', '22', '23', + '24', '25', '26', '27', '28', '29', '30', '30.5', + '31', '32', '33', '34', '35', '35.5', '36', + '37', '37.5', '38', '39', '40', '41', '42', '43', + '44', '45', '45.5', '46', '47', '48', '49', + '50', '50.5', '51', '52', '53', '54', '55', '56', + '57', '57.5', '58', '59', '60', '60.1'], + 'name': 'Aria', + 'url': 'https://www.mangahere.cc/manga/aria'} + self.series_information_tester(data) + + def test_series_volumes(self): + data = {'alias': 'prunus-girl', + 'chapters': ['1', '1.5', '2', '3', '4', + '5', '5.5', '6', '7', '8', + '9', '10', '11', '11.5', '12', + '13', '14', '15', '14', '15', + '16', '17', '18', '19', '20', + '21', '22', '23', '24', '25', + '26', '27', '28', '29', '30', + '31', '32', '32.5', '33', '34', + '35', '36', '37', '38', '39', + '40', '41', '42', '42.5'], + 'name': 'Prunus Girl', + 'url': 'https://www.mangahere.cc/manga/prunus_girl'} + self.series_information_tester(data) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_scraper_manganelo.py b/tests/test_scraper_manganelo.py new file mode 100644 index 0000000..f28a69c --- /dev/null +++ b/tests/test_scraper_manganelo.py @@ -0,0 +1,149 @@ +from bs4 import BeautifulSoup +from cum import config, exceptions +from nose.tools import nottest +from urllib.parse import urljoin +from warnings import filterwarnings +import cumtest +import os +import requests +import unittest +import zipfile + + +class TestManganelo(cumtest.CumTest): + MANGANELO_URL = 'https://manganelo.com/genre-all' + + def setUp(self): + super().setUp() + global manganelo + filterwarnings(action = "ignore", message = "unclosed", category = ResourceWarning) + from cum.scrapers import manganelo + + def tearDown(self): + self.directory.cleanup() + + def get_five_latest_releases(self): + r = requests.get(self.MANGANELO_URL) + soup = BeautifulSoup(r.text, config.get().html_parser) + chapters = soup.find_all("a", class_="genres-item-chap") + links = [x["href"] for x in chapters] + return links[:5] + + @nottest + def series_information_tester(self, data): + series = manganelo.ManganeloSeries(data['url']) + self.assertEqual(series.name, data['name']) + self.assertEqual(series.alias, data['alias']) + self.assertEqual(series.url, data['url']) + self.assertIs(series.directory, None) + self.assertEqual(len(series.chapters), len(data['chapters'])) + for chapter in series.chapters: + self.assertEqual(chapter.name, data['name']) + self.assertEqual(chapter.alias, data['alias']) + self.assertIn(chapter.chapter, data['chapters']) + data['chapters'].remove(chapter.chapter) + self.assertIs(chapter.directory, None) + self.assertEqual(len(data['chapters']), 0) + + # This test is disabled temporarily due to the architecture of + # the chapter.from_url method, which assumes that if a chapter + # exists then it will be listed on the series page. Manganelo + # seems to violate this assumption, in that there are chapters + # which are accessible from the "latest chapters" page but which + # are not listed on their respective series' pages, at least + # not immediately. 
+ # TODO: come back to this test and find a way to construct a + # chapter without requiring metadata from the series page + def _test_chapter_download_latest(self): + latest_releases = self.get_five_latest_releases() + for release in latest_releases: + try: + chapter = manganelo.ManganeloChapter.from_url(release) + except exceptions.ScrapingError as e: + print('scraping error for {} - {}'.format(release, e)) + continue + else: + chapter.get(use_db=False) + + def test_chapter_filename_decimal(self): + URL = "https://manganelo.com/chapter/citrus_saburo_uta/chapter_24.6" + chapter = manganelo.ManganeloChapter.from_url(URL) + path = os.path.join(self.directory.name, 'Citrus Saburo Uta', + 'Citrus Saburo Uta - c024 x6 [Unknown].zip') + self.assertEqual(chapter.chapter, '24.6') + self.assertEqual(chapter.filename, path) + + def test_chapter_information_normal(self): + URL = "https://manganelo.com/chapter/ramen_daisuki_koizumisan/chapter_18" + chapter = manganelo.ManganeloChapter.from_url(URL) + self.assertEqual(chapter.alias, 'ramen-daisuki-koizumi-san') + self.assertTrue(chapter.available()) + self.assertEqual(chapter.chapter, '18') + self.assertEqual(chapter.name, 'Ramen Daisuki Koizumi-San') + self.assertEqual(chapter.title, 'Ramen Daisuki Koizumi-san Chapter 18') + path = os.path.join(self.directory.name, + 'Ramen Daisuki Koizumi-San', + 'Ramen Daisuki Koizumi-San - c018 [Unknown].zip') + self.assertEqual(chapter.filename, path) + chapter.download() + self.assertTrue(os.path.isfile(path)) + with zipfile.ZipFile(path) as chapter_zip: + files = chapter_zip.infolist() + self.assertEqual(len(files), 8) + + def test_chapter_information_chapterzero(self): + URL = "https://manganelo.com/chapter/inu_to_hasami_wa_tsukaiyou/chapter_0" + chapter = manganelo.ManganeloChapter.from_url(URL) + self.assertEqual(chapter.alias, 'inu-to-hasami-wa-tsukaiyou') + self.assertEqual(chapter.chapter, '0') + self.assertEqual(chapter.name, 'Inu To Hasami Wa Tsukaiyou') + self.assertEqual(chapter.title, 'Inu to Hasami wa Tsukaiyou Vol.1 Chapter 0') + path = os.path.join( + self.directory.name, 'Inu To Hasami Wa Tsukaiyou', + 'Inu To Hasami Wa Tsukaiyou - c000 [Unknown].zip') + self.assertEqual(chapter.filename, path) + chapter.download() + self.assertTrue(os.path.isfile(path)) + with zipfile.ZipFile(path) as chapter_zip: + files = chapter_zip.infolist() + self.assertEqual(len(files), 32) + + def test_series_invalid(self): + URL = "https://manganelo.com/manga/test_bad_manga_name" + with self.assertRaises(exceptions.ScrapingError): + series = manganelo.ManganeloSeries(url=URL) + + def test_chapter_unavailable(self): + URL = "https://manganelo.com/chapter/oyasumi_punpun/chapter_999" + chapter = manganelo.ManganeloChapter(url=URL) + self.assertFalse(chapter.available()) + + def test_series_oneword(self): + data = {'alias': 'aria', + 'chapters': ['1', '2', '3', '4', '5', '6', '7', '8', + '9', '10', '10.5', '11', '12', '13', '14', '15', + '16', '17', '18', '19', '20', '21', '22', '23', + '24', '25', '26', '27', '28', '29', '30', '30.5', + '31', '32', '33', '34', '35', '35.5', '36', + '37', '37.5', '38', '39', '40', '41', '42', '43', + '44', '45', '45.5', '46', '47', '48', '49', + '50', '50.5', '51', '52', '53', '54', '55', '56', + '57', '57.5', '58', '59', '60', '60.1'], + 'name': 'Aria', + 'url': 'https://manganelo.com/manga/aria'} + self.series_information_tester(data) + + def test_series_multiplewords(self): + data = {'alias': 'prunus-girl', + 'chapters': ['1', '1.5', '2', '3', '4', '5', '5.5', '6', '7', '8', + '9', '10', 
'11', '11.5', '12', '13', '14', '15', + '16', '16.5', '17', '18', '19', '20', '21', '22', + '23', '24', '25', '26', '27', '28', '29', '30', + '31', '32', '32.5', '33', '34', '35', '36', '37', + '38', '39', '40', '41', '42', '42.5'], + 'name': 'Prunus Girl', + 'url': 'https://manganelo.com/manga/prunus_girl'} + self.series_information_tester(data) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_scraper_mangasee.py b/tests/test_scraper_mangasee.py new file mode 100644 index 0000000..ccf9176 --- /dev/null +++ b/tests/test_scraper_mangasee.py @@ -0,0 +1,180 @@ +from bs4 import BeautifulSoup +from cum import config, exceptions +from nose.tools import nottest +from urllib.parse import urljoin +import cumtest +import os +import requests +import unittest +import zipfile + + +class TestMangasee(cumtest.CumTest): + MANGASEE_URL = 'https://mangaseeonline.us/' + + def setUp(self): + super().setUp() + global mangasee + from cum.scrapers import mangasee + + def tearDown(self): + self.directory.cleanup() + + def get_five_latest_releases(self): + r = requests.get(self.MANGASEE_URL) + soup = BeautifulSoup(r.text, config.get().html_parser) + chapters = soup.find_all("a", class_="latestSeries") + links = [urljoin(self.MANGASEE_URL, x.get("href")) for x in chapters] + return links[:5] + + @nottest + def series_information_tester(self, data): + series = mangasee.MangaseeSeries(data['url']) + self.assertEqual(series.name, data['name']) + self.assertEqual(series.alias, data['alias']) + self.assertEqual(series.url, data['url']) + self.assertIs(series.directory, None) + self.assertEqual(len(series.chapters), len(data['chapters'])) + for chapter in series.chapters: + self.assertEqual(chapter.name, data['name']) + self.assertEqual(chapter.alias, data['alias']) + self.assertIn(chapter.chapter, data['chapters']) + data['chapters'].remove(chapter.chapter) + self.assertIs(chapter.directory, None) + self.assertEqual(len(data['chapters']), 0) + + def test_chapter_download_latest(self): + latest_releases = self.get_five_latest_releases() + for release in latest_releases: + try: + chapter = mangasee.MangaseeChapter.from_url(release) + except exceptions.ScrapingError as e: + print('scraping error for {} - {}'.format(release, e)) + continue + else: + chapter.get(use_db=False) + + def test_chapter_filename_decimal(self): + URL = "https://mangaseeonline.us/read-online/" + \ + "Citrus-S-A-B-U-R-O-Uta-chapter-20.5-page-1.html" + chapter = mangasee.MangaseeChapter.from_url(URL) + path = os.path.join(self.directory.name, 'Citrus SABURO Uta', + 'Citrus SABURO Uta - c020 x5 [Unknown].zip') + self.assertEqual(chapter.chapter, '20.5') + self.assertEqual(chapter.filename, path) + + def test_chapter_information_normal(self): + URL = "https://mangaseeonline.us/read-online/" + \ + "Ramen-Daisuki-Koizumi-San-chapter-18-page-1.html" + chapter = mangasee.MangaseeChapter.from_url(URL) + self.assertEqual(chapter.alias, 'ramen-daisuki-koizumi-san') + self.assertTrue(chapter.available()) + self.assertEqual(chapter.chapter, '18') + self.assertEqual(chapter.name, 'Ramen Daisuki Koizumi-san') + self.assertEqual(chapter.title, 'Chapter 18') + path = os.path.join(self.directory.name, + 'Ramen Daisuki Koizumi-san', + 'Ramen Daisuki Koizumi-san - c018 [Unknown].zip') + self.assertEqual(chapter.filename, path) + chapter.download() + self.assertTrue(os.path.isfile(path)) + with zipfile.ZipFile(path) as chapter_zip: + files = chapter_zip.infolist() + self.assertEqual(len(files), 8) + + def test_chapter_information_chapterzero(self): 
+ URL = "https://mangaseeonline.us/read-online/" + \ + "Inu-To-Hasami-Wa-Tsukaiyou-chapter-0-page-1.html" + chapter = mangasee.MangaseeChapter.from_url(URL) + self.assertEqual(chapter.alias, 'inu-to-hasami-wa-tsukaiyou') + self.assertEqual(chapter.chapter, '0') + self.assertEqual(chapter.name, 'Inu to Hasami wa Tsukaiyou') + self.assertEqual(chapter.title, 'Chapter 0') + path = os.path.join( + self.directory.name, 'Inu to Hasami wa Tsukaiyou', + 'Inu to Hasami wa Tsukaiyou - c000 [Unknown].zip') + self.assertEqual(chapter.filename, path) + chapter.download() + self.assertTrue(os.path.isfile(path)) + with zipfile.ZipFile(path) as chapter_zip: + files = chapter_zip.infolist() + self.assertEqual(len(files), 51) + + def test_chapter_information_multiseason(self): + URL = "https://mangaseeonline.us/read-online/" + \ + "Kubera-chapter-3-index-2-page-1.html" + chapter = mangasee.MangaseeChapter.from_url(URL) + self.assertEqual(chapter.alias, 'kubera') + self.assertEqual(chapter.chapter, '02.003') + self.assertEqual(chapter.name, 'Kubera') + self.assertEqual(chapter.title, 'S2 - Chapter 3') + path = os.path.join( + self.directory.name, 'Kubera', + 'Kubera - c002 x003 [Unknown].zip') + self.assertEqual(chapter.filename, path) + chapter.download() + self.assertTrue(os.path.isfile(path)) + with zipfile.ZipFile(path) as chapter_zip: + files = chapter_zip.infolist() + self.assertEqual(len(files), 39) + + def test_chapter_information_multiseason_decimal(self): + URL = "https://mangaseeonline.us/read-online/" + \ + "Kubera-chapter-164.5-index-2-page-1.html" + chapter = mangasee.MangaseeChapter.from_url(URL) + self.assertEqual(chapter.alias, 'kubera') + self.assertEqual(chapter.chapter, '02.164.5') + self.assertEqual(chapter.name, 'Kubera') + self.assertEqual(chapter.title, 'S2 - Chapter 164.5') + path = os.path.join( + self.directory.name, 'Kubera', + 'Kubera - c002 x164.5 [Unknown].zip') + self.assertEqual(chapter.filename, path) + chapter.download() + self.assertTrue(os.path.isfile(path)) + with zipfile.ZipFile(path) as chapter_zip: + files = chapter_zip.infolist() + self.assertEqual(len(files), 45) + + def test_series_invalid(self): + URL = "https://mangaseeonline.us/read-online/" + \ + "not_a_manga" + with self.assertRaises(exceptions.ScrapingError): + series = mangasee.MangaseeSeries(url=URL) + + def test_chapter_unavailable(self): + URL = "https://mangaseeonline.us/read-online/" + \ + "Oyasumi-Punpun-chapter-999-page-1.html" + chapter = mangasee.MangaseeChapter(url=URL) + self.assertFalse(chapter.available()) + + def test_series_oneword(self): + data = {'alias': 'aria', + 'chapters': ['1', '2', '3', '4', '5', '6', '7', '8', + '9', '10', '10.5', '11', '12', '13', '14', '15', + '16', '17', '18', '19', '20', '21', '22', '23', + '24', '25', '26', '27', '28', '29', '30', '30.5', + '31', '32', '33', '34', '35', '35.5', '36', + '37', '37.5', '38', '39', '40', '41', '42', '43', + '44', '45', '45.5', '46', '47', '48', '49', + '50', '50.5', '51', '52', '53', '54', '55', '56', + '57', '57.5', '58', '59', '60', '60.5'], + 'name': 'Aria', + 'url': 'https://mangaseeonline.us/manga/Aria'} + self.series_information_tester(data) + + def test_series_multiplewords(self): + data = {'alias': 'prunus-girl', + 'chapters': ['1', '2', '3', '4', '5', '6', '7', '8', + '9', '10', '11', '12', '13', '14', '15', + '16', '17', '18', '19', '20', '21', '22', + '23', '24', '25', '26', '27', '28', '29', '30', + '31', '32', '32.5', '33', '34', '35', '36', '37', + '38', '39', '40', '41', '42', '43'], + 'name': 'Prunus Girl', + 'url': 
'https://mangaseeonline.us/manga/Prunus-Girl'} + self.series_information_tester(data) + + +if __name__ == '__main__': + unittest.main()