diff --git a/cum/db.py b/cum/db.py
index 8f05241..f26add3 100644
--- a/cum/db.py
+++ b/cum/db.py
@@ -242,6 +242,9 @@ def to_object(self):
         if parse.netloc == 'www.yuri-ism.net':
             from cum.scrapers.yuriism import YuriismChapter
             return YuriismChapter(**kwargs)
+        if parse.netloc == 'mangakakalot.com':
+            from cum.scrapers.mangakakalot import MangaKakalotChapter
+            return MangaKakalotChapter(**kwargs)
 
 
 class Group(Base):
diff --git a/cum/scrapers/__init__.py b/cum/scrapers/__init__.py
index cb05f04..2a3690e 100644
--- a/cum/scrapers/__init__.py
+++ b/cum/scrapers/__init__.py
@@ -3,6 +3,7 @@
 from cum.scrapers.madokami import MadokamiChapter, MadokamiSeries
 from cum.scrapers.mangadex import MangadexSeries, MangadexChapter
 from cum.scrapers.yuriism import YuriismChapter, YuriismSeries
+from cum.scrapers.mangakakalot import MangaKakalotChapter, MangaKakalotSeries
 
 series_scrapers = [
     DokiReaderSeries,
@@ -10,6 +11,7 @@
     MadokamiSeries,
     MangadexSeries,
     YuriismSeries,
+    MangaKakalotSeries,
 ]
 chapter_scrapers = [
     DokiReaderChapter,
@@ -17,4 +19,5 @@
     MadokamiChapter,
     MangadexChapter,
     YuriismChapter,
+    MangaKakalotChapter,
 ]
diff --git a/cum/scrapers/mangakakalot.py b/cum/scrapers/mangakakalot.py
new file mode 100644
index 0000000..86cef47
--- /dev/null
+++ b/cum/scrapers/mangakakalot.py
@@ -0,0 +1,86 @@
+from bs4 import BeautifulSoup
+from cum import config, exceptions
+from cum.scrapers.base import BaseChapter, BaseSeries, download_pool
+from functools import partial
+import concurrent.futures
+import re
+import requests
+
+
+class MangaKakalotSeries(BaseSeries):
+    chapter_re = re.compile(r'https://mangakakalot\.com/chapter/'
+                            r'(?P<series>\w+)/chapter_(?P<chapter>\d+(\.\d+)?)')
+    url_re = re.compile(r'https://mangakakalot\.com/manga/')
+
+    def __init__(self, url, **kwargs):
+        super().__init__(url, **kwargs)
+
+        response = requests.get(url)
+        self.soup = BeautifulSoup(response.content, config.get().html_parser)
+
+        # mangakakalot does not return 404 if there is no such title
+        try:
+            self.cached_name = self.soup.select('.manga-info-text h1')[0].text
+        except IndexError:
+            raise exceptions.ScrapingError()
+
+        self.chapters = self.get_chapters()
+
+    def get_chapters(self):
+        chapter_links = self.soup.select('.chapter-list a')
+        chapters = []
+        for chapter_link in chapter_links:
+            url = chapter_link.attrs['href']
+            chapter_info = self.chapter_re.search(url)
+            chapter = chapter_info.group('chapter')
+            # TODO: chapter titles, how do they work?
+            c = MangaKakalotChapter(name=self.cached_name, alias=self.alias,
+                                    chapter=chapter,
+                                    url=url, groups=[])
+            chapters.append(c)
+        return chapters
+
+    @property
+    def name(self):
+        return self.cached_name
+
+
+class MangaKakalotChapter(BaseChapter):
+    uses_pages = True
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        url = kwargs.get('url')
+        response = requests.get(url)
+        soup = BeautifulSoup(response.content, config.get().html_parser)
+        self.pages = soup.select('.vung-doc img')
+
+    def available(self):
+        if len(self.pages) < 1:
+            return False
+        else:
+            return True
+
+    def download(self):
+        files = [None] * len(self.pages)
+        futures = []
+        with self.progress_bar(self.pages) as bar:
+            for i, page in enumerate(self.pages):
+                r = requests.get(page.attrs['src'], stream=True)
+                fut = download_pool.submit(self.page_download_task, i, r)
+                fut.add_done_callback(partial(self.page_download_finish,
+                                              bar, files))
+                futures.append(fut)
+            concurrent.futures.wait(futures)
+            self.create_zip(files)
+
+    @staticmethod
+    def from_url(url):
+        pattern = MangaKakalotSeries.chapter_re
+        series_name = pattern.search(url).group('series')
+        series_url = 'https://mangakakalot.com/manga/{}'.format(series_name)
+        series = MangaKakalotSeries(series_url)
+        for chapter in series.chapters:
+            if chapter.url == url:
+                return chapter
+        return None
diff --git a/tests/test_scraper_mangakakalot.py b/tests/test_scraper_mangakakalot.py
new file mode 100644
index 0000000..3d3cf33
--- /dev/null
+++ b/tests/test_scraper_mangakakalot.py
@@ -0,0 +1,60 @@
+from cum import exceptions
+from cumtest import CumTest
+import os
+import zipfile
+
+
+class TestMangaKakalot(CumTest):
+    def setUp(self):
+        super().setUp()
+        global mangakakalot
+        from cum.scrapers import mangakakalot
+
+    def test_chapter_bonnouji_2(self):
+        URL = 'https://mangakakalot.com/chapter/bonnouji/chapter_2'
+        ALIAS = 'bonnouji'
+        NAME = 'Bonnouji'
+        chapter = mangakakalot.MangaKakalotChapter.from_url(URL)
+        self.assertEqual(chapter.alias, ALIAS)
+        self.assertTrue(chapter.available())
+        self.assertEqual(chapter.chapter, '2')
+        self.assertIs(chapter.directory, None)
+        self.assertEqual(chapter.name, NAME)
+        path = os.path.join(self.directory.name, NAME,
+                            'Bonnouji - c002 [Unknown].zip')
+        self.assertEqual(chapter.filename, path)
+        chapter.download()
+        self.assertTrue(os.path.isfile(path))
+        with zipfile.ZipFile(path) as chapter_zip:
+            files = chapter_zip.infolist()
+            self.assertEqual(len(files), 21)
+
+    def test_chapter_unavailable(self):
+        URL = 'https://mangakakalot.com/chapter/dk918935/chapter_' \
+              '9999999999999999999999999999999999999999999999'
+        chapter = mangakakalot.MangaKakalotChapter(url=URL)
+        self.assertFalse(chapter.available())
+
+    def test_series_invalid(self):
+        URL = 'https://mangakakalot.com/manga/not_a_manga/'
+        with self.assertRaises(exceptions.ScrapingError):
+            series = mangakakalot.MangaKakalotSeries(URL)
+
+    def test_series_kiss_and_harmony(self):
+        ALIAS = 'kiss--harmony'
+        CHAPTERS = ['0.1', '0.2', '0.3', '0.4', '0.5']
+        NAME = 'Kiss & Harmony'
+        URL = 'https://mangakakalot.com/manga/dk918935'
+        series = mangakakalot.MangaKakalotSeries(URL)
+        self.assertEqual(series.name, NAME)
+        self.assertEqual(series.alias, ALIAS)
+        self.assertEqual(series.url, URL)
+        self.assertIs(series.directory, None)
+        self.assertEqual(len(series.chapters), len(CHAPTERS))
+        for chapter in series.chapters:
+            self.assertEqual(chapter.name, NAME)
+            self.assertEqual(chapter.alias, ALIAS)
+            self.assertIn(chapter.chapter, CHAPTERS)
+            CHAPTERS.remove(chapter.chapter)
+            self.assertIs(chapter.directory, None)
+        self.assertEqual(len(CHAPTERS), 0)