diff --git a/dosagelib/cmd.py b/dosagelib/cmd.py
index 48f774b4e3..877541d238 100644
--- a/dosagelib/cmd.py
+++ b/dosagelib/cmd.py
@@ -115,7 +115,7 @@ def setup_options(console: console.Console) -> ArgumentParser:
     parser.add_argument('--list-all', action='store_true',
         help=argparse.SUPPRESS)
     comic_arg = parser.add_argument('comic', nargs='*',
-        help='comic module name (including case insensitive substrings)')
+        help='comic module name (including case insensitive substrings). Also accepts URLs on certain websites (ComicFury, WebToons).')
     comic_arg.completer = scraper_completion
     with contextlib.suppress(ImportError):
         completers = importlib.import_module('argcomplete.completers')
diff --git a/dosagelib/director.py b/dosagelib/director.py
index 7402650c83..4dfe083813 100644
--- a/dosagelib/director.py
+++ b/dosagelib/director.py
@@ -210,13 +210,17 @@ def getScrapers(comics: Collection[str], basepath: str, adult=True, listing=Fals
             # make the following command work:
             # find Comics -type d | xargs -n1 -P10 dosage -b Comics
            comic = comic[len(basepath) + 1:].lstrip(os.sep)
-        if ':' in comic:
-            name, index = comic.split(':', 1)
-            indexes = index.split(',')
+        if comic.startswith(('http:', 'https:')):
+            scraper = scrapercache.findbyurl(comic)
+            indexes = None
         else:
-            name = comic
-            indexes = None
-        scraper = scrapercache.find(name)
+            if ':' in comic:
+                name, index = comic.split(':', 1)
+                indexes = index.split(',')
+            else:
+                name = comic
+                indexes = None
+            scraper = scrapercache.find(name)
         if shouldRunScraper(scraper, adult, listing):
             # FIXME: Find a better way to work with indexes
             scraper.indexes = indexes
diff --git a/dosagelib/plugins/comicfury.py b/dosagelib/plugins/comicfury.py
index d7e514ead5..797a42fb55 100644
--- a/dosagelib/plugins/comicfury.py
+++ b/dosagelib/plugins/comicfury.py
@@ -88,6 +88,26 @@ def shouldSkipUrl(self, url, data):
         return (self.match(data, '//div[@id="comicimagewrap"]//video') and
                 not self.match(data, '//div[@id="comicimagewrap"]//img'))
 
+    @classmethod
+    def handleurl(cls, url: str) -> list[ParserScraper]:
+        import re
+        rs = [
+            r"^http.*comicfury\.com/read/([^/]+)/?.*",
+            r"^http.*://(.+?)\.thecomicseries\.com/?.*",
+            r"^http.*://(.+?)\.the-comic\.org/?.*",
+            r"^http.*://(.+?)\.thecomicstrip\.org/?.*",
+            r"^http.*://(.+?)\.cfw\.me/?.*",
+            r"^http.*://(.+?)\.webcomic\.ws/?.*",
+        ]
+        for r in rs:
+            m = re.match(r, url.lower())
+            if m is not None:
+                name = m.group(1)
+                ps = cls(name, name)
+                ps.multipleImagesPerStrip = True
+                return [ps]
+        return []
+
     @classmethod
     def getmodules(cls):
         return (
diff --git a/dosagelib/plugins/webtoons.py b/dosagelib/plugins/webtoons.py
index c62726e959..8f836c36d4 100644
--- a/dosagelib/plugins/webtoons.py
+++ b/dosagelib/plugins/webtoons.py
@@ -47,6 +47,16 @@ def namer(self, imageUrl, pageUrl):
         imageExt = pageUrl.rsplit('.', 1)[-1].split('?', 1)[0]
         return "%s-%03d.%s" % (episodeNum, imageNum, imageExt)
 
+    @classmethod
+    def handleurl(cls, url: str) -> list[ParserScraper]:
+        import re
+        m = re.match(r"^http.*webtoons\.com/.+?/(.+?/.+?)/.+title_no=(\d+)", url.lower())
+        if m is not None:
+            path = m.group(1)
+            number = m.group(2)
+            return [cls(path, path, number)]
+        return []
+
     @classmethod
     def getmodules(cls):
         return (
diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py
index 648ff5fdb9..e5c47bc0cf 100644
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -54,6 +54,7 @@ class Scraper:
     # Stop search for previous URLs at this URL
     firstStripUrl: Optional[str] = None
 
+    # THINK: Is there harm in defaulting this to True?
     # if more than one image per URL is expected
     multipleImagesPerStrip: bool = False
 
@@ -95,6 +96,10 @@ class Scraper:
     # HTTP session for configuration & cookies
     session: http.Session = http.default_session
 
+    @classmethod
+    def handleurl(cls, url: str) -> list[Scraper]:
+        return []
+
     @classmethod
     def getmodules(cls) -> Collection[Scraper]:
         if cls.url is None:
@@ -537,6 +542,7 @@ class Cache:
     """
     def __init__(self) -> None:
         self.data: List[Scraper] = []
+        self.plugins: list[type[Scraper]] = []
        self.userdirs: set[pathlib.Path] = set()
 
     def find(self, comic: str) -> Scraper:
@@ -547,6 +553,7 @@ def find(self, comic: str) -> Scraper:
         if not comic:
             raise ValueError("empty comic name")
         candidates = []
+        cname = comic.lower()
 
         for scraper in self.all(include_removed=True):
             lname = scraper.name.lower()
@@ -600,6 +607,7 @@ def addmodule(self, module) -> int:
         classes = 0
         for plugin in loader.get_module_plugins(module, Scraper):
             classes += 1
+            self.plugins.append(plugin)
             self.data.extend(plugin.getmodules())
         return classes
 
@@ -615,6 +623,18 @@ def all(self, include_removed=False) -> list[Scraper]:
         else:
             return [x for x in self.data if x.url]
 
+    def findbyurl(self, url: str) -> Scraper:
+        candidates = []
+        for plugin in self.plugins:
+            candidates.extend(plugin.handleurl(url))
+
+        if len(candidates) > 1:
+            comics = ", ".join(x.name for x in candidates)
+            raise ValueError('multiple comics found: %s' % comics)
+        elif not candidates:
+            raise ValueError('no comic found for URL %r' % url)
+        return candidates[0]
+
     def validate(self) -> None:
         """Check for duplicate scraper names."""
         d: Dict[str, Scraper] = {}
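Note (not part of the patch): a minimal sketch of how the new URL dispatch is expected to behave, assuming the plugin modules above have been registered via Cache.addmodule(). The URL and comic name are illustrative, not taken from the patch.

    import re

    # The first pattern ComicFury.handleurl() tries; group(1) becomes the
    # name passed to the scraper constructor. URL below is hypothetical.
    pattern = r"^http.*comicfury\.com/read/([^/]+)/?.*"
    m = re.match(pattern, "https://comicfury.com/read/examplecomic".lower())
    assert m is not None
    assert m.group(1) == "examplecomic"

    # getScrapers() routes anything starting with http:/https: through
    # Cache.findbyurl(), so on the command line this becomes, e.g.:
    #   dosage https://comicfury.com/read/examplecomic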