diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 53c8833509a..e78880f4797 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -859,6 +859,12 @@ Consider all listed sites to potentially be NSFW. Galleries + + ToonDex + https://toondex.net/ + Chapters, Manga + + Toyhouse https://toyhou.se/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 8e7129618af..a76144c11b6 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -154,6 +154,7 @@ "tcbscans", "telegraph", "tmohentai", + "toondex", "toyhouse", "tsumino", "tumblr", diff --git a/gallery_dl/extractor/toondex.py b/gallery_dl/extractor/toondex.py new file mode 100644 index 00000000000..52d2d76975e --- /dev/null +++ b/gallery_dl/extractor/toondex.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://toondex.net/""" + +import re +from .common import MangaExtractor, ChapterExtractor +from .. import text + + +BASE_PATTERN = r"(?:https?://)?toondex\.net" + + +class ToondexBase: + """Base class for Toondex extractors""" + + category = "toondex" + root = "https://toondex.net/" + + def get_title(self, page): + """Gets the title of the manga""" + title = text.extr(page, "", "") + title = text.unescape(title).strip() + match = re.search( + r"(?:Chapter \d+ \| |Chapter \d+\.\d+ \| |Read )(.+)(?: - (Read Free Online Comics at )?ToonDex)", + title, + ) + if match: + title = match.group(1) + return title + + +class ToondexChapterExtractor(ToondexBase, ChapterExtractor): + """Extractor for manga chapters from Toondex.net""" + + subcategory = "chapter" + directory_fmt = ("{category}", "{manga}", "Chapter-{chapter:03}{chapter_minor}") + archive_fmt = "{chapter:03}{chapter_minor}_{page}" + pattern = BASE_PATTERN + r"/comics/([\w\d-]+)\/chapter-(\d+-[\d+]|\d+)/?" + example = "https://toondex.net/comics/sex-stopwatch/chapter-1/" + + def __init__(self, match): + url = match.group(0) + self.gid, self.chapter = match.groups() + ChapterExtractor.__init__(self, match, url) + + def metadata(self, page): + chapter, sep, minor = self.chapter.partition("-") + + data = { + "manga": self.get_title(page), + "manga_id": self.gid, + "chapter": text.parse_int(chapter), + "chapter_id": f"{self.gid}-chapter-{self.chapter}", + "chapter_minor": sep + minor, + } + return data + + def images(self, page): + images = [] + first_img = text.extract( + page, '