dickloraine · TimDuffy · Aug 10, 2025 · Aug 10, 2025
diff --git a/.gitignore b/.gitignore
@@ -138,3 +138,4 @@ dmypy.json
 cython_debug/
 
 EmbedComicMetadata.zip
+Embed Comic Metadata.zip
diff --git a/Makefile b/Makefile
@@ -0,0 +1,3 @@
+zip:
+	rm -f 'Embed Comic Metadata.zip'
+	zip -r 'Embed Comic Metadata.zip' .
diff --git a/comicbookinfo.py b/comicbookinfo.py
@@ -53,6 +53,7 @@ def xlate(cbi_entry):
         metadata.month = xlate('publicationMonth')
         metadata.year = xlate('publicationYear')
         metadata.issueCount = xlate('numberOfIssues')
+        metadata.pageCount = xlate('pageCount')
         metadata.comments = xlate('comments')
         metadata.credits = xlate('credits')
         metadata.genre = xlate('genre')
@@ -123,6 +124,7 @@ def toInt(s):
         assign('publicationMonth', toInt(metadata.month))
         assign('publicationYear', toInt(metadata.year))
         assign('numberOfIssues', toInt(metadata.issueCount))
+        assign('pageCount', toInt(metadata.pageCount))
         assign('comments', metadata.comments)
         assign('genre', metadata.genre)
         assign('volume', toInt(metadata.volume))

diff --git a/comicinfoxml.py b/comicinfoxml.py
@@ -97,6 +97,20 @@ def assign(cix_entry, md_entry):
             if md_entry is not None:
                 ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry)
 
+        def clean_tags(tag_list):
+            # Remove Goodreads tags
+            gr_tags = [
+                'gr-read',
+                'gr-want-to-read',
+                'gr-reading'
+            ]
+
+            for t in gr_tags:
+                if t in tag_list:
+                    tag_list.remove(t)
+
+            return tag_list
+
         assign('Title', md.title)
         assign('Series', md.series)
         assign('Number', md.issue)
@@ -181,7 +195,7 @@ def assign(cix_entry, md_entry):
         md.teams = tuple_to_string(md.teams)
         md.locations = tuple_to_string(md.locations)
         md.genre = tuple_to_string(md.genre)
-        md.tags = tuple_to_string(md.tags)
+        md.tags = tuple_to_string(clean_tags(md.tags))
 
         assign('Publisher', md.publisher)
         assign('Imprint', md.imprint)

diff --git a/comicmetadata.py b/comicmetadata.py
@@ -1,5 +1,9 @@
 from __future__ import (unicode_literals, division, absolute_import,
                         print_function)
+import io
+import pathlib
+import re
+import unicodedata
 
 __license__   = 'GPL v3'
 __copyright__ = '2015, dloraine'
@@ -14,6 +18,7 @@
 from calibre_plugins.EmbedComicMetadata.genericmetadata import GenericMetadata
 from calibre_plugins.EmbedComicMetadata.comicinfoxml import ComicInfoXml
 from calibre_plugins.EmbedComicMetadata.comicbookinfo import ComicBookInfo
+from calibre.utils.zipfile import safe_replace
 
 import os
 import sys
@@ -75,6 +80,7 @@ def __init__(self, book_id, ia):
     def __del__(self):
         delete_temp_file(self.file)
 
+    # Metadata embed
     def get_comic_metadata_from_file(self):
         if self.checked_for_metadata:
             return
@@ -155,9 +161,13 @@ def convert_calibre_md_to_comic_md(self):
         role = partial(set_role, credits=self.calibre_md_in_comic_format.credits)
         update_field = partial(update_comic_field, target=self.calibre_md_in_comic_format)
 
+        # Hack for no_sync authors
+        author_clean = clean_authors(mi.authors)
+
         # update the fields of comic metadata
         update_field("title", mi.title)
-        role("Writer", mi.authors)
+        # role("Writer", mi.authors)
+        role("Writer", author_clean)
         update_field("series", mi.series)
         update_field("issue", mi.series_index)
         update_field("tags", mi.tags)
@@ -277,7 +287,6 @@ def convert_comic_md_to_calibre_md(self, comic_metadata):
         # gtin
         if co.gtin:
             mi.set_identifiers({"gtin": co.gtin})
-
         # custom columns
         update_column = partial(update_custom_column, calibre_metadata=mi,
                                 custom_cols=self.db.field_metadata.custom_field_metadata())
@@ -307,10 +316,18 @@ def convert_comic_md_to_calibre_md(self, comic_metadata):
 
         self.comic_md_in_calibre_format = mi
 
+    # Conversion
     def make_temp_cbz_file(self):
         if not self.file and self.format == "cbz":
             self.file = self.db.format(self.book_id, "cbz", as_path=True)
 
+    def add_dir_to_zip(self, zf, tdir, arcname):
+        import os
+        for dirpath, dirs, files in os.walk(tdir):
+            for f in files:
+                fn = os.path.join(dirpath, f)
+                zf.write(fn, f'{arcname}/{f}')
+
     def convert_cbr_to_cbz(self):
         '''
         Converts a rar or cbr-comic to a cbz-comic
@@ -327,14 +344,17 @@ def convert_cbr_to_cbz(self):
             # make the cbz file
             with TemporaryFile("comic.cbz") as tf:
                 zf = ZipFile(tf, "w")
-                add_dir_to_zipfile(zf, tdir)
+                self.add_dir_to_zip(zf, tdir, clean_title(self.calibre_metadata.title))
                 if comments:
                     zf.comment = comments.encode("utf-8")
                 zf.close()
                 # add the cbz format to calibres library
                 self.db.add_format(self.book_id, "cbz", tf)
                 self.format = "cbz"
 
+            if prefs['clean_cbz']:
+                self.clean_cbz()
+
     def convert_zip_to_cbz(self):
         import os
 
@@ -345,6 +365,193 @@ def convert_zip_to_cbz(self):
         delete_temp_file(new_fname)
         self.format = "cbz"
 
+        if prefs['clean_cbz']:
+            self.clean_cbz()
+
+    # CBZ mark
+    def is_cbi_valid(self):
+        # Ensure metadata is set
+        self.overlay_metadata()
+
+        # Generate what the string should be
+        cbi_string = ComicBookInfo().stringFromMetadata(self.comic_metadata)
+        if not python3:
+            cbi_string = cbi_string.decode('utf-8', 'ignore')
+
+        # ensure we have a temp file
+        self.make_temp_cbz_file()
+
+        # Read current cbi comment
+        zf = ZipFile(self.file, "r")
+        curr_str = zf.comment
+        zf.close()
+
+        return cbi_string == curr_str
+
+    def is_cbi_empty(self):
+       # ensure we have a temp file
+        self.make_temp_cbz_file()
+
+        # Read current cbi comment
+        zf = ZipFile(self.file, "r")
+        curr_str = zf.comment
+        zf.close()
+
+        return curr_str == None or curr_str == "".encode("utf-8")
+
+    def is_cix_valid(self):
+        # Ensure metadata is set
+        self.overlay_metadata()
+
+        # Generate what the string should be
+        cix_string = ComicInfoXml().stringFromMetadata(self.comic_metadata)
+        if not python3:
+            cix_string = cix_string.decode('utf-8', 'ignore')
+
+        # ensure we have a temp file
+        self.make_temp_cbz_file()
+
+        # Read current xml file
+        zf = ZipFile(self.file, "r")
+        curr_file = zf.open('ComicInfo.xml', 'r')
+        curr_str = io.TextIOWrapper(curr_file).read()
+        curr_file.close()
+
+        # count current # of pages
+        pages = 0
+        for name in zf.namelist():
+            if name.lower().rpartition('.')[-1] in IMG_EXTENSIONS:
+                pages += 1
+        zf.close()
+
+        if self.comic_metadata.pageCount != pages:
+            return False
+
+        return cix_string == curr_str
+
+    def is_cbz_dirty(self):
+        '''
+        Determines if a CBZ file has a dirty/unwanted file structure
+        '''
+        ffile = self.db.format(self.book_id, self.format, as_path=True)
+        tmpf = ZipFile(ffile)
+        filename_list = tmpf.namelist()
+
+        # A 'dirty' zip has one (or more) of these cases:
+        #   Case 1: Metadata is not clean
+        #       a. There are duplicate files
+        #       b. ComicInfo.xml does not exist at <root_dir>/ComicInfo.xml
+        #       c. ComicInfo.xml content is not up to date
+        #       d. ComicBookInfo comment is not up to date
+        #   Case 2: Directory structure is up to date
+        #       a. file name matches <root_dir>/<book_name>/*
+        #       b. filename does not contain invalid extension
+        #       c. filename does not match scanner tag
+        #       d. filename does not match embedded cover
+
+        # Case 1a
+        if len(set(filename_list)) < len(filename_list):
+            return True
+        # Case 1b
+        if 'ComicInfo.xml' not in filename_list:
+            return True
+        else:
+            # Case 1c
+            if not self.is_cix_valid():
+                return True
+        # Case 1d
+        if not self.is_cbi_empty():
+            return True
+
+        for f in filename_list:
+            # We already checked ComicInfo.xml, so ignore it here
+            if f == 'ComicInfo.xml':
+                continue
+            # Case 2a
+            if os.path.dirname(f) != clean_title(self.calibre_metadata.title):
+                return True
+            # Case 2b
+            if pathlib.Path(f).suffix in [".xhtml", ".html", ".css", ".xml", ".sfv"]:
+                return True
+            # Case 2c+d
+            if os.path.basename(f).__contains__('zz'):
+                return True
+            # Case 2c+d
+            if os.path.basename(f) in ['cover.jpeg', 'cover.jpeg', 'page.jpg', 'zSoU-Nerd.jpg']:
+                return True
+
+        return False
+
+    def action_mark_cbz(self):
+        should_mark = True if self.format in ["cbr", "zip"] else self.is_cbz_dirty()
+        if should_mark:
+            self.ia.gui.current_db.data.add_marked_ids({self.book_id: 'shit_files_m8'})
+
+        return should_mark
+
+    # CBZ cleanup
+    def clean_cbz(self):
+        '''
+        cleans directory structure for a cbz comic
+        '''
+
+        # Shortcut for files that are already cleaned
+        should_clean = self.is_cbz_dirty()
+        if not should_clean:
+            return False
+
+        with TemporaryDirectory('_extractedfiles') as tdir:
+            # extract the zip file
+            ffile = self.db.format(self.book_id, self.format, as_path=True)
+            tmpf = ZipFile(ffile)
+            tmpf.extractall(tdir)
+            comments = tmpf.comment
+            delete_temp_file(ffile)
+            tmpf.close()
+
+            # Gather file paths from extracted zip
+            all_files = []
+            for root, _, files in os.walk(tdir):
+                for f in files:
+                    all_files.append(os.path.abspath(os.path.join(root, f)))
+
+            # clean up dir structure
+            with TemporaryDirectory('_cleancbz') as cleandir:
+                with TemporaryFile("comic.cbz") as tf:
+                    zf = ZipFile(tf, "w")
+
+                    for f in all_files:
+                        # Skip non-image files
+                        if pathlib.Path(f).suffix in [".xhtml", ".html", ".css", ".xml", ".sfv"]:
+                            continue
+                        # Remove scanner tags
+                        if os.path.basename(f).__contains__('zz'):
+                            continue
+                        # Remove embedded covers and scanner tags
+                        if os.path.basename(f) in ['cover.jpg', 'cover.jpeg', 'page.jpg', 'zSoU-Nerd.jpg']:
+                            continue
+                        else:
+                            zf.write(f, f'{clean_title(self.calibre_metadata.title)}/{os.path.basename(f)}')
+
+                    if comments:
+                        zf.comment = "".encode("utf-8")
+
+                    self.overlay_metadata()
+                    if prefs['cix_embed']:
+                        cix_string = ComicInfoXml().stringFromMetadata(self.comic_metadata)
+                        zf.writestr("ComicInfo.xml", cix_string)
+
+                    zf.close()
+
+                    # add the cbz format to calibres library
+                    self.db.add_format(self.book_id, "cbz", tf)
+                    self.format = "cbz"
+
+                    delete_temp_file(tf)
+            self.file = self.db.format(self.book_id, "cbz", as_path=True)
+
+            return True
+
     def update_cover(self):
         # get the calibre cover
         cover_path = self.db.cover(self.book_id, as_path=True)
@@ -378,11 +585,14 @@ def update_cover(self):
     def count_pages(self):
         self.make_temp_cbz_file()
         zf = ZipFile(self.file)
+        namelist = zf.namelist()
+        zf.close()
+
         pages = 0
-        for name in zf.namelist():
+        for name in namelist:
             if name.lower().rpartition('.')[-1] in IMG_EXTENSIONS:
                 pages += 1
-        zf.close()
+
         return pages
 
     def action_count_pages(self):
@@ -465,6 +675,7 @@ def remove_embedded_metadata(self):
 
         return True
 
+    # Metadata import
     def get_comic_metadata_from_cbz(self):
         '''
         Reads the comic metadata from the comic cbz file as comictagger metadata
@@ -592,7 +803,6 @@ def swap_author_names_back(author):
 
 def delete_temp_file(ffile):
     try:
-        import os
         if os.path.exists(ffile):
             os.remove(ffile)
     except:
@@ -634,6 +844,17 @@ def add_dir_to_zipfile(zf, path, prefix=''):
             zf.write(f, arcname)
 
 
+def clean_title(s):
+    return re.sub(r'[^\w_,\-\.\(\)\s]', '_', strip_accents(s))
+
+
+def clean_authors(l: list[str]):
+    return [a.replace("_no_sync", "") for a in l]
+
+
+def strip_accents(s):
+    return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')
+
 def safe_delete(zipstream, name):
     '''
     Delete a file in a zip file in a safe manner. This proceeds by extracting
@@ -664,4 +885,4 @@ def safe_delete(zipstream, name):
         zipstream.seek(0)
         zipstream.truncate()
         shutil.copyfileobj(temp, zipstream)
-        zipstream.flush()
+        zipstream.flush()
Original file line number	Diff line number	Diff line change
Expand Up		@@ -138,3 +138,4 @@ dmypy.json
		cython_debug/

		EmbedComicMetadata.zip
		Embed Comic Metadata.zip