Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,4 @@ dmypy.json
cython_debug/

EmbedComicMetadata.zip
Embed Comic Metadata.zip
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# "zip" names an action, not a file: declare it phony so the rule still runs
# when something called "zip" exists in the directory.
.PHONY: zip

# Rebuild the plugin archive from the contents of the current directory.
zip:
	rm -f 'Embed Comic Metadata.zip'
	zip -r 'Embed Comic Metadata.zip' .
2 changes: 2 additions & 0 deletions comicbookinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def xlate(cbi_entry):
metadata.month = xlate('publicationMonth')
metadata.year = xlate('publicationYear')
metadata.issueCount = xlate('numberOfIssues')
metadata.pageCount = xlate('pageCount')
metadata.comments = xlate('comments')
metadata.credits = xlate('credits')
metadata.genre = xlate('genre')
Expand Down Expand Up @@ -123,6 +124,7 @@ def toInt(s):
assign('publicationMonth', toInt(metadata.month))
assign('publicationYear', toInt(metadata.year))
assign('numberOfIssues', toInt(metadata.issueCount))
assign('pageCount', toInt(metadata.pageCount))
assign('comments', metadata.comments)
assign('genre', metadata.genre)
assign('volume', toInt(metadata.volume))
Expand Down
16 changes: 15 additions & 1 deletion comicinfoxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,20 @@ def assign(cix_entry, md_entry):
if md_entry is not None:
ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry)

def clean_tags(tag_list):
    """Return ``tag_list`` without the Goodreads shelf tags.

    The input collection is never modified. A list yields a new list and a
    tuple/set yields a new collection of the same type, with every
    occurrence of a Goodreads tag dropped. ``None``, empty values and
    anything that is not a list/tuple/set (e.g. an already joined string)
    are returned unchanged.
    """
    # Remove Goodreads tags
    gr_tags = ('gr-read', 'gr-want-to-read', 'gr-reading')

    if not tag_list or not isinstance(tag_list, (list, tuple, set, frozenset)):
        return tag_list

    kept = [t for t in tag_list if t not in gr_tags]
    if isinstance(tag_list, list):
        return kept
    # Hand back the same container type the caller passed in
    return type(tag_list)(kept)

assign('Title', md.title)
assign('Series', md.series)
assign('Number', md.issue)
Expand Down Expand Up @@ -181,7 +195,7 @@ def assign(cix_entry, md_entry):
md.teams = tuple_to_string(md.teams)
md.locations = tuple_to_string(md.locations)
md.genre = tuple_to_string(md.genre)
md.tags = tuple_to_string(md.tags)
md.tags = tuple_to_string(clean_tags(md.tags))

assign('Publisher', md.publisher)
assign('Imprint', md.imprint)
Expand Down
235 changes: 228 additions & 7 deletions comicmetadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from __future__ import (unicode_literals, division, absolute_import,
print_function)
import io
import pathlib
import re
import unicodedata

__license__ = 'GPL v3'
__copyright__ = '2015, dloraine'
Expand All @@ -14,6 +18,7 @@
from calibre_plugins.EmbedComicMetadata.genericmetadata import GenericMetadata
from calibre_plugins.EmbedComicMetadata.comicinfoxml import ComicInfoXml
from calibre_plugins.EmbedComicMetadata.comicbookinfo import ComicBookInfo
from calibre.utils.zipfile import safe_replace

import os
import sys
Expand Down Expand Up @@ -75,6 +80,7 @@ def __init__(self, book_id, ia):
def __del__(self):
delete_temp_file(self.file)

# Metadata embed
def get_comic_metadata_from_file(self):
if self.checked_for_metadata:
return
Expand Down Expand Up @@ -155,9 +161,13 @@ def convert_calibre_md_to_comic_md(self):
role = partial(set_role, credits=self.calibre_md_in_comic_format.credits)
update_field = partial(update_comic_field, target=self.calibre_md_in_comic_format)

# Hack for no_sync authors
author_clean = clean_authors(mi.authors)

# update the fields of comic metadata
update_field("title", mi.title)
role("Writer", mi.authors)
# role("Writer", mi.authors)
role("Writer", author_clean)
update_field("series", mi.series)
update_field("issue", mi.series_index)
update_field("tags", mi.tags)
Expand Down Expand Up @@ -277,7 +287,6 @@ def convert_comic_md_to_calibre_md(self, comic_metadata):
# gtin
if co.gtin:
mi.set_identifiers({"gtin": co.gtin})

# custom columns
update_column = partial(update_custom_column, calibre_metadata=mi,
custom_cols=self.db.field_metadata.custom_field_metadata())
Expand Down Expand Up @@ -307,10 +316,18 @@ def convert_comic_md_to_calibre_md(self, comic_metadata):

self.comic_md_in_calibre_format = mi

# Conversion
def make_temp_cbz_file(self):
    '''
    Makes sure self.file is set for a cbz comic.

    Does nothing when self.file is already set or the comic is not a cbz;
    otherwise asks the db for the cbz format as a path and stores it.
    '''
    if self.file or self.format != "cbz":
        return
    self.file = self.db.format(self.book_id, "cbz", as_path=True)

def add_dir_to_zip(self, zf, tdir, arcname):
    '''
    Writes every file found below tdir into the zip zf.

    The directory layout below tdir is not kept: each file is stored as
    "<arcname>/<file name>", whatever subdirectory it was found in.
    '''
    import os
    for folder, _subdirs, filenames in os.walk(tdir):
        for filename in filenames:
            source_path = os.path.join(folder, filename)
            zf.write(source_path, '{}/{}'.format(arcname, filename))

def convert_cbr_to_cbz(self):
'''
Converts a rar or cbr-comic to a cbz-comic
Expand All @@ -327,14 +344,17 @@ def convert_cbr_to_cbz(self):
# make the cbz file
with TemporaryFile("comic.cbz") as tf:
zf = ZipFile(tf, "w")
add_dir_to_zipfile(zf, tdir)
self.add_dir_to_zip(zf, tdir, clean_title(self.calibre_metadata.title))
if comments:
zf.comment = comments.encode("utf-8")
zf.close()
# add the cbz format to calibres library
self.db.add_format(self.book_id, "cbz", tf)
self.format = "cbz"

if prefs['clean_cbz']:
self.clean_cbz()

def convert_zip_to_cbz(self):
import os

Expand All @@ -345,6 +365,193 @@ def convert_zip_to_cbz(self):
delete_temp_file(new_fname)
self.format = "cbz"

if prefs['clean_cbz']:
self.clean_cbz()

# CBZ mark
def is_cbi_valid(self):
    '''
    Checks if the ComicBookInfo zip comment of the cbz file is up to date.

    Returns True when the comment stored in the archive equals the
    ComicBookInfo string generated from the current comic metadata.
    '''
    # Ensure metadata is set
    self.overlay_metadata()

    # Generate what the string should be
    cbi_string = ComicBookInfo().stringFromMetadata(self.comic_metadata)
    if not python3:
        cbi_string = cbi_string.decode('utf-8', 'ignore')

    # ensure we have a temp file
    self.make_temp_cbz_file()

    # Read current cbi comment, always releasing the archive handle
    zf = ZipFile(self.file, "r")
    try:
        curr_str = zf.comment
    finally:
        zf.close()

    # The archive comment is handled as bytes elsewhere in this class,
    # while the generated string is text. Bring both sides to text so the
    # comparison is not always False.
    if isinstance(cbi_string, bytes):
        cbi_string = cbi_string.decode('utf-8', 'ignore')
    if isinstance(curr_str, bytes):
        curr_str = curr_str.decode('utf-8', 'ignore')

    return cbi_string == curr_str

def is_cbi_empty(self):
    '''
    Checks if the cbz file carries no zip comment.

    Returns True when the archive comment is None or empty bytes.
    '''
    # ensure we have a temp file
    self.make_temp_cbz_file()

    # Read current cbi comment, always releasing the archive handle
    zf = ZipFile(self.file, "r")
    try:
        curr_str = zf.comment
    finally:
        zf.close()

    return curr_str is None or curr_str == "".encode("utf-8")

def is_cix_valid(self):
    '''
    Checks if the ComicInfo.xml inside the cbz file is up to date.

    Returns True when the archive contains a ComicInfo.xml whose text
    equals the string generated from the current comic metadata and the
    page count in the metadata matches the number of image files in the
    archive. Returns False otherwise, including when ComicInfo.xml is
    missing.
    '''
    # Ensure metadata is set
    self.overlay_metadata()

    # Generate what the string should be
    cix_string = ComicInfoXml().stringFromMetadata(self.comic_metadata)
    if not python3:
        cix_string = cix_string.decode('utf-8', 'ignore')
    if isinstance(cix_string, bytes):
        cix_string = cix_string.decode('utf-8', 'ignore')

    # ensure we have a temp file
    self.make_temp_cbz_file()

    zf = ZipFile(self.file, "r")
    try:
        names = zf.namelist()
        # Without a ComicInfo.xml there is nothing to compare against
        if 'ComicInfo.xml' not in names:
            return False

        # Read current xml file. Decode explicitly as UTF-8 instead of
        # relying on the platform's default text encoding.
        curr_file = zf.open('ComicInfo.xml', 'r')
        try:
            curr_str = io.TextIOWrapper(curr_file, encoding='utf-8').read()
        finally:
            curr_file.close()
    finally:
        zf.close()

    # count current # of pages
    pages = sum(1 for name in names
                if name.lower().rpartition('.')[-1] in IMG_EXTENSIONS)

    # NOTE(review): pageCount may be held as text (e.g. when it was parsed
    # from a file) - compare numerically when it can be converted.
    expected_pages = self.comic_metadata.pageCount
    try:
        expected_pages = int(expected_pages)
    except (TypeError, ValueError):
        pass  # None or non-numeric: keep the raw value for the comparison

    if expected_pages != pages:
        return False

    return cix_string == curr_str

def is_cbz_dirty(self):
    '''
    Determines if a CBZ file has a dirty/unwanted file structure

    Returns True as soon as one of the cases listed below is found and
    False when the archive passes every check.
    '''
    ffile = self.db.format(self.book_id, self.format, as_path=True)
    tmpf = ZipFile(ffile)
    try:
        filename_list = tmpf.namelist()
    finally:
        # Only the name list is needed: release the archive and the copy
        # the db handed out for it right away.
        tmpf.close()
        delete_temp_file(ffile)

    # A 'dirty' zip has one (or more) of these cases:
    # Case 1: Metadata is not clean
    #   a. There are duplicate files
    #   b. ComicInfo.xml does not exist at <root_dir>/ComicInfo.xml
    #   c. ComicInfo.xml content is not up to date
    #   d. The zip (ComicBookInfo) comment is not empty
    # Case 2: Directory structure is not clean
    #   a. a file is not located at <root_dir>/<book_name>/*
    #   b. a file has an unwanted extension
    #   c. a file name matches a scanner tag
    #   d. a file name matches an embedded cover

    # Case 1a
    if len(set(filename_list)) < len(filename_list):
        return True
    # Case 1b
    if 'ComicInfo.xml' not in filename_list:
        return True
    # Case 1c
    if not self.is_cix_valid():
        return True
    # Case 1d
    if not self.is_cbi_empty():
        return True

    book_dir = clean_title(self.calibre_metadata.title)
    unwanted_suffixes = [".xhtml", ".html", ".css", ".xml", ".sfv"]
    # Same names that clean_cbz removes when it rebuilds the archive
    unwanted_names = ['cover.jpg', 'cover.jpeg', 'page.jpg', 'zSoU-Nerd.jpg']

    for f in filename_list:
        # We already checked ComicInfo.xml, so ignore it here
        if f == 'ComicInfo.xml':
            continue
        # Case 2a
        if os.path.dirname(f) != book_dir:
            return True
        # Case 2b
        if pathlib.Path(f).suffix in unwanted_suffixes:
            return True
        # Case 2c+d
        base_name = os.path.basename(f)
        if 'zz' in base_name or base_name in unwanted_names:
            return True

    return False

def action_mark_cbz(self):
    '''
    Marks the book in the gui when it needs attention.

    cbr and zip comics are always marked; any other format is marked when
    is_cbz_dirty() says so. Returns the decision that was taken.
    '''
    needs_mark = self.format in ["cbr", "zip"] or self.is_cbz_dirty()
    if not needs_mark:
        return needs_mark

    marked_ids = {self.book_id: 'shit_files_m8'}
    self.ia.gui.current_db.data.add_marked_ids(marked_ids)
    return needs_mark

# CBZ cleanup
def clean_cbz(self):
    '''
    cleans directory structure for a cbz comic

    Rebuilds the archive so that every kept file is stored as
    "<cleaned title>/<file name>", leaving out files with unwanted
    extensions, scanner tags and embedded covers, and writes a fresh
    ComicInfo.xml when the 'cix_embed' preference is set. The rebuilt
    archive replaces the cbz format of the book.

    Returns False when the comic was already clean, True when it was
    rebuilt.
    '''
    # Shortcut for files that are already cleaned
    if not self.is_cbz_dirty():
        return False

    skipped_suffixes = [".xhtml", ".html", ".css", ".xml", ".sfv"]
    skipped_names = ['cover.jpg', 'cover.jpeg', 'page.jpg', 'zSoU-Nerd.jpg']

    with TemporaryDirectory('_extractedfiles') as tdir:
        # extract the zip file
        ffile = self.db.format(self.book_id, self.format, as_path=True)
        tmpf = ZipFile(ffile)
        try:
            tmpf.extractall(tdir)
            comments = tmpf.comment
        finally:
            # Close the archive before removing its copy: a file that is
            # still open cannot be deleted on every platform.
            tmpf.close()
            delete_temp_file(ffile)

        # Gather file paths from extracted zip
        all_files = []
        for root, _, files in os.walk(tdir):
            for f in files:
                all_files.append(os.path.abspath(os.path.join(root, f)))

        book_dir = clean_title(self.calibre_metadata.title)

        # clean up dir structure
        with TemporaryFile("comic.cbz") as tf:
            zf = ZipFile(tf, "w")
            try:
                for f in all_files:
                    base_name = os.path.basename(f)
                    # Skip non-image files
                    if pathlib.Path(f).suffix in skipped_suffixes:
                        continue
                    # Remove scanner tags
                    if 'zz' in base_name:
                        continue
                    # Remove embedded covers and scanner tags
                    if base_name in skipped_names:
                        continue
                    zf.write(f, f'{book_dir}/{base_name}')

                # The rebuilt archive carries no zip comment
                if comments:
                    zf.comment = "".encode("utf-8")

                self.overlay_metadata()
                if prefs['cix_embed']:
                    cix_string = ComicInfoXml().stringFromMetadata(self.comic_metadata)
                    zf.writestr("ComicInfo.xml", cix_string)
            finally:
                zf.close()

            # add the cbz format to calibres library
            self.db.add_format(self.book_id, "cbz", tf)
            self.format = "cbz"

            delete_temp_file(tf)

            # Drop the previous copy (if any) before pointing self.file
            # at the rebuilt archive, so it is not left behind.
            if self.file:
                delete_temp_file(self.file)
            self.file = self.db.format(self.book_id, "cbz", as_path=True)

    return True

def update_cover(self):
# get the calibre cover
cover_path = self.db.cover(self.book_id, as_path=True)
Expand Down Expand Up @@ -378,11 +585,14 @@ def update_cover(self):
def count_pages(self):
    '''
    Counts the image files in the cbz file.

    A file counts as a page when the text after its last dot, lowercased,
    is listed in IMG_EXTENSIONS.
    '''
    self.make_temp_cbz_file()

    # Only the name list is needed, so release the archive right away,
    # even when reading it fails.
    zf = ZipFile(self.file)
    try:
        namelist = zf.namelist()
    finally:
        zf.close()

    return sum(1 for name in namelist
               if name.lower().rpartition('.')[-1] in IMG_EXTENSIONS)

def action_count_pages(self):
Expand Down Expand Up @@ -465,6 +675,7 @@ def remove_embedded_metadata(self):

return True

# Metadata import
def get_comic_metadata_from_cbz(self):
'''
Reads the comic metadata from the comic cbz file as comictagger metadata
Expand Down Expand Up @@ -592,7 +803,6 @@ def swap_author_names_back(author):

def delete_temp_file(ffile):
try:
import os
if os.path.exists(ffile):
os.remove(ffile)
except:
Expand Down Expand Up @@ -634,6 +844,17 @@ def add_dir_to_zipfile(zf, path, prefix=''):
zf.write(f, arcname)


def clean_title(s):
    '''
    Returns s with accents stripped and every character that is not a
    word character, whitespace or one of , - . ( ) replaced by "_".
    '''
    unaccented = strip_accents(s)
    return re.sub(r'[^\w_,\-\.\(\)\s]', '_', unaccented)


def clean_authors(l: 'list[str]'):
    '''
    Returns the author names with every "_no_sync" marker removed.

    The annotation is quoted so it is not evaluated when the function is
    defined (subscripting the builtin list fails before Python 3.9).
    A missing (None) or empty author list yields an empty list.
    '''
    return [a.replace("_no_sync", "") for a in (l or [])]


def strip_accents(s):
    '''
    Returns s in NFD form with all characters of the unicode category
    "Mn" (nonspacing marks, e.g. combining accents) removed.
    '''
    decomposed = unicodedata.normalize('NFD', s)
    kept = [c for c in decomposed if unicodedata.category(c) != 'Mn']
    return ''.join(kept)

def safe_delete(zipstream, name):
'''
Delete a file in a zip file in a safe manner. This proceeds by extracting
Expand Down Expand Up @@ -664,4 +885,4 @@ def safe_delete(zipstream, name):
zipstream.seek(0)
zipstream.truncate()
shutil.copyfileobj(temp, zipstream)
zipstream.flush()
zipstream.flush()
Loading