cook: Improve compatibility with more EPUB layouts; add --cc/--fc arguments to set the chapter and footnote classes

This commit is contained in:
HackerNCoder 2024-02-23 01:16:45 +01:00
parent 341bc5879b
commit 9850e93d89

34
cook.py
View file

@ -1,11 +1,27 @@
from zipfile import ZipFile from zipfile import ZipFile
from pathlib import Path from pathlib import Path
import os, sys, time, re, shutil, subprocess import os, sys, time, re, shutil, argparse
file_name = sys.argv[1] parser = argparse.ArgumentParser("epub cooking")
parser.add_argument("--cc", metavar="CHAPTERS_CLASS", help="The class used in chapters. (default: hlink)", type=str)
parser.add_argument("--fc", metavar="FOOTNOTES_CLASS", help="The class used in footnotes. (default: hanging1)", type=str)
#parser.add_argument("--test", help="Test a link or footnote against the regex. (NotImplemented)", type=bool)
parser.add_argument("file", help="The file to be worked on.", type=str)
args = parser.parse_args()
file_name = args.file
files_to_work_on = [] files_to_work_on = []
files_to_zip = [] files_to_zip = []
if args.cc:
chapter_class = args.cc
else:
chapter_class = "hlink" #VHS: Change hlink, this is the class found in the chapters
if args.fc:
footnotes_class = args.fc
else:
footnotes_class = "hanging1" # VHS: Change hanging1, this is the class found in the footnotes
def progressbar(it, prefix="", size=60, out=sys.stdout): # Python3.6+ def progressbar(it, prefix="", size=60, out=sys.stdout): # Python3.6+
count = len(it) count = len(it)
start = time.time() start = time.time()
@ -43,28 +59,28 @@ with ZipFile(file_name, 'r') as zip:
files_to_work_on.append(filename) files_to_work_on.append(filename)
Path("work").mkdir(parents=True, exist_ok=True) Path("work").mkdir(parents=True, exist_ok=True)
Path(f"work/{files_to_work_on[0].split('/')[0]}").mkdir(parents=True, exist_ok=True) Path(f"work/{files_to_work_on[2].split('/')[0]}").mkdir(parents=True, exist_ok=True)
print(f"Cooking on {file_name}, with {len(files_to_work_on)} files in it") print(f"Cooking on {file_name}, with {len(files_to_work_on)} files in it")
for file in progressbar(files_to_work_on, "", 40): for file in files_to_work_on: #progressbar(files_to_work_on, "", 40):
with open(file, 'r') as epub_file: with open(file, 'r') as epub_file:
text = epub_file.read() text = epub_file.read()
test = re.findall('id="toc"', text) test = re.findall('id="toc"|epub:type="toc"', text)
if test: if test:
continue continue
matches = re.findall('<a class="hlink"(?:(?:href="(.*?)#(.*?)")*.*?)+>', text) matches = re.findall(f'<a (?:(?:class="{chapter_class}")|(?:href="(.*?)#(.*?)")|(?:.*?))+>', text)
if matches: if matches:
for match in matches: for match in matches:
if match[0] != '': if match[0] != '':
for dd in files_to_work_on: for dd in files_to_work_on:
if re.search(f".*?{match[0]}.*?", dd): if re.search(f".*?{match[0]}.*?", dd):
with open(dd, 'r') as source: with open(dd, 'r') as source:
source_match = re.search(f"<p class=\"hanging1\".*?id=\"{match[1]}\">.*?</p>", source.read()) # VHS: Change hanging1 source_match = re.search(f"<p class=\"{footnotes_class}\".*?id=\"{match[1]}\".*?</p>", source.read())
if source_match: if source_match:
source_match_fixed = re.sub('<p(?:(.*?)?(?:id=".*?")?)><a.*?<\/a>[\.\s ]*(.*?)<\/p>', rf'<p\1 id={match[1]}>\2</p>', source_match.group()) source_match_fixed = re.sub('<p(?:(.*?)?(?:id=".*?")?)><a.*?<\/a>[\.\s ]*(.*?)<\/p>', rf'<p\1 id={match[1]}>\2</p>', source_match.group())
source_match_fixed = re.sub('<p (.*?)</p>', r'<p epub:type="footnote" \1</p>', source_match_fixed) source_match_fixed = re.sub('<p (.*?)</p>', r'<p epub:type="footnote" \1</p>', source_match_fixed)
fixed_text = re.sub(f"<a class=\"hlink\" ((?:id=\"{match[0]}\")*(?:href=\".*?#.*?\")*.*?)*>", f"<a epub:type=\"noteref\" class=\"hlink\" href=\"#{match[1]}\">", text) fixed_text = re.sub(f"<a (?:(?:class=\"{chapter_class}\")|(?:href=\".*?#{match[1]}\")|(?:.*?))+>", f"<a epub:type=\"noteref\" class=\"{chapter_class}\" href=\"#{match[1]}\">", text)
text = re.sub(f"\n\s*</body>", f"\n{source_match_fixed}\n</body>", fixed_text) text = re.sub(f"\n\s*</body>", f"\n{source_match_fixed}\n</body>", fixed_text)
with open(f"work/{file}", 'w') as output: with open(f"work/{file}", 'w') as output:
output.write(text) output.write(text)
@ -77,4 +93,4 @@ with ZipFile("output.epub", 'a') as zip:
for file in files_to_zip: for file in files_to_zip:
zip.write(f"work/{file}", file) zip.write(f"work/{file}", file)
# shutil.rmtree("work") shutil.rmtree("work")