diff --git a/cook.py b/cook.py index 67db092..33eddea 100644 --- a/cook.py +++ b/cook.py @@ -1,10 +1,8 @@ from zipfile import ZipFile from pathlib import Path -import os, sys, time, re, shutil, argparse +import os, sys, re, shutil, argparse parser = argparse.ArgumentParser("epub cooking") -parser.add_argument("--cc", metavar="CHAPTERS_CLASS", help="The class used in chapters. (default: hlink)", type=str) -parser.add_argument("--fc", metavar="FOOTNOTES_CLASS", help="The class used in footnotes. (default: hanging1)", type=str) #parser.add_argument("--test", help="Test a link or footnote against the regex. (NotImplemented)", type=bool) parser.add_argument("file", help="The file to be worked on.", type=str) args = parser.parse_args() @@ -12,27 +10,22 @@ args = parser.parse_args() file_name = args.file files_to_work_on = [] files_to_zip = [] - -if args.cc: - chapter_class = args.cc -else: - chapter_class = "hlink" #VHS: Change hlink, this is the class found in the chapters -if args.fc: - footnotes_class = args.fc -else: - footnotes_class = "hanging1" # VHS: Change hanging1, this is the class found in the footnotes +lastItemLength = 1 def progressbar(it, prefix="", size=60, out=sys.stdout): # Python3.6+ count = len(it) - start = time.time() - def show(j): + def show(j, item): + global lastItemLength x = int(size*j/count) + spacesToPrint = lastItemLength - len(item) - print(f"{prefix}[{u'█'*x}{('.'*(size-x))}] {j}/{count}", end='\r', file=out, flush=True) + print(f"{prefix}[{u'█'*x}{('.'*(size-x))}] {j}/{count} (Currently working on: {item}){' '*spacesToPrint}", end='\r', file=out, flush=True) + lastItemLength = len(item) for i, item in enumerate(it): yield item - show(i+1) + show(i+1, item) + print(f"{prefix}[{u'█'*size}] {count}/{count} {' '*lastItemLength}", end='\r', file=out, flush=True) print("\n", flush=True, file=out) class NoStdStreams(object): @@ -54,7 +47,7 @@ class NoStdStreams(object): with ZipFile(file_name, 'r') as zip: for filename in zip.namelist(): - if re.search('.x?html?', filename): + if re.search('.*?(?!titlepage).*?\.x?html?$', filename): zip.extract(filename) files_to_work_on.append(filename) @@ -63,24 +56,28 @@ Path(f"work/{files_to_work_on[2].split('/')[0]}").mkdir(parents=True, exist_ok=T print(f"Cooking on {file_name}, with {len(files_to_work_on)} files in it") -for file in files_to_work_on: #progressbar(files_to_work_on, "", 40): +for file in progressbar(files_to_work_on, "", 40): with open(file, 'r') as epub_file: text = epub_file.read() - test = re.findall('id="toc"|epub:type="toc"', text) + test = re.findall('id="toc"|epub:type=".*?toc.*?"|epub:type=".*?frontmatter.*?"|epub:type=".*?backmatter.*?"|epub:type=".*?appendix.*?"', text) if test: + print(f"Skipping {file}, it seems to not be a chapter{' '*42}") continue - matches = re.findall(f'', text) + matches = re.findall(f'', text) if matches: for match in matches: - if match[0] != '': + if match[0] != '' and not re.search("toc\.x?html?", match[0]): for dd in files_to_work_on: if re.search(f".*?{match[0]}.*?", dd): with open(dd, 'r') as source: - source_match = re.search(f"

", source.read()) + test = re.findall('id="toc"|epub:type=".*?toc.*?"|epub:type=".*?frontmatter.*?"|epub:type=".*?backmatter.*?"|epub:type=".*?appendix.*?"', text) + if test: + continue + source_match = re.search(f"", source.read()) if source_match: - source_match_fixed = re.sub('[\.\s ]*(.*?)<\/p>', rf'\2

', source_match.group()) + source_match_fixed = re.sub('[\.\s ]*(.*?)<\/p>', rf'\2

', source_match.group()) source_match_fixed = re.sub('

', r'

', source_match_fixed) - fixed_text = re.sub(f"", f"", text) + fixed_text = re.sub(f'', fr'', text) text = re.sub(f"\n\s*", f"\n{source_match_fixed}\n", fixed_text) with open(f"work/{file}", 'w') as output: output.write(text) @@ -94,3 +91,4 @@ with ZipFile("output.epub", 'a') as zip: zip.write(f"work/{file}", file) shutil.rmtree("work") +shutil.rmtree(f"{files_to_work_on[2].split('/')[0]}")