cook: Better comp + cli output. Cleanup.
This commit is contained in:
parent
9850e93d89
commit
d5db6efde3
46
cook.py
46
cook.py
|
@ -1,10 +1,8 @@
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import os, sys, time, re, shutil, argparse
|
import os, sys, re, shutil, argparse
|
||||||
|
|
||||||
parser = argparse.ArgumentParser("epub cooking")
|
parser = argparse.ArgumentParser("epub cooking")
|
||||||
parser.add_argument("--cc", metavar="CHAPTERS_CLASS", help="The class used in chapters. (default: hlink)", type=str)
|
|
||||||
parser.add_argument("--fc", metavar="FOOTNOTES_CLASS", help="The class used in footnotes. (default: hanging1)", type=str)
|
|
||||||
#parser.add_argument("--test", help="Test a link or footnote against the regex. (NotImplemented)", type=bool)
|
#parser.add_argument("--test", help="Test a link or footnote against the regex. (NotImplemented)", type=bool)
|
||||||
parser.add_argument("file", help="The file to be worked on.", type=str)
|
parser.add_argument("file", help="The file to be worked on.", type=str)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
@ -12,27 +10,22 @@ args = parser.parse_args()
|
||||||
file_name = args.file
|
file_name = args.file
|
||||||
files_to_work_on = []
|
files_to_work_on = []
|
||||||
files_to_zip = []
|
files_to_zip = []
|
||||||
|
lastItemLength = 1
|
||||||
if args.cc:
|
|
||||||
chapter_class = args.cc
|
|
||||||
else:
|
|
||||||
chapter_class = "hlink" #VHS: Change hlink, this is the class found in the chapters
|
|
||||||
if args.fc:
|
|
||||||
footnotes_class = args.fc
|
|
||||||
else:
|
|
||||||
footnotes_class = "hanging1" # VHS: Change hanging1, this is the class found in the footnotes
|
|
||||||
|
|
||||||
def progressbar(it, prefix="", size=60, out=sys.stdout): # Python3.6+
|
def progressbar(it, prefix="", size=60, out=sys.stdout): # Python3.6+
|
||||||
count = len(it)
|
count = len(it)
|
||||||
start = time.time()
|
def show(j, item):
|
||||||
def show(j):
|
global lastItemLength
|
||||||
x = int(size*j/count)
|
x = int(size*j/count)
|
||||||
|
spacesToPrint = lastItemLength - len(item)
|
||||||
|
|
||||||
print(f"{prefix}[{u'█'*x}{('.'*(size-x))}] {j}/{count}", end='\r', file=out, flush=True)
|
|
||||||
|
|
||||||
|
print(f"{prefix}[{u'█'*x}{('.'*(size-x))}] {j}/{count} (Currently working on: {item}){' '*spacesToPrint}", end='\r', file=out, flush=True)
|
||||||
|
lastItemLength = len(item)
|
||||||
for i, item in enumerate(it):
|
for i, item in enumerate(it):
|
||||||
yield item
|
yield item
|
||||||
show(i+1)
|
show(i+1, item)
|
||||||
|
print(f"{prefix}[{u'█'*size}] {count}/{count} {' '*lastItemLength}", end='\r', file=out, flush=True)
|
||||||
print("\n", flush=True, file=out)
|
print("\n", flush=True, file=out)
|
||||||
|
|
||||||
class NoStdStreams(object):
|
class NoStdStreams(object):
|
||||||
|
@ -54,7 +47,7 @@ class NoStdStreams(object):
|
||||||
|
|
||||||
with ZipFile(file_name, 'r') as zip:
|
with ZipFile(file_name, 'r') as zip:
|
||||||
for filename in zip.namelist():
|
for filename in zip.namelist():
|
||||||
if re.search('.x?html?', filename):
|
if re.search('.*?(?!titlepage).*?\.x?html?$', filename):
|
||||||
zip.extract(filename)
|
zip.extract(filename)
|
||||||
files_to_work_on.append(filename)
|
files_to_work_on.append(filename)
|
||||||
|
|
||||||
|
@ -63,24 +56,28 @@ Path(f"work/{files_to_work_on[2].split('/')[0]}").mkdir(parents=True, exist_ok=T
|
||||||
|
|
||||||
print(f"Cooking on {file_name}, with {len(files_to_work_on)} files in it")
|
print(f"Cooking on {file_name}, with {len(files_to_work_on)} files in it")
|
||||||
|
|
||||||
for file in files_to_work_on: #progressbar(files_to_work_on, "", 40):
|
for file in progressbar(files_to_work_on, "", 40):
|
||||||
with open(file, 'r') as epub_file:
|
with open(file, 'r') as epub_file:
|
||||||
text = epub_file.read()
|
text = epub_file.read()
|
||||||
test = re.findall('id="toc"|epub:type="toc"', text)
|
test = re.findall('id="toc"|epub:type=".*?toc.*?"|epub:type=".*?frontmatter.*?"|epub:type=".*?backmatter.*?"|epub:type=".*?appendix.*?"', text)
|
||||||
if test:
|
if test:
|
||||||
|
print(f"Skipping {file}, it seems to not be a chapter{' '*42}")
|
||||||
continue
|
continue
|
||||||
matches = re.findall(f'<a (?:(?:class="{chapter_class}")|(?:href="(.*?)#(.*?)")|(?:.*?))+>', text)
|
matches = re.findall(f'<a[\w\d\s="-]*?href="(.*?)#(.*?)"[\w\d\s="-]*?>', text)
|
||||||
if matches:
|
if matches:
|
||||||
for match in matches:
|
for match in matches:
|
||||||
if match[0] != '':
|
if match[0] != '' and not re.search("toc\.x?html?", match[0]):
|
||||||
for dd in files_to_work_on:
|
for dd in files_to_work_on:
|
||||||
if re.search(f".*?{match[0]}.*?", dd):
|
if re.search(f".*?{match[0]}.*?", dd):
|
||||||
with open(dd, 'r') as source:
|
with open(dd, 'r') as source:
|
||||||
source_match = re.search(f"<p class=\"{footnotes_class}\".*?id=\"{match[1]}\".*?</p>", source.read())
|
test = re.findall('id="toc"|epub:type=".*?toc.*?"|epub:type=".*?frontmatter.*?"|epub:type=".*?backmatter.*?"|epub:type=".*?appendix.*?"', text)
|
||||||
|
if test:
|
||||||
|
continue
|
||||||
|
source_match = re.search(f"<p.*?id=\"{match[1]}\".*?</p>", source.read())
|
||||||
if source_match:
|
if source_match:
|
||||||
source_match_fixed = re.sub('<p(?:(.*?)?(?:id=".*?")?)><a.*?<\/a>[\.\s ]*(.*?)<\/p>', rf'<p\1 id={match[1]}>\2</p>', source_match.group())
|
source_match_fixed = re.sub('<p(?:(.*?)?(?:id=".*?")?)><a.*?<\/a>[\.\s ]*(.*?)<\/p>', rf'<p\1 id="{match[1]}">\2</p>', source_match.group())
|
||||||
source_match_fixed = re.sub('<p (.*?)</p>', r'<p epub:type="footnote" \1</p>', source_match_fixed)
|
source_match_fixed = re.sub('<p (.*?)</p>', r'<p epub:type="footnote" \1</p>', source_match_fixed)
|
||||||
fixed_text = re.sub(f"<a (?:(?:class=\"{chapter_class}\")|(?:href=\".*?#{match[1]}\")|(?:.*?))+>", f"<a epub:type=\"noteref\" class=\"{chapter_class}\" href=\"#{match[1]}\">", text)
|
fixed_text = re.sub(f'<a (?:(?:href=".*?#.*?")|(class=".*?")|(?:.*?))+>', fr'<a epub:type="noteref" \1 href="#{match[1]}">', text)
|
||||||
text = re.sub(f"\n\s*</body>", f"\n{source_match_fixed}\n</body>", fixed_text)
|
text = re.sub(f"\n\s*</body>", f"\n{source_match_fixed}\n</body>", fixed_text)
|
||||||
with open(f"work/{file}", 'w') as output:
|
with open(f"work/{file}", 'w') as output:
|
||||||
output.write(text)
|
output.write(text)
|
||||||
|
@ -94,3 +91,4 @@ with ZipFile("output.epub", 'a') as zip:
|
||||||
zip.write(f"work/{file}", file)
|
zip.write(f"work/{file}", file)
|
||||||
|
|
||||||
shutil.rmtree("work")
|
shutil.rmtree("work")
|
||||||
|
shutil.rmtree(f"{files_to_work_on[2].split('/')[0]}")
|
||||||
|
|
Loading…
Reference in a new issue