"""Rewrite the (X)HTML members of an EPUB and append the results to a copy.

Usage: python cook.py BOOK.epub

The script extracts every (X)HTML member of the archive, substitutes
fragments pulled from sibling members into each document (presumably
footnotes/endnotes being inlined -- confirm against upstream), writes the
rewritten documents under ``work/`` and appends them to ``output.epub``.
"""
from zipfile import ZipFile
from pathlib import Path
import os, sys, time, re, shutil, subprocess

# Extension test for archive members: ".htm", ".html", ".xhtm", ".xhtml".
# The dot is escaped and the pattern anchored so that names which merely
# contain "htm" (e.g. "images/html5-logo.png") are not treated as documents.
_HTML_MEMBER = re.compile(r'\.x?html?$')


def is_html_member(name):
    """Return True when the archive member *name* ends in an (X)HTML extension."""
    return _HTML_MEMBER.search(name) is not None


def progressbar(it, prefix="", size=60, out=sys.stdout): # Python3.6+
    """Yield the items of *it* while drawing a text progress bar on *out*.

    Args:
        it: a sized iterable (``len(it)`` is called up front).
        prefix: text printed before the bar.
        size: width of the bar in characters.
        out: stream the bar is written to.

    The bar is redrawn in place (carriage return, no newline) after each
    item has been consumed; a final newline pair is printed when the
    iterable is exhausted.
    """
    count = len(it)

    def show(done):
        filled = int(size * done / count)
        bar = '█' * filled + '.' * (size - filled)
        print(f"{prefix}[{bar}] {done}/{count}", end='\r', file=out, flush=True)

    for i, item in enumerate(it):
        yield item
        show(i + 1)
    print("\n", flush=True, file=out)


class NoStdStreams(object):
    """Context manager that temporarily replaces ``sys.stdout``/``sys.stderr``.

    Streams passed to the constructor are used as the replacements; any
    stream left as ``None`` is redirected to ``os.devnull``.
    """

    def __init__(self, stdout=None, stderr=None):
        self._requested_stdout = stdout
        self._requested_stderr = stderr
        # Opened on entry (not here) so an instance that is never entered
        # leaks nothing and an instance can be entered more than once.
        self.devnull = None

    def __enter__(self):
        self.devnull = open(os.devnull, 'w')
        self._stdout = self.devnull if self._requested_stdout is None else self._requested_stdout
        self._stderr = self.devnull if self._requested_stderr is None else self._requested_stderr
        self.old_stdout, self.old_stderr = sys.stdout, sys.stderr
        self.old_stdout.flush()
        self.old_stderr.flush()
        sys.stdout, sys.stderr = self._stdout, self._stderr
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        try:
            self._stdout.flush()
            self._stderr.flush()
        finally:
            # Always restore the real streams, even if a flush fails.
            sys.stdout = self.old_stdout
            sys.stderr = self.old_stderr
            self.devnull.close()


def inline_references(text, candidates):
    """Return *text* with referenced fragments from *candidates* substituted in.

    Args:
        text: contents of the document being rewritten.
        candidates: paths of the extracted documents that may hold the
            referenced fragments.

    NOTE(review): every pattern literal in this function reached review with
    the markup inside it removed (tags appear to have been stripped, some
    leaving blank lines behind; the f-strings have lost their placeholders,
    and one replacement refers to group 2 of a pattern that now shows a
    single group). The literals are reproduced as received and MUST be
    restored from the upstream cook.py; as written this function cannot
    run correctly.
    """
    matches = re.findall('', text)
    for match in matches:
        if match[0] == '':
            continue
        for dd in candidates:
            # Plain substring test: the captured name is data, not a regex.
            if match[0] not in dd:
                continue
            with open(dd, 'r', encoding='utf-8') as source:
                source_match = re.search(f"\n\n.*?\n\n", source.read())  # VHS: Change hanging1
            if not source_match:
                continue
            source_match_fixed = re.sub(r'[\.\s ]*(.*?)<\/p>', rf'\2\n\n', source_match.group())
            source_match_fixed = re.sub('\n\n', r'\n\n', source_match_fixed)
            fixed_text = re.sub(f"", f"", text)
            # A callable replacement inserts the fragment verbatim; passing it
            # as a template would misread any backslash it contains.
            text = re.sub(r"\n\s*", lambda _m: f"\n{source_match_fixed}\n", fixed_text)
    return text


def main():
    """Process the EPUB named on the command line and write ``output.epub``."""
    if len(sys.argv) < 2:
        sys.exit("usage: cook.py BOOK.epub")
    file_name = sys.argv[1]
    files_to_work_on = []
    files_to_zip = []

    with ZipFile(file_name, 'r') as archive:
        for filename in archive.namelist():
            if is_html_member(filename):
                archive.extract(filename)
                files_to_work_on.append(filename)

    Path("work").mkdir(parents=True, exist_ok=True)

    print(f"Cooking on {file_name}, with {len(files_to_work_on)} files in it")

    for file in progressbar(files_to_work_on, "", 40):
        with open(file, 'r', encoding='utf-8') as epub_file:
            text = epub_file.read()
        # Documents carrying id="toc" are left untouched.
        if re.findall('id="toc"', text):
            continue
        text = inline_references(text, files_to_work_on)
        # Create the member's own directory chain so nested paths can be written.
        target = Path("work", file)
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, 'w', encoding='utf-8') as output:
            output.write(text)
        files_to_zip.append(file)

    shutil.copy(file_name, "output.epub")
    with ZipFile("output.epub", 'a') as archive:
        # ZipFile.write warns about duplicate names because each member already
        # exists in the copied epub; the warning is silenced on purpose -- the
        # intent is for the rewritten member to supersede the original one.
        with NoStdStreams():
            for file in files_to_zip:
                archive.write(f"work/{file}", file)

    # The work/ directory is intentionally left in place; enable to clean up:
    # shutil.rmtree("work")


if __name__ == "__main__":
    main()