"""Rewrite the ``text/`` documents of an EPUB archive and save ``output.epub``.

Usage::

    python cook.py BOOK.epub

The script unpacks every document stored under ``text/`` in the archive,
looks for references into other ``text/`` documents, splices the referenced
block into the referring document, writes the rewritten documents below
``work/`` and finally produces ``output.epub`` with those documents replaced.
"""
from pathlib import Path
from zipfile import ZipFile
import re
import shutil
import subprocess  # only needed by the disabled ``zip -d`` step noted in main()
import sys

# NOTE(review): every pattern below is reproduced exactly as it appears in the
# copy of the script under review.  Several of them look as if markup inside
# the string literals was stripped before review: LINK_PATTERN is empty although
# the code indexes ``match[1]`` on each result, and the first cleanup step
# refers to group 2 of a pattern that only has one group.  As written,
# ``inline_references`` therefore cannot succeed.  Restore the intended
# patterns from the author's copy before relying on this script.
LINK_PATTERN = ""
SOURCE_BLOCK_PATTERN = "\n\n.*?\n\n"
BLOCK_CLEANUP_STEPS = (
    (" (.*?)\n\n", "\\2\n\n"),
    ("\n\n", "\n\n"),
)
# Written with an escaped backslash: the original spelled this "\n\s*" in a
# non-raw string, which yields the same value but triggers an
# invalid-escape-sequence warning on current Python versions.
INSERTION_POINT_PATTERN = "\n\\s*"

TEXT_PREFIX = "text/"
WORK_DIR = "work"
OUTPUT_NAME = "output.epub"


def extract_text_documents(archive_path, dest=None):
    """Extract every file stored under ``text/`` and return the member names.

    Args:
        archive_path: Path of the archive to read.
        dest: Directory to extract into; ``None`` means the current working
            directory, which is what the script itself uses.

    Returns:
        The archive member names (for example ``"text/ch1.xhtml"``) of the
        extracted files, in archive order.
    """
    extracted = []
    with ZipFile(archive_path, "r") as archive:
        for info in archive.infolist():
            if not info.filename.startswith(TEXT_PREFIX):
                continue
            # Skip *every* directory entry, not just "text/" itself; a nested
            # directory entry would otherwise be opened as a file later on.
            if info.is_dir():
                continue
            archive.extract(info, path=dest)
            extracted.append(info.filename)
    return extracted


def inline_references(text, root=Path("."), consumed=None):
    """Splice referenced blocks from other ``text/`` documents into *text*.

    Args:
        text: Content of the referring document.
        root: Directory that contains the extracted ``text/`` folder.
        consumed: Optional list; the relative path of every document a block
            was taken from is appended to it once.

    Returns:
        The rewritten text, or ``None`` when LINK_PATTERN finds nothing in
        *text* (the caller then leaves the document untouched).
    """
    matches = re.findall(LINK_PATTERN, text)
    if not matches:
        return None
    if consumed is None:
        consumed = []

    for match in matches:
        # The second capture of each link names the document being referenced.
        source_name = TEXT_PREFIX + match[1]
        source_text = (Path(root) / source_name).read_text(encoding="utf-8")

        found = re.search(SOURCE_BLOCK_PATTERN, source_text)
        if found is None:
            # The original dereferenced the result unconditionally and died
            # with AttributeError; report the reference and carry on instead.
            print(f"no block found in {source_name}; reference left as is",
                  file=sys.stderr)
            continue

        block = found.group()
        for pattern, replacement in BLOCK_CLEANUP_STEPS:
            block = re.sub(pattern, replacement, block)

        # NOTE(review): both arguments were f-strings in the original and are
        # empty in the reviewed copy, so this substitution currently changes
        # nothing; whatever they interpolated is not recoverable from here.
        text = re.sub("", "", text)

        # A callable replacement inserts the block verbatim.  Passing it as a
        # replacement *string* would make re.sub interpret any backslash in
        # the document content as an escape or group reference.
        text = re.sub(INSERTION_POINT_PATTERN,
                      lambda _hit: "\n" + block + "\n",
                      text)

        if source_name not in consumed:
            consumed.append(source_name)
    return text


def write_updated_epub(source_epub, output_epub, replacements):
    """Copy *source_epub* to *output_epub*, swapping in rewritten members.

    The original script copied the archive and then appended the rewritten
    files, which leaves two members with the same name in the result.  Here
    the archive is rebuilt member by member so every name occurs once; member
    order and each member's compression method are carried over unchanged.

    Args:
        source_epub: Archive to read.
        output_epub: Archive to create (overwritten if present).
        replacements: Mapping of member name to the path of the file whose
            bytes replace that member.

    Raises:
        shutil.SameFileError: If both paths refer to the same file, matching
            what ``shutil.copy`` raised in the original script.
    """
    if Path(source_epub).resolve() == Path(output_epub).resolve():
        raise shutil.SameFileError(
            f"{source_epub!r} and {output_epub!r} are the same file")

    with ZipFile(source_epub, "r") as src, ZipFile(output_epub, "w") as dst:
        for info in src.infolist():
            if info.filename in replacements:
                data = Path(replacements[info.filename]).read_bytes()
            else:
                data = src.read(info)
            # Passing the ZipInfo keeps the member's name, timestamp and
            # compression method.
            dst.writestr(info, data)


def main(argv=None):
    """Run the script; *argv* defaults to ``sys.argv``. Returns an exit code."""
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        print("usage: cook.py BOOK.epub", file=sys.stderr)
        return 2
    file_name = args[1]

    files_to_work_on = extract_text_documents(file_name)
    print(files_to_work_on)

    files_to_dl = []
    rewritten = {}
    for name in files_to_work_on:
        text = Path(name).read_text(encoding="utf-8")
        new_text = inline_references(text, consumed=files_to_dl)
        # NOTE(review): assumes only documents containing at least one link
        # are rewritten; the reviewed copy had lost its indentation.
        if new_text is None:
            continue
        target = Path(WORK_DIR) / name
        # "work/text/" is not created anywhere else, so make sure it exists.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(new_text, encoding="utf-8")
        rewritten[name] = target

    write_updated_epub(file_name, OUTPUT_NAME, rewritten)

    # Removing the documents the blocks were taken from was left disabled in
    # the original; files_to_dl is still collected for it:
    #cmd=['zip', '-d', "output.epub"] + files_to_dl
    #subprocess.check_call(cmd)
    return 0


if __name__ == "__main__":
    sys.exit(main())