"""Substitute sections of referenced ``text/`` files into an EPUB's text files.

Usage: ``python <this script> BOOK.epub``

Every archive member under ``text/`` is extracted into the current directory.
Each extracted file is scanned for references; for every reference a section
is read from the referenced file (``"text/" + match[1]``), cleaned up, and
substituted into the referencing file.  Rewritten files are stored under
``work/`` and a new archive ``output.epub`` is written in which they replace
the members of the same name.
"""
from zipfile import ZipFile
from pathlib import Path
import re
import shutil
import subprocess  # only referenced by the disabled clean-up step (see main)
import sys

OUTPUT_EPUB = "output.epub"
WORK_DIR = Path("work")
TEXT_PREFIX = "text/"

# NOTE(review): the pattern and replacement literals below are reproduced
# character-for-character from the reviewed source.  They look incomplete:
# the reference pattern is empty, several of them were f-strings without any
# placeholder, and INNER_REPLACEMENT refers to group 2 although INNER_PATTERN
# defines a single group (re.sub raises re.error for that).  Presumably the
# markup they matched was lost -- restore them from the authoritative copy
# of this script before running it on a real book.
REFERENCE_PATTERN = ""
SECTION_PATTERN = "\n\n.*?\n\n"
INNER_PATTERN = " (.*?)\n\n"
INNER_REPLACEMENT = "\\2\n\n"
BREAK_PATTERN = "\n\n"
BREAK_REPLACEMENT = "\n\n"
CLEANUP_PATTERN = ""
CLEANUP_REPLACEMENT = ""
# Raw string: the original non-raw "\n\s*" relied on the invalid escape "\s".
SLOT_PATTERN = r"\n\s*"


def _extract_text_entries(epub_path):
    """Extract every member below ``text/`` into the cwd and return their names."""
    with ZipFile(epub_path, "r") as archive:
        names = [
            name
            for name in archive.namelist()
            if name.startswith(TEXT_PREFIX) and name != TEXT_PREFIX
        ]
        for name in names:
            archive.extract(name)
    return names


def _substitute_references(text, referenced):
    """Return ``(new_text, found)`` for one text file.

    ``found`` is True when at least one reference matched.  The path of every
    file a section was taken from is appended to ``referenced`` (once).

    Raises:
        ValueError: if a referenced file contains no matching section.
    """
    matches = re.findall(REFERENCE_PATTERN, text)
    for match in matches:
        source_path = TEXT_PREFIX + match[1]
        with open(source_path, "r", encoding="utf-8") as source:
            section = re.search(SECTION_PATTERN, source.read())
        if section is None:
            raise ValueError(f"no matching section found in {source_path}")

        snippet = re.sub(INNER_PATTERN, INNER_REPLACEMENT, section.group())
        snippet = re.sub(BREAK_PATTERN, BREAK_REPLACEMENT, snippet)

        text = re.sub(CLEANUP_PATTERN, CLEANUP_REPLACEMENT, text)
        # A callable replacement inserts the snippet verbatim; passing it as a
        # template string would make re.sub interpret any backslash in it.
        insertion = "\n" + snippet + "\n"
        text = re.sub(SLOT_PATTERN, lambda _m: insertion, text)

        if source_path not in referenced:
            referenced.append(source_path)
    return text, bool(matches)


def _write_output(epub_path, rewritten):
    """Write ``output.epub``: a copy of *epub_path* with rewritten members swapped in.

    The archive is rebuilt member by member instead of appended to, because
    appending under an existing name leaves two entries with that name.
    Member order and per-member metadata (including the compression method)
    are taken from the source archive.

    Raises:
        shutil.SameFileError: if *epub_path* is ``output.epub`` itself.
    """
    if Path(epub_path).resolve() == Path(OUTPUT_EPUB).resolve():
        raise shutil.SameFileError(
            f"{epub_path!r} and {OUTPUT_EPUB!r} are the same file"
        )
    replaced = set(rewritten)
    with ZipFile(epub_path, "r") as src, ZipFile(OUTPUT_EPUB, "w") as dst:
        for info in src.infolist():
            if info.filename in replaced:
                data = (WORK_DIR / info.filename).read_bytes()
            else:
                data = src.read(info)
            dst.writestr(info, data)


def main(argv=None):
    """Run the conversion for the EPUB named by the first argument.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.exit("usage: python <script> BOOK.epub")
    file_name = args[0]

    text_files = _extract_text_entries(file_name)
    print(text_files)

    rewritten = []
    referenced = []
    for name in text_files:
        with open(name, "r", encoding="utf-8") as epub_file:
            text = epub_file.read()
        new_text, found = _substitute_references(text, referenced)
        if not found:
            continue
        target = WORK_DIR / name
        # work/text/ does not exist on a fresh run; create it before writing.
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, "w", encoding="utf-8") as output:
            output.write(new_text)
        rewritten.append(name)

    _write_output(file_name, rewritten)

    # TODO: `referenced` is collected for a clean-up step that was disabled in
    # the original script (deleting those members from output.epub with
    # `zip -d`); the referenced files are still kept in the output archive.


if __name__ == "__main__":
    main()