stuff/cook.py

from zipfile import ZipFile
from pathlib import Path
import sys, re, shutil, subprocess

# Matches one endnote reference; captures (anchor id, target file, note id).
NOTE_REF_RE = re.compile(r'<a class="hlink" id="(.*?)" href="(.*?)#(.*?)">')
# Matches the document tail in front of which footnotes are inserted.
BODY_END_RE = re.compile(r"\n\s*</body></html>")


def extract_footnote(source_text, note_id):
    """Return the endnote paragraph *note_id* rewritten as an EPUB footnote.

    Looks for ``<p class="block" id="NOTE_ID">...</p>`` on a single line of
    *source_text*, strips the leading ``<a>...</a> `` back-link and adds
    ``epub:type="footnote"`` to the paragraph tag.

    Returns None when no such paragraph exists.
    """
    # re.escape: ids are data, not regex syntax (an id such as "n+1" must
    # match literally).
    found = re.search(
        f'<p class="block" id="{re.escape(note_id)}">.*?</p>', source_text
    )
    if found is None:
        return None
    note = re.sub(r"<p(.*?)><a.*?</a> (.*?)</p>", r"<p\1>\2</p>", found.group())
    return re.sub(r"<p (.*?)</p>", r'<p epub:type="footnote" \1</p>', note)


def inline_footnotes(text, read_source):
    """Turn the endnote links of one chapter into in-document footnotes.

    Args:
        text: XHTML of the chapter.
        read_source: callable taking an archive-relative path such as
            ``"text/notes.xhtml"`` and returning that file's text, or None
            when it cannot be read.

    Returns:
        ``(new_text, used_sources)`` where *used_sources* lists, in first-use
        order and without duplicates, the files footnotes were taken from.
        References whose note cannot be found are left untouched.
    """
    used_sources = []
    if BODY_END_RE.search(text) is None:
        # Nowhere to put the notes: rewriting the links would only break them.
        return text, used_sources

    inlined = set()
    for anchor_id, target, note_id in NOTE_REF_RE.findall(text):
        if not target:
            # Same-document link: the target paragraph already lives here.
            continue
        source_path = "text/" + target
        source_text = read_source(source_path)
        footnote = (
            None if source_text is None
            else extract_footnote(source_text, note_id)
        )
        if footnote is None:
            continue

        new_anchor = f'<a epub:type="noteref" class="hlink" href="#{note_id}">'
        anchor_re = f'<a class="hlink" id="{re.escape(anchor_id)}" href=".*?#.*?">'
        # Function replacements insert the text verbatim; a plain replacement
        # string would interpret backslashes inside the note as escapes.
        text = re.sub(anchor_re, lambda _m: new_anchor, text)
        if (source_path, note_id) not in inlined:
            # A note referenced twice is appended once to avoid duplicate ids.
            text = BODY_END_RE.sub(
                lambda _m: f"\n{footnote}\n</body></html>", text
            )
            inlined.add((source_path, note_id))
        if source_path not in used_sources:
            used_sources.append(source_path)
    return text, used_sources


def convert_epub(epub_path, output_path="output.epub", work_dir="work"):
    """Write a copy of *epub_path* whose endnotes are inlined as footnotes.

    Every file under ``text/`` is extracted into the current directory,
    rewritten chapters are saved below *work_dir*, and *output_path* is built
    from the original archive with the rewritten chapters swapped in.

    Returns:
        ``(rewritten, endnote_sources)``: the archive names that were
        replaced and the endnote files the footnotes came from.
    """
    with ZipFile(epub_path) as archive:
        chapters = [
            name for name in archive.namelist()
            if name.startswith("text/") and not name.endswith("/")
        ]
        for name in chapters:
            archive.extract(name)
    print(chapters)

    cache = {}

    def read_source(path):
        # Each file is read once; unreadable or non-UTF-8 files yield None.
        if path not in cache:
            try:
                cache[path] = Path(path).read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                cache[path] = None
        return cache[path]

    rewritten = []
    endnote_sources = []
    for name in chapters:
        before = read_source(name)
        if before is None:
            continue
        after, sources = inline_footnotes(before, read_source)
        endnote_sources.extend(s for s in sources if s not in endnote_sources)
        if after == before:
            continue
        target = Path(work_dir, name)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(after, encoding="utf-8")
        rewritten.append(name)

    # Rebuild the archive instead of appending to a copy: appending a name
    # that already exists leaves two members with that name. Reusing each
    # ZipInfo keeps member order and per-member compression (the leading
    # "mimetype" entry included).
    # The endnote source files are deliberately left in the archive.
    with ZipFile(epub_path) as source, ZipFile(output_path, "w") as result:
        for info in source.infolist():
            if info.filename in rewritten:
                data = Path(work_dir, info.filename).read_bytes()
            else:
                data = source.read(info)
            result.writestr(info, data)
    return rewritten, endnote_sources


def main(argv=None):
    """Command-line entry point: ``cook.py BOOK.epub`` writes ``output.epub``."""
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.exit("usage: cook.py BOOK.epub")
    convert_epub(args[0])


if __name__ == "__main__":
    main()
python script for epub endnote>footnote 2024-02-21 03:22:19 +00:00			`from zipfile import ZipFile`
			`from pathlib import Path`
			`import sys, re, shutil, subprocess`

			`file_name = sys.argv[1]`
			`files_to_work_on = []`
			`files_to_zip = []`
			`files_to_dl = []`

			`with ZipFile(file_name, 'r') as zip:`
			`for filename in zip.namelist():`
			`if filename.startswith("text/"):`
			`if filename != "text/":`
			`zip.extract(filename)`
			`files_to_work_on.append(filename)`

			`print(files_to_work_on)`

			`for file in files_to_work_on:`
			`with open(file, 'r') as epub_file:`
			`text = epub_file.read()`
			`matches = re.findall("<a class=\"hlink\" id=\"(.*?)\" href=\"(.*?)#(.*?)\">", text)`
			`if matches:`
			`for match in matches:`
			`match_dir = "text/" + match[1]`
			`with open(match_dir, 'r') as source:`
			`source_match = re.search(f"<p class=\"block\" id=\"{match[2]}\">.*?</p>", source.read())`
			`source_match_fixed = re.sub('<p(.*?)><a.*?</a> (.*?)</p>', r'<p\1>\2</p>', source_match.group())`
			`source_match_fixed = re.sub('<p (.*?)</p>', r'<p epub:type="footnote" \1</p>', source_match_fixed)`
			`fixed_text = re.sub(f"<a class=\"hlink\" id=\"{match[0]}\" href=\".*?#.*?\">", f"<a epub:type=\"noteref\" class=\"hlink\" href=\"#{match[2]}\">", text)`
			`text = re.sub(f"\n\s*</body></html>", f"\n{source_match_fixed}\n</body></html>", fixed_text)`
			`if not match_dir in files_to_dl:`
			`files_to_dl.append(match_dir)`
			`with open(f"work/{file}", 'w') as output:`
			`output.write(text)`
			`files_to_zip.append(file)`

			`shutil.copy(file_name, "output.epub")`

			`with ZipFile("output.epub", 'a') as zip:`
			`for file in files_to_zip:`
			`zip.write(f"work/{file}", file)`
			`#cmd=['zip', '-d', "output.epub"] + files_to_dl`
			`#subprocess.check_call(cmd)`