python script for epub endnote>footnote
This commit is contained in:
parent
c20d3feca4
commit
cdeffb3e86
44
cook.py
Normal file
44
cook.py
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
from zipfile import ZipFile
|
||||||
|
from pathlib import Path
|
||||||
|
import sys, re, shutil, subprocess
|
||||||
|
|
||||||
|
# Path of the EPUB to convert, taken from the first command-line argument.
# Exit with a usage line instead of a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} BOOK.epub")
file_name = sys.argv[1]

# Archive members under text/ that were extracted to disk for processing.
files_to_work_on = []
# Members whose rewritten copy lives under work/ and must go into the output.
files_to_zip = []
# Endnote files that notes were pulled from (only used by the disabled
# "zip -d" clean-up step at the end of the script).
files_to_dl = []

# Extract every file stored under text/ into the current directory.
with ZipFile(file_name, 'r') as archive:
    for member in archive.namelist():
        # Directory entries end with "/" ("text/" itself, but also any nested
        # directory); they hold no content and cannot be opened as files.
        if member.startswith("text/") and not member.endswith("/"):
            archive.extract(member)
            files_to_work_on.append(member)

print(files_to_work_on)
|
||||||
|
|
||||||
|
# Turn endnote links into EPUB footnotes: for every note reference in an
# extracted text file, copy the referenced note paragraph to the end of that
# file, retarget the link at the local copy, and write the result to work/.
source_cache = {}  # endnote file path -> its text, so each file is read once

for member in files_to_work_on:
    # EPUB content documents are read and written explicitly as UTF-8 rather
    # than with the platform's default encoding.
    with open(member, 'r', encoding="utf-8") as epub_file:
        text = epub_file.read()

    # Each hit is (anchor id, target file, target fragment) of a note link.
    matches = re.findall(r'<a class="hlink" id="(.*?)" href="(.*?)#(.*?)">', text)
    if not matches:
        # Nothing to rewrite; the member stays as it is in the archive.
        continue

    for anchor_id, target_file, note_id in matches:
        match_dir = "text/" + target_file
        if match_dir not in source_cache:
            with open(match_dir, 'r', encoding="utf-8") as source:
                source_cache[match_dir] = source.read()

        # Ids come from the document, so escape them before using them in a
        # pattern.
        source_match = re.search(
            f'<p class="block" id="{re.escape(note_id)}">.*?</p>',
            source_cache[match_dir],
        )
        if source_match is None:
            # Leave the link untouched rather than crashing on .group().
            print(
                f"{member}: note {note_id!r} not found in {match_dir}; "
                "reference left unchanged",
                file=sys.stderr,
            )
            continue

        # Drop the leading back-link anchor from the note paragraph ...
        footnote = re.sub(
            r'<p(.*?)><a.*?</a> (.*?)</p>', r'<p\1>\2</p>', source_match.group()
        )
        # ... and mark the paragraph as a footnote.
        footnote = re.sub(
            r'<p (.*?)</p>', r'<p epub:type="footnote" \1</p>', footnote
        )

        # Function replacements are used below so that backslashes or "\1"
        # inside the note text are inserted literally, not parsed as
        # replacement-template escapes.
        noteref = f'<a epub:type="noteref" class="hlink" href="#{note_id}">'
        text = re.sub(
            f'<a class="hlink" id="{re.escape(anchor_id)}" href=".*?#.*?">',
            lambda _m: noteref,
            text,
        )
        text = re.sub(
            r"\n\s*</body></html>",
            lambda _m: f"\n{footnote}\n</body></html>",
            text,
        )

        if match_dir not in files_to_dl:
            files_to_dl.append(match_dir)

    # work/text/ does not exist on a fresh run; create it before writing.
    output_path = Path("work") / member
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding="utf-8") as output:
        output.write(text)
    files_to_zip.append(member)
|
||||||
|
|
||||||
|
# Build output.epub from the input archive, swapping in the rewritten members
# from work/. The archive is rebuilt entry by entry instead of being copied
# and appended to: appending would leave the original member in place and add
# a second entry with the same name.
if Path(file_name).resolve() == Path("output.epub").resolve():
    # Opening the output for writing would truncate the input being read.
    sys.exit("input file must not be output.epub")

replaced = set(files_to_zip)
with ZipFile(file_name, 'r') as original, ZipFile("output.epub", 'w') as output_epub:
    # infolist() keeps the original member order, and each entry keeps its own
    # compression method, so the layout of the input archive is preserved.
    for info in original.infolist():
        if info.filename in replaced:
            output_epub.write(
                f"work/{info.filename}",
                info.filename,
                compress_type=info.compress_type,
            )
        else:
            output_epub.writestr(info, original.read(info))

# NOTE: removing the now-inlined endnote files (files_to_dl) from the output
# is left disabled, as in the original script:
#cmd=['zip', '-d', "output.epub"] + files_to_dl
#subprocess.check_call(cmd)
|
Loading…
Reference in a new issue