-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscript.py
More file actions
98 lines (71 loc) · 2.77 KB
/
script.py
File metadata and controls
98 lines (71 loc) · 2.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import fitz
# Source PDF that is read, and destination the edited copy is saved to.
INPUT_PDF = "input.pdf"
OUTPUT_PDF = "output.pdf"

# Substitutions to perform. "max_replace" is optional (it is read with
# .get()) and caps how many matches of "old_text" are replaced.
REPLACEMENTS = [
    {"old_text": "11/10/2025", "new_text": "22/10/2025", "max_replace": 1},
    {"old_text": "10/10/2025", "new_text": "22/10/2025", "max_replace": 1},
]

# Font names that sanitize_font_name() passes through unchanged.
# "times" was removed: PyMuPDF's Base-14 abbreviation for Times-Roman is
# "tiro", and "times" is not a name its font lookup recognises.
BUILTIN_FONTS = {"helv", "cour", "tiro", "symbol", "zapfdingbats"}

# Prefix -> built-in font. sanitize_font_name() compares these keys
# case-insensitively against the start of the font name found in the PDF.
FONT_ALIASES = {
    "Arial": "helv",
    "Calibri": "helv",
    "Helvetica": "helv",
    "Times-Roman": "tiro",
    "TimesNewRoman": "tiro",
    "Courier": "cour",
}
def sanitize_font_name(font_name: str) -> str:
    """Map a font name taken from a PDF span to a built-in font name.

    Resolution order:
      1. An empty/falsy name yields ``"helv"``.
      2. Everything up to and including the last ``"+"`` is dropped
         (presumably a subset-font prefix such as ``"ABCDEF+"``).
      3. The first ``FONT_ALIASES`` key that the remaining name starts with
         (compared case-insensitively) decides the result.
      4. Otherwise the name is returned as-is if it is in ``BUILTIN_FONTS``,
         else ``"helv"``.

    Args:
        font_name: Font name as reported for a text span.

    Returns:
        The built-in font name to use when re-inserting text.
    """
    if not font_name:
        return "helv"

    base = font_name.rsplit("+", 1)[-1]
    lowered = base.lower()

    for prefix in FONT_ALIASES:
        if lowered.startswith(prefix.lower()):
            return FONT_ALIASES[prefix]

    # No alias matched: only names already known as built-ins survive.
    return base if base in BUILTIN_FONTS else "helv"
def find_font_and_size(page, target_text: str) -> tuple[str, float]:
    """Return the font and size of the first span containing ``target_text``.

    The page's ``get_text("dict")`` output is walked block by block, line by
    line, span by span. Blocks without a ``"lines"`` key and lines without a
    ``"spans"`` key are skipped.

    Args:
        page: Object whose ``get_text("dict")`` returns a mapping with a
            ``"blocks"`` list (presumably a PyMuPDF page -- confirm).
        target_text: Substring to look for in each span's ``"text"``
            (case-sensitive ``in`` test).

    Returns:
        ``(font, size)`` where ``font`` has been passed through
        ``sanitize_font_name``; ``("helv", 10)`` if no span matches.
    """
    all_spans = (
        span
        for block in page.get_text("dict")["blocks"]
        for line in block.get("lines", [])
        for span in line.get("spans", [])
    )
    for span in all_spans:
        if target_text in span["text"]:
            return sanitize_font_name(span["font"]), span["size"]
    return "helv", 10  # fallback
def replace_text_in_page(page, old_text: str, new_text: str, max_replace=None):
    """Replace occurrences of ``old_text`` on ``page`` with ``new_text``.

    Every selected match rectangle gets a white-filled redaction annotation,
    the redactions are applied, and ``new_text`` is then drawn in black
    starting at the left edge of each rectangle.

    Args:
        page: Object providing ``search_for``, ``add_redact_annot``,
            ``apply_redactions``, ``insert_text`` and ``get_text``
            (presumably a PyMuPDF page -- confirm against the caller).
        old_text: Text to search for.
        new_text: Text drawn in place of each selected match.
        max_replace: Optional cap on how many matches on this page are
            replaced; ``None`` means all of them.

    Returns:
        ``(count, font_name, font_size)``. When nothing is replaced -- no
        match, or the cap leaves no match -- this is ``(0, None, None)`` and
        the page is not touched.
    """
    matches = page.search_for(old_text)
    if not matches:
        return 0, None, None

    if max_replace is not None:
        matches = matches[:max_replace]
        if not matches:
            # A cap such as 0 selects nothing: skip the font lookup and the
            # redaction pass, and report "nothing replaced" consistently.
            return 0, None, None

    # Looked up before the redactions are applied, while the original span
    # can still be found in the page text.
    font_name, font_size = find_font_and_size(page, old_text)

    for rect in matches:
        page.add_redact_annot(rect, fill=(1, 1, 1))
    page.apply_redactions()

    for rect in matches:
        # Insertion point: left edge, 2 units above the rectangle's bottom.
        page.insert_text(
            (rect.x0, rect.y1 - 2),
            new_text,
            fontname=font_name,
            fontsize=font_size,
            color=(0, 0, 0),
        )

    return len(matches), font_name, font_size
def process_pdf(input_path: str, output_path: str, replacements: list[dict]):
    """Apply text replacements to every page of a PDF and save the result.

    Pages are processed in order (numbered from 1 in the progress output).
    For each page, every entry of ``replacements`` is applied in list order
    via ``replace_text_in_page``; note that ``max_replace`` is therefore a
    per-page cap, not a per-document one.

    Args:
        input_path: Path of the PDF to open.
        output_path: Path the modified document is saved to.
        replacements: Dicts with required keys ``"old_text"`` and
            ``"new_text"`` and an optional ``"max_replace"``.

    Raises:
        KeyError: If a replacement lacks ``"old_text"`` or ``"new_text"``.
    """
    # The context manager closes the document even when a replacement or
    # the save raises, so the file handle is never leaked.
    with fitz.open(input_path) as doc:
        for page_num, page in enumerate(doc, start=1):
            print(f"🔹 Processing page {page_num}...")
            for rep in replacements:
                old_text = rep["old_text"]
                new_text = rep["new_text"]
                max_replace = rep.get("max_replace")
                count, font, size = replace_text_in_page(page, old_text, new_text, max_replace)
                if count > 0:
                    print(f" ✅ '{old_text}' → '{new_text}' ({count}x, font={font}, size={size})")
        doc.save(output_path)
    print(f"\n🎉 Done! Saved edited PDF → {output_path}")
# Script entry point: run the configured replacements only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    process_pdf(
        input_path=INPUT_PDF,
        output_path=OUTPUT_PDF,
        replacements=REPLACEMENTS,
    )