-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdf-outline.py
129 lines (113 loc) · 3.84 KB
/
pdf-outline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from dataclasses import dataclass
from enum import StrEnum, auto
from PyPDF2 import PageObject, PdfReader, PdfWriter
from PyPDF2.generic import PAGE_FIT, Fit
class Fits(StrEnum):
PAGE = auto()
TOP = auto()
TOP2 = auto()
CENTER = auto()
BOTTOM = auto()
BOTTOM2 = auto()
def get_fit(page: PageObject, fit: Fits = Fits.PAGE) -> Fit:
    """Translate a ``Fits`` preset into a PyPDF2 ``Fit`` for ``page``.

    Args:
        page: Page whose media-box height positions the view.
        fit: Requested preset; defaults to ``Fits.PAGE``.

    Returns:
        ``Fit.fit_horizontally(top=...)`` with ``top`` set to a fraction of
        the page height for the positional presets, or ``PAGE_FIT`` for
        ``Fits.PAGE`` and for any value that matches none of them.
    """
    height = float(page.mediabox.height)

    # (preset, divisor, multiplier) -> top = height / divisor * multiplier
    placements = (
        (Fits.TOP, 3, 2),      # two thirds of the page height
        (Fits.TOP2, 4, 3),     # three quarters
        (Fits.CENTER, 2, 1),   # half
        (Fits.BOTTOM, 3, 1),   # one third
        (Fits.BOTTOM2, 4, 1),  # one quarter
    )
    for preset, divisor, multiplier in placements:
        if fit == preset:
            return Fit.fit_horizontally(top=(height / divisor * multiplier))

    return PAGE_FIT
@dataclass
class OutlineItem:
    """A single outline (bookmark) entry: title, target page and view preset."""

    # Text used as the outline item's title.
    title: str
    # Target page number; add_pdf_outline adds its ``first_page`` offset
    # (default -1) to this before indexing the writer's pages.
    page: int
    # View preset passed to get_fit; whole-page fit unless overridden.
    fit: Fits = Fits.PAGE
class BaseOutline:
    """A top-level outline entry together with its direct children.

    Attributes:
        outline: The parent entry.
        children: Entries nested one level below ``outline``.
    """

    def __init__(
        self,
        outline: "OutlineItem",
        children: "list[OutlineItem] | None" = None,
    ) -> None:
        """Store the parent entry and its children.

        Args:
            outline: The parent entry.
            children: Nested entries. When omitted (or ``None``), a new
                empty list is created for this instance. A list that is
                passed in is stored as-is, not copied.
        """
        self.outline = outline
        # A literal ``[]`` default is created once at definition time and
        # would be shared by every instance built without ``children``.
        self.children = [] if children is None else children
def _add_outline_entry(new_pdf: PdfWriter, entry, first_page: int, parent=None):
    """Add one ``OutlineItem`` to the writer's outline and return the new item.

    ``parent`` is the value returned for the enclosing entry, or ``None``
    for a top-level entry.
    """
    page_number = first_page + entry.page
    return new_pdf.add_outline_item(
        title=entry.title,
        page_number=page_number,
        parent=parent,
        fit=get_fit(page=new_pdf.pages[page_number], fit=entry.fit),
    )


def add_pdf_outline(
    new_pdf: PdfWriter, outline: list[BaseOutline], first_page: int = -1
) -> None:
    """Write a two-level outline into ``new_pdf``.

    Top-level entries and their children are appended in the order in which
    they appear in ``outline``.

    Args:
        new_pdf: Writer that already contains the pages being referenced.
        outline: Top-level entries, each with its list of children.
        first_page: Offset added to every ``OutlineItem.page`` to obtain the
            index into ``new_pdf.pages``. The default of -1 maps page 1 to
            index 0.
    """
    for item in outline:
        # Append the parent at the end of the top level. Passing the first
        # parent as ``before`` for every later entry (as was done previously)
        # inserts them ahead of it, so entries 1, 2, 3 come out as 2, 3, 1.
        parent = _add_outline_entry(new_pdf, item.outline, first_page)

        # Nest the children under the parent that was just created.
        for child in item.children:
            _add_outline_entry(new_pdf, child, first_page, parent=parent)
# Sample outline: two top-level entries (pages 1 and 3), each followed by the
# same four children that all point at the page right after their parent.
# NOTE(review): both "child 3 - top" entries use Fits.CENTER and share one
# title; the labels are kept verbatim -- confirm whether that is intended.
document_outline_tree = [
    BaseOutline(
        outline=OutlineItem(title=parent_title, page=parent_page),
        children=[
            OutlineItem(title="child 1 - fit", page=parent_page + 1),
            OutlineItem(title="child 2 - top", page=parent_page + 1, fit=Fits.TOP),
            OutlineItem(title="child 3 - top", page=parent_page + 1, fit=Fits.CENTER),
            OutlineItem(title="child 3 - top", page=parent_page + 1, fit=Fits.CENTER),
        ],
    )
    for parent_title, parent_page in (("outline 1", 1), ("outline 2", 3))
]
def build_output_path(pdf_file_path: str) -> str:
    """Return the path the result is saved to: ``<input without .pdf>_output.pdf``.

    The ``.pdf`` ending is removed as a suffix. ``str.rstrip('.pdf')`` is not
    suitable here: it strips any trailing run of the characters ``.``, ``p``,
    ``d`` and ``f``, so ``"map.pdf"`` would be cut down to ``"ma"``.

    Args:
        pdf_file_path: Path of the input PDF.

    Returns:
        The input path with a trailing ``.pdf`` (if present) replaced by
        ``_output.pdf``.
    """
    return f"{pdf_file_path.removesuffix('.pdf')}_output.pdf"


if __name__ == "__main__":
    # input file and metadata values
    author = "author name"
    document_title = "title of the docuemnt"
    pdf_file_path = "pdf_file_path.pdf"
    creator = "creator of document"

    # read pdf
    pdf_file = PdfReader(stream=pdf_file_path)

    # create output pdf
    new_pdf = PdfWriter()

    # offset from the 1-based page numbers in the outline to 0-based indices
    start_page = -1

    # import the pdf to new pdf
    new_pdf.merge(position=None, fileobj=pdf_file)

    # add outlines objects
    add_pdf_outline(
        new_pdf=new_pdf,
        outline=document_outline_tree,
        first_page=start_page,
    )

    # add pdf metadata
    metadata = {
        # Optional keys, shown as examples of the expected format:
        # "/CreationDate": "D:20180613174011+05'30'",
        # "/ModDate": "D:20190119163727+01'00'",
        # "/Producer": "3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)",
        "/Author": author,
        "/Title": document_title,
        "/Creator": creator,
    }
    new_pdf.add_metadata(infos=metadata)

    # save output
    new_pdf.write(build_output_path(pdf_file_path))