Path:
strictdoc/features/html2pdf/pdf_postprocessor.py
Lines:
159
Non-empty lines:
138
Non-empty lines covered with requirements:
138 / 138 (100.0%)
Functions:
6
Functions covered by requirements:
6 / 6 (100.0%)
"""
@relation(SDOC-SRS-51, scope=file)
"""
5
import ntpath
6
import os
7
from pathlib import Path
8
from tempfile import NamedTemporaryFile
9
from typing import Dict, List, Optional, Tuple
10
from urllib.parse import unquote, urlsplit
11
12
from pypdf import PdfWriter
13
from pypdf.generic import (
14
DictionaryObject,
15
NameObject,
16
PdfObject,
17
TextStringObject,
18
)19
20
21
class PDFPostprocessor:
    """
    Rewrites links between printed documents inside the generated PDF files.

    Link annotations whose action is a ``file:`` URI pointing at one of the
    printed HTML files are replaced with remote go-to (``/GoToR``) actions
    that point at the PDF produced from that HTML file.
    """

    @classmethod
    def rewrite_cross_document_links(
        cls,
        *,
        path_to_input_root: str,  # noqa: ARG003
        paths_to_print: List[Tuple[str, str]],
    ) -> None:
        """
        Rewrite the cross-document links in every printed PDF.

        ``paths_to_print`` is a list of ``(path_to_html, path_to_pdf)``
        pairs. ``path_to_input_root`` is accepted but not used here.
        """
        # The keys are the path components of the file:// URIs of the
        # resolved HTML files, i.e. the same form that urlsplit() yields for
        # the link URIs that are looked up in _create_pdf_gotor_action().
        html_to_pdf_map: Dict[str, str] = {
            urlsplit(
                Path(path_to_html).resolve().as_uri()
            ).path: os.path.abspath(path_to_pdf)
            for path_to_html, path_to_pdf in paths_to_print
        }
        for _, path_to_pdf in paths_to_print:
            cls._rewrite_cross_document_links_in_single_document(
                html_to_pdf_map=html_to_pdf_map,
                path_to_pdf=path_to_pdf,
            )

    @classmethod
    def _rewrite_cross_document_links_in_single_document(
        cls,
        *,
        html_to_pdf_map: Dict[str, str],
        path_to_pdf: str,
    ) -> None:
        """
        Rewrite the matching link annotations of a single PDF file in place.

        The file is only rewritten if at least one link was changed. The new
        content is first written to a temporary file in the same directory
        and then moved over the original with os.replace().
        """
        path_to_pdf = os.path.abspath(path_to_pdf)
        path_to_pdf_dir = os.path.dirname(path_to_pdf)

        writer = PdfWriter(clone_from=path_to_pdf)
        modified = False

        for page in writer.pages:
            annotations = page.get("/Annots")
            if annotations is None:
                continue

            for annotation_reference in annotations:
                annotation = annotation_reference.get_object()
                if not isinstance(annotation, DictionaryObject):
                    continue
                if annotation.get("/Subtype") != "/Link":
                    continue

                # Only links that carry a /URI action are candidates.
                action = annotation.get("/A")
                if not isinstance(action, DictionaryObject):
                    continue
                if action.get("/S") != "/URI":
                    continue

                uri = action.get("/URI")
                if not isinstance(uri, str):
                    continue

                rewritten_action = cls._create_pdf_gotor_action(
                    uri=uri,
                    html_to_pdf_map=html_to_pdf_map,
                    path_to_pdf_dir=path_to_pdf_dir,
                )
                if rewritten_action is None:
                    continue

                annotation[NameObject("/A")] = rewritten_action
                modified = True

        if not modified:
            return

        temp_file_path: Optional[str] = None
        replaced = False
        try:
            with NamedTemporaryFile(
                mode="wb",
                suffix=".pdf",
                dir=path_to_pdf_dir,
                delete=False,
            ) as temp_file:
                temp_file_path = temp_file.name
                writer.write(temp_file)
            os.replace(temp_file_path, path_to_pdf)
            replaced = True
        finally:
            # The temporary file is created with delete=False, so it has to
            # be removed by hand if writing or replacing failed. Otherwise a
            # stray *.pdf file would be left next to the printed documents.
            if not replaced and temp_file_path is not None:
                try:
                    os.remove(temp_file_path)
                except OSError:
                    # Best-effort cleanup: let the original error propagate.
                    pass

    @staticmethod
    def _create_pdf_gotor_action(
        *,
        uri: str,
        html_to_pdf_map: Dict[str, str],
        path_to_pdf_dir: str,
    ) -> Optional[DictionaryObject]:
        """
        Create a /GoToR action that replaces a link to a printed HTML file.

        Returns None if ``uri`` is not a ``file:`` URI or if its path is not
        a key of ``html_to_pdf_map``. Otherwise the returned action points at
        the matching PDF, relative to ``path_to_pdf_dir``, and carries the
        URI fragment (if any) as its destination.
        """
        # ruff: noqa: ERA001
        # urlsplit() produces an object of the following kind:
        # SplitResult(
        #   scheme='file',
        #   netloc='',
        #   path='<path-to-project>/output/html2pdf/html/<project-mount-folder>/<path-to-doc>-PDF.html',
        #   query='',
        #   fragment='ANCHOR'
        # )
        parsed_uri = urlsplit(uri)
        if parsed_uri.scheme != "file":
            return None

        matching_pdf_abspath = html_to_pdf_map.get(parsed_uri.path)
        if matching_pdf_abspath is None:
            return None

        matching_pdf_relpath = PDFPostprocessor._create_relative_pdf_path(
            path_to_pdf=matching_pdf_abspath,
            start_dir=path_to_pdf_dir,
        )

        action = DictionaryObject()
        action[NameObject("/Type")] = NameObject("/Action")
        action[NameObject("/S")] = NameObject("/GoToR")
        action[NameObject("/F")] = TextStringObject(matching_pdf_relpath)

        # unquote() always returns a str, so a truthiness check is enough to
        # skip links that have no fragment.
        destination_name = unquote(parsed_uri.fragment)
        if destination_name:
            action[NameObject("/D")] = (
                PDFPostprocessor._create_destination_object(destination_name)
            )
        return action

    @staticmethod
    def _create_destination_object(destination_name: str) -> PdfObject:
        """
        Create the PDF name object for a non-empty destination name.

        A leading slash is added unless the name already starts with one.
        """
        assert len(destination_name) > 0
        if destination_name.startswith("/"):
            return NameObject(destination_name)
        return NameObject(f"/{destination_name}")

    @staticmethod
    def _create_relative_pdf_path(*, path_to_pdf: str, start_dir: str) -> str:
        """
        Return ``path_to_pdf`` relative to ``start_dir`` with forward slashes.

        Windows path rules (ntpath) are used as soon as either argument has a
        drive prefix, otherwise the path rules of the current platform apply.

        NOTE(review): relpath() is documented to raise ValueError on Windows
        when both paths are on different drives; that case is not handled.
        """
        path_module = (
            ntpath
            if ntpath.splitdrive(path_to_pdf)[0]
            or ntpath.splitdrive(start_dir)[0]
            else os.path
        )
        relative_path = path_module.relpath(path_to_pdf, start=start_dir)
        # assert is needed to satisfy the type checker.
        assert isinstance(relative_path, str), relative_path
        return relative_path.replace("\\", "/")