Path:
strictdoc/export/rst/rst_to_html_fragment_writer.py
Lines:
275
Non-empty lines:
243
Non-empty lines covered with requirements:
243 / 243 (100.0%)
Functions:
6
Functions covered by requirements:
6 / 6 (100.0%)
- "14.4. Caching of RST fragments" (REQUIREMENT)
- "2.11. Format-specific markup-to-HTML fragment writers" (REQUIREMENT)
- "8.1.2. Docutils" (REQUIREMENT)
1
"""2
@relation(SDOC-SRS-3, scope=file)3
"""4
5
import hashlib
6
import io
7
import os
8
import re
9
import time
10
import uuid
11
from pathlib import Path
12
from typing import Optional, Tuple
13
14
from docutils.core import publish_parts
15
from docutils.parsers.rst import directives, roles
16
from docutils.utils import SystemMessage
17
from markupsafe import Markup
18
from pygments.lexers import _load_lexers
19
20
from strictdoc.backend.sdoc.models.document import SDocDocument
21
from strictdoc.core.project_config import ProjectConfig, ProjectFeature
22
from strictdoc.export.rst.directives.raw_html_role import raw_html_role
23
from strictdoc.export.rst.directives.sphinx_style_math import (
24
MathDirective,
25
MathDirectiveForServer,
26
eq_role,
27
eq_role_for_server,
28
math_role,
29
math_role_for_server,
30
)31
from strictdoc.export.rst.directives.wildcard_enhanced_image import (
32
STRICTDOC_REFERENCE_PATH_SETTING,
33
WildcardEnhancedImage,
34
)35
from strictdoc.helpers.file_system import file_open_read_bytes
36
37
# How many times os.replace() is attempted when moving a freshly rendered
# fragment into the cache before the PermissionError is re-raised.
MAX_RETRIES_FOR_CACHE_FILESYSTEM_LOCKING = 3
38
39
40
class RstToHtmlFragmentWriter:
    """
    Convert RST fragments to HTML body fragments using docutils.

    Rendered fragments can be cached on disk under a directory derived from
    the project's cache dir and an MD5 of the project's output dir.

    Note that defining this class registers the "image" directive, the
    "rawhtml" role and the StrictDoc lexers, and that creating an instance
    may register MathJax-related roles and directives. These registrations
    are made through module-level docutils/Pygments registration functions.
    """

    directives.register_directive("image", WildcardEnhancedImage)

    roles.register_local_role("rawhtml", raw_html_role)

    # FIXME: It doesn't feel right to load lexers like this.
    _load_lexers("strictdoc.export.rst.strictdoc_lexer")

    BASE_SETTINGS = {
        # This is important for code syntax highlighting. The setting of
        # "short" is coupled to the CSS file that we auto-generated using Pygments:
        # strictdoc/export/html/_static/pygments.css
        "syntax_highlight": "short",
        "syntax_highlight_opts": {
            "linenos": "inline",  # "table"
        },
    }

    # A typical RST warning when a source path is passed to docutils:
    # """
    # path-to-output-folder/file.rst:4: (WARNING/2) Bullet list ends
    # without a blank line; unexpected unindent.
    # """
    _WARNING_PATTERN_WITH_SOURCE_PATH = re.compile(
        r".*:(?P<line>\d+): \(.*\) (?P<message>.*)"
    )

    # A typical RST warning when no source path is passed to docutils:
    # """
    # <string>:4: (WARNING/2) Bullet list ends without a blank line;
    # unexpected unindent.
    # """
    _WARNING_PATTERN_WITHOUT_SOURCE_PATH = re.compile(
        r".*<.*>:(?P<line>\d+): \(.*\) (?P<message>.*)"
    )

    def __init__(
        self,
        *,
        project_config: ProjectConfig,
        context_document: Optional[SDocDocument],
    ):
        """
        Set up the cache location, path resolution and MathJax handlers.

        project_config provides the output dir (hashed into the cache path),
        the cache dir, the MATHJAX feature flag and the server-mode flag.

        context_document, when not None, provides the output directory that
        is used both as the reference path and as the directory of the
        virtual source path. When it is None, the current working directory
        and "<string>" are used instead.
        """
        self.source_path: str
        path_to_output_dir_md5: str = hashlib.md5(
            project_config.output_dir.encode("utf-8")
        ).hexdigest()

        path_to_tmp_dir = project_config.get_path_to_cache_dir()
        self.path_to_rst_cache_dir = os.path.join(
            path_to_tmp_dir, "rst", path_to_output_dir_md5
        )
        self.reference_path = os.getcwd()

        if context_document is not None:
            assert context_document.meta is not None
            self.reference_path = (
                context_document.meta.output_document_dir_full_path
            )

            # This is a delicate move. Based on a user report and our findings,
            # the csv-table RST directive relies on the 'source path' to
            # calculate paths to CSV files.
            # Our case is, however, special: we do not render RST files but
            # rather RST fragments in memory, and because of that we don't have
            # RST files to point to with 'source_path=' below.
            # At the same time, passing the output folder of the document works
            # because this RST-to-HTML writer resolves path to CSV assets
            # that are copied to that output folder by StrictDoc.
            # See CSVTable().get_csv_data() where the source_path is used.
            self.source_path = os.path.join(
                context_document.meta.output_document_dir_full_path,
                "STRICTDOC-FRAGMENT.rst",
            )
        else:
            self.source_path = "<string>"
        self.context_document: Optional[SDocDocument] = context_document

        if project_config.is_feature_activated(ProjectFeature.MATHJAX):
            if project_config.is_running_on_server:
                roles.register_canonical_role("eq", eq_role_for_server)
                roles.register_canonical_role("math", math_role_for_server)
                directives.register_directive("math", MathDirectiveForServer)
            else:
                roles.register_canonical_role("eq", eq_role)
                roles.register_canonical_role("math", math_role)
                directives.register_directive("math", MathDirective)

    def write(self, rst_fragment: str, use_cache: bool = True) -> Markup:
        """
        Render an RST fragment to HTML, using the on-disk cache if allowed.

        Fragments shorter than 40 characters are never cached. With
        use_cache=False the cache is neither read nor written, and no cache
        directory is created.

        Raises RuntimeError if docutils emits warnings for the fragment (see
        _write_no_cache) and PermissionError/OSError if the rendered
        fragment cannot be stored in the cache.
        """
        assert isinstance(rst_fragment, str), rst_fragment

        # Do not try to cache very small fragments.
        if not use_cache or len(rst_fragment) < 40:
            return Markup(self._write_no_cache(rst_fragment))

        # Cache layout: <cache dir>/<fragment length>/<MD5 of the fragment>.
        path_to_rst_fragment_bucket_dir = os.path.join(
            self.path_to_rst_cache_dir, str(len(rst_fragment))
        )
        fragment_md5 = hashlib.md5(rst_fragment.encode("utf-8")).hexdigest()
        path_to_cached_fragment = os.path.join(
            path_to_rst_fragment_bucket_dir, fragment_md5
        )

        if os.path.isfile(path_to_cached_fragment):
            with file_open_read_bytes(
                path_to_cached_fragment
            ) as cached_fragment_file_:
                return Markup(cached_fragment_file_.read().decode("UTF-8"))

        Path(path_to_rst_fragment_bucket_dir).mkdir(
            parents=True, exist_ok=True
        )

        rendered_html: str = self._write_no_cache(rst_fragment)
        self._store_cached_fragment(
            path_to_cached_fragment, rendered_html.encode("UTF-8")
        )

        return Markup(rendered_html)

    @staticmethod
    def _store_cached_fragment(
        path_to_cached_fragment: str, rendered_html_bytes: bytes
    ) -> None:
        """
        Write the rendered bytes to the cache file via a temp file + rename.

        Raises PermissionError if os.replace() fails on every attempt, and
        whatever OSError the temp file write raises. In both cases the temp
        file is removed on a best-effort basis before the error propagates.
        """
        # Thread-safe cache update strategy:
        # 1) write bytes to a unique temp file, then
        # 2) atomically replace the target cache file with os.replace().
        # This ensures that concurrent readers either see the old complete
        # file or the new complete file, but never a partially written file.
        # (os.replace is atomic when source and destination are on the same
        # filesystem, which is true because both paths are in one cache dir.)
        tmp_path_to_cached_fragment = (
            f"{path_to_cached_fragment}.{uuid.uuid4().hex}.tmp"
        )
        moved_into_place = False
        try:
            with open(
                tmp_path_to_cached_fragment, "wb"
            ) as cached_fragment_file_:
                cached_fragment_file_.write(rendered_html_bytes)

            # On Windows in particular, we might get interference from Windows Defender
            # for obtaining the file system locks. As a work-around, we try multiple times...
            last_attempt = MAX_RETRIES_FOR_CACHE_FILESYSTEM_LOCKING - 1
            for attempt in range(MAX_RETRIES_FOR_CACHE_FILESYSTEM_LOCKING):
                try:
                    os.replace(
                        tmp_path_to_cached_fragment, path_to_cached_fragment
                    )
                except PermissionError:
                    if attempt >= last_attempt:
                        # Surface the original error
                        raise
                    # Wait 100ms, then 200ms, etc., to let Windows Defender release the lock
                    time.sleep(0.1 * (attempt + 1))
                else:
                    moved_into_place = True
                    break
        finally:
            if not moved_into_place:
                # Do not leave orphaned temp files in the cache directory.
                # The removal is best-effort: a failure here must not mask
                # the error that is already propagating.
                try:
                    os.remove(tmp_path_to_cached_fragment)
                except OSError:
                    pass

    def _build_settings(self, warning_stream: io.StringIO) -> dict:
        """
        Return the docutils settings overrides for a single rendering run.
        """
        return {
            **self.BASE_SETTINGS,
            "warning_stream": warning_stream,
            STRICTDOC_REFERENCE_PATH_SETTING: self.reference_path,
        }

    @staticmethod
    def _describe_rst_problem(warnings: str, pattern: "re.Pattern[str]") -> str:
        """
        Turn docutils warning text into a short error message.

        If the pattern finds a line number and a message in the warnings,
        both are reported; otherwise the warnings are reported verbatim.
        """
        match = pattern.search(warnings)
        if match is None:
            return f"RST markup syntax error: {warnings}"
        return (
            f"RST markup syntax error on line {match.group('line')}: "
            f"{match.group('message')}"
        )

    def _write_no_cache(self, rst_fragment: str) -> str:
        """
        Render an RST fragment to an HTML body string, bypassing the cache.

        Raises RuntimeError if docutils wrote anything to the warning stream.
        """
        assert isinstance(rst_fragment, str), rst_fragment

        # How do I convert a docutils document tree into an HTML string?
        # https://stackoverflow.com/a/32168938/598057
        # Use a io.StringIO as the warning stream to prevent warnings from
        # being printed to sys.stderr.
        # https://www.programcreek.com/python/example/88126/docutils.core.publish_parts
        warning_stream = io.StringIO()

        output = publish_parts(
            rst_fragment,
            writer="html",
            settings_overrides=self._build_settings(warning_stream),
            source_path=self.source_path,
        )

        if warning_stream.tell() > 0:
            warnings = warning_stream.getvalue().rstrip("\n")
            error_message = self._describe_rst_problem(
                warnings, self._WARNING_PATTERN_WITH_SOURCE_PATH
            )
            final_message = (
                f"problems when converting RST to HTML: {error_message}\n"
                "RST fragment: >>>\n"
                f"{rst_fragment}"
                "<<<"
            )
            raise RuntimeError(final_message)

        html: str = output["html_body"]

        return html

    def write_with_validation(
        self, rst_fragment: str
    ) -> Tuple[Optional[str], Optional[str]]:
        """
        Render an RST fragment and report problems instead of raising.

        Returns (html, None) on success and (None, error_message) when
        docutils raises SystemMessage or writes warnings to the warning
        stream. The cache is not used.
        """
        # How do I convert a docutils document tree into an HTML string?
        # https://stackoverflow.com/a/32168938/598057
        # Use a io.StringIO as the warning stream to prevent warnings from
        # being printed to sys.stderr.
        # https://www.programcreek.com/python/example/88126/docutils.core.publish_parts
        warning_stream = io.StringIO()

        # NOTE(review): unlike _write_no_cache(), no source_path is passed
        # here, and the pattern used below expects a "<...>" source marker in
        # the warnings. Confirm whether directives that resolve files
        # relative to the source path should be validated against
        # self.source_path instead.
        try:
            output = publish_parts(
                rst_fragment,
                writer="html",
                settings_overrides=self._build_settings(warning_stream),
            )
        except SystemMessage as exception:
            # There is no output to return in this case, so this is always
            # reported as an error, even if the exception text is empty.
            return None, self._describe_rst_problem(
                str(exception), self._WARNING_PATTERN_WITHOUT_SOURCE_PATH
            )

        warnings = warning_stream.getvalue().rstrip("\n")
        if warnings:
            return None, self._describe_rst_problem(
                warnings, self._WARNING_PATTERN_WITHOUT_SOURCE_PATH
            )

        html = output["html_body"]

        return html, None

    @staticmethod
    def write_anchor_link(title: str, href: str) -> str:
        """
        Return an RST "rawhtml" role that wraps an HTML link to href.

        The title and href are inserted as is, without any escaping.
        """
        return f"""\
:rawhtml:`<a href="{href}">🔗 {title}</a>`\
"""