StrictDoc Documentation
strictdoc/export/rst/rst_to_html_fragment_writer.py
Source file coverage
Path:
strictdoc/export/rst/rst_to_html_fragment_writer.py
Lines:
275
Non-empty lines:
243
Non-empty lines covered with requirements:
243 / 243 (100.0%)
Functions:
6
Functions covered by requirements:
6 / 6 (100.0%)
1
"""
2
@relation(SDOC-SRS-3, scope=file)
3
"""
4
 
5
import hashlib
6
import io
7
import os
8
import re
9
import time
10
import uuid
11
from pathlib import Path
12
from typing import Optional, Tuple
13
 
14
from docutils.core import publish_parts
15
from docutils.parsers.rst import directives, roles
16
from docutils.utils import SystemMessage
17
from markupsafe import Markup
18
from pygments.lexers import _load_lexers
19
 
20
from strictdoc.backend.sdoc.models.document import SDocDocument
21
from strictdoc.core.project_config import ProjectConfig, ProjectFeature
22
from strictdoc.export.rst.directives.raw_html_role import raw_html_role
23
from strictdoc.export.rst.directives.sphinx_style_math import (
24
    MathDirective,
25
    MathDirectiveForServer,
26
    eq_role,
27
    eq_role_for_server,
28
    math_role,
29
    math_role_for_server,
30
)
31
from strictdoc.export.rst.directives.wildcard_enhanced_image import (
32
    STRICTDOC_REFERENCE_PATH_SETTING,
33
    WildcardEnhancedImage,
34
)
35
from strictdoc.helpers.file_system import file_open_read_bytes
36
 
37
# How many times RstToHtmlFragmentWriter.write() attempts to move a freshly
# rendered fragment into the RST cache with os.replace() before it lets the
# PermissionError propagate to the caller.
MAX_RETRIES_FOR_CACHE_FILESYSTEM_LOCKING = 3
38
 
39
 
40
class RstToHtmlFragmentWriter:
    """
    Convert in-memory RST fragments to HTML fragments using docutils.

    Note that the class body itself performs registrations with docutils and
    Pygments: the "image" directive is registered as WildcardEnhancedImage,
    the "rawhtml" role is registered, and the StrictDoc lexers are loaded.
    """

    directives.register_directive("image", WildcardEnhancedImage)

    roles.register_local_role("rawhtml", raw_html_role)

    # FIXME: It doesn't feel right to load lexers like this.
    _load_lexers("strictdoc.export.rst.strictdoc_lexer")

    BASE_SETTINGS = {
        # This is important for code syntax highlighting. The setting of
        # "short" is coupled to the CSS file that we auto-generated using Pygments:
        # strictdoc/export/html/_static/pygments.css
        "syntax_highlight": "short",
        "syntax_highlight_opts": {
            "linenos": "inline",  # "table"
        },
    }

    def __init__(
        self,
        *,
        project_config: ProjectConfig,
        context_document: Optional[SDocDocument],
    ):
        """
        Prepare the cache location and the paths used for rendering.

        The cache directory is "<cache dir>/rst/<md5 of the output dir>".
        When a context document is given, its output directory is used as the
        reference path and as the directory of the (virtual) RST source file.
        Otherwise, the current working directory and "<string>" are used.

        If the MathJAX project feature is activated, the "eq"/"math" roles
        and the "math" directive are registered with docutils, using the
        server variants when the project is running on a server.
        """

        self.source_path: str
        path_to_output_dir_md5: str = hashlib.md5(
            project_config.output_dir.encode("utf-8")
        ).hexdigest()

        path_to_tmp_dir = project_config.get_path_to_cache_dir()
        self.path_to_rst_cache_dir = os.path.join(
            path_to_tmp_dir, "rst", path_to_output_dir_md5
        )
        self.reference_path = os.getcwd()

        if context_document is not None:
            assert context_document.meta is not None
            self.reference_path = (
                context_document.meta.output_document_dir_full_path
            )

            # This is a delicate move. Based on a user report and our findings,
            # the csv-table RST directive relies on the 'source path' to
            # calculate paths to CSV files.
            # Our case is, however, special: we do not render RST files but
            # rather RST fragments in memory, and because of that we don't have
            # RST files to point to with 'source_path=' below.
            # At the same time, passing the output folder of the document works
            # because this RST-to-HTML writer resolves path to CSV assets
            # that are copied to that output folder by StrictDoc.
            # See CSVTable().get_csv_data() where the source_path is used.
            self.source_path = os.path.join(
                context_document.meta.output_document_dir_full_path,
                "STRICTDOC-FRAGMENT.rst",
            )
        else:
            self.source_path = "<string>"
        self.context_document: Optional[SDocDocument] = context_document

        if project_config.is_feature_activated(ProjectFeature.MATHJAX):
            if project_config.is_running_on_server:
                roles.register_canonical_role("eq", eq_role_for_server)
                roles.register_canonical_role("math", math_role_for_server)
                directives.register_directive("math", MathDirectiveForServer)
            else:
                roles.register_canonical_role("eq", eq_role)
                roles.register_canonical_role("math", math_role)
                directives.register_directive("math", MathDirective)

    def write(self, rst_fragment: str, use_cache: bool = True) -> Markup:
        """
        Render an RST fragment to HTML, optionally using the on-disk cache.

        Cached fragments are stored at
        "<RST cache dir>/<fragment length>/<md5 of the fragment>".
        Fragments shorter than 40 characters are never cached. When use_cache
        is False, the cache is neither read nor written, and no cache
        directories are created.

        Raises RuntimeError if docutils reports problems with the fragment
        (see _write_no_cache) and PermissionError if the cache file could not
        be replaced after all retries.
        """

        assert isinstance(rst_fragment, str), rst_fragment

        # Do not try to cache very small fragments, and do not touch the
        # file system at all when the caller has disabled the cache.
        if not use_cache or len(rst_fragment) < 40:
            return Markup(self._write_no_cache(rst_fragment))

        path_to_rst_fragment_bucket_dir = os.path.join(
            self.path_to_rst_cache_dir, str(len(rst_fragment))
        )
        fragment_md5 = hashlib.md5(rst_fragment.encode("utf-8")).hexdigest()
        path_to_cached_fragment = os.path.join(
            path_to_rst_fragment_bucket_dir, fragment_md5
        )
        if os.path.isfile(path_to_cached_fragment):
            with file_open_read_bytes(
                path_to_cached_fragment
            ) as cached_fragment_file_:
                return Markup(cached_fragment_file_.read().decode("UTF-8"))

        rendered_html: str = self._write_no_cache(rst_fragment)

        # The bucket directory is created only when there is something to
        # store in it.
        Path(path_to_rst_fragment_bucket_dir).mkdir(
            parents=True, exist_ok=True
        )
        self._store_cached_fragment(
            path_to_cached_fragment, rendered_html.encode("UTF-8")
        )

        return Markup(rendered_html)

    @staticmethod
    def _store_cached_fragment(
        path_to_cached_fragment: str, rendered_html_bytes: bytes
    ) -> None:
        """
        Store the rendered bytes at the given cache path.

        Thread-safe cache update strategy:
        1) write bytes to a unique temp file, then
        2) atomically replace the target cache file with os.replace().
        This ensures that concurrent readers either see the old complete
        file or the new complete file, but never a partially written file.
        (os.replace is atomic when source and destination are on the same
        filesystem, which is true because both paths are in one cache dir.)

        If the fragment cannot be stored, the temp file is removed and the
        original exception is propagated.
        """

        tmp_path_to_cached_fragment = (
            f"{path_to_cached_fragment}.{uuid.uuid4().hex}.tmp"
        )
        is_stored = False
        try:
            with open(
                tmp_path_to_cached_fragment, "wb"
            ) as cached_fragment_file_:
                cached_fragment_file_.write(rendered_html_bytes)
            # On Windows in particular, we might get interference from Windows Defender
            # for obtaining the file system locks. As a work-around, we try multiple times...
            for attempt in range(MAX_RETRIES_FOR_CACHE_FILESYSTEM_LOCKING):
                try:
                    os.replace(
                        tmp_path_to_cached_fragment, path_to_cached_fragment
                    )
                except PermissionError:
                    if attempt >= MAX_RETRIES_FOR_CACHE_FILESYSTEM_LOCKING - 1:
                        # Surface the original error
                        raise
                    # Wait 100ms, then 200ms, etc., to let Windows Defender release the lock
                    time.sleep(0.1 * (attempt + 1))
                else:
                    is_stored = True
                    break  # Success!
        finally:
            if not is_stored:
                # Do not leave orphaned temp files in the cache. This is a
                # best-effort cleanup: a failure to remove the temp file must
                # not mask the error that is already being propagated.
                try:
                    os.remove(tmp_path_to_cached_fragment)
                except OSError:
                    pass

    @staticmethod
    def _format_error_message(pattern: str, warnings: str) -> str:
        """
        Turn the docutils warning text into a short error message.

        The pattern must define the named groups "line" and "message". If it
        does not match, the complete warning text is included in the message.
        """

        match = re.search(pattern, warnings)
        if match is None:
            return f"RST markup syntax error: {warnings}"
        return (
            f"RST markup syntax error on line {match.group('line')}: "
            f"{match.group('message')}"
        )

    def _write_no_cache(self, rst_fragment: str) -> str:
        """
        Render an RST fragment to HTML without using the cache.

        Raises RuntimeError if docutils writes anything to the warning
        stream. The error message contains the parsed warning and the
        complete RST fragment.
        """

        assert isinstance(rst_fragment, str), rst_fragment

        # How do I convert a docutils document tree into an HTML string?
        # https://stackoverflow.com/a/32168938/598057
        # Use a io.StringIO as the warning stream to prevent warnings from
        # being printed to sys.stderr.
        # https://www.programcreek.com/python/example/88126/docutils.core.publish_parts
        warning_stream = io.StringIO()
        settings = {
            **self.BASE_SETTINGS,
            "warning_stream": warning_stream,
            STRICTDOC_REFERENCE_PATH_SETTING: self.reference_path,
        }

        output = publish_parts(
            rst_fragment,
            writer="html",
            settings_overrides=settings,
            source_path=self.source_path,
        )

        if warning_stream.tell() > 0:
            warnings = warning_stream.getvalue().rstrip("\n")
            # A typical RST warning:
            # """
            # path-to-output-folder/file.rst:4: (WARNING/2) Bullet list ends
            # without a blank line; unexpected unindent.
            # """
            error_message = self._format_error_message(
                r".*:(?P<line>\d+): \(.*\) (?P<message>.*)", warnings
            )
            final_message = (
                f"problems when converting RST to HTML: {error_message}\n"
                "RST fragment: >>>\n"
                f"{rst_fragment}"
                "<<<"
            )
            raise RuntimeError(final_message)

        html: str = output["html_body"]

        return html

    def write_with_validation(
        self, rst_fragment: str
    ) -> Tuple[Optional[str], Optional[str]]:
        """
        Render an RST fragment to HTML and report problems as a value.

        Returns (html, None) on success and (None, error message) if docutils
        raises a SystemMessage or writes anything to the warning stream.
        Unlike write(), this method does not use the cache and does not pass
        a source path to docutils.
        """

        # How do I convert a docutils document tree into an HTML string?
        # https://stackoverflow.com/a/32168938/598057
        # Use a io.StringIO as the warning stream to prevent warnings from
        # being printed to sys.stderr.
        # https://www.programcreek.com/python/example/88126/docutils.core.publish_parts
        warning_stream = io.StringIO()
        settings = {
            **self.BASE_SETTINGS,
            "warning_stream": warning_stream,
            STRICTDOC_REFERENCE_PATH_SETTING: self.reference_path,
        }

        output = None
        try:
            output = publish_parts(
                rst_fragment, writer="html", settings_overrides=settings
            )
        except SystemMessage as exception:
            warnings = str(exception)
        else:
            warnings = warning_stream.getvalue().rstrip("\n")

        # 'output is None' covers a SystemMessage with an empty text: there is
        # no HTML to return in that case, so it must be reported as an error.
        if warnings or output is None:
            # A typical RST warning:
            # """
            # <string>:4: (WARNING/2) Bullet list ends without a blank line;
            # unexpected unindent.
            # """
            error_message = self._format_error_message(
                r".*<.*>:(?P<line>\d+): \(.*\) (?P<message>.*)", warnings
            )
            return None, error_message

        html = output["html_body"]

        return html, None

    @staticmethod
    def write_anchor_link(title: str, href: str) -> str:
        """
        Create an RST "rawhtml" role that renders a link to the given href.

        Both arguments are inserted into the HTML as they are, without any
        escaping. The returned string has no trailing newline.
        """

        return f"""\
:rawhtml:`<a href="{href}">🔗&nbsp;{title}</a>`\
"""