StrictDoc Documentation
strictdoc/backend/sdoc_source_code/marker_parser.py
Source file coverage
Path:
strictdoc/backend/sdoc_source_code/marker_parser.py
Lines:
324
Non-empty lines:
284
Non-empty lines covered with requirements:
284 / 284 (100.0%)
Functions:
4
Functions covered by requirements:
4 / 4 (100.0%)
1
"""
2
@relation(SDOC-SRS-34, SDOC-SRS-141, scope=file)
3
"""
4
 
5
from typing import Dict, List, Optional, Tuple, Union
6
 
7
from lark import ParseTree, Token, Tree
8
 
9
from strictdoc.backend.sdoc.error_handling import StrictDocSemanticError
10
from strictdoc.backend.sdoc_source_code.comment_parser.marker_lexer import (
11
    MarkerLexer,
12
)
13
from strictdoc.backend.sdoc_source_code.helpers.comment_preprocessor import (
14
    preprocess_source_code_comment,
15
)
16
from strictdoc.backend.sdoc_source_code.models.language_item_marker import (
17
    LanguageItemMarker,
18
)
19
from strictdoc.backend.sdoc_source_code.models.line_marker import LineMarker
20
from strictdoc.backend.sdoc_source_code.models.range_marker import (
21
    RangeMarker,
22
)
23
from strictdoc.backend.sdoc_source_code.models.requirement_marker import Req
24
from strictdoc.backend.sdoc_source_code.models.source_location import ByteRange
25
from strictdoc.backend.sdoc_source_code.models.source_node import SourceNode
26
 
27
 
28
class MarkerParser:
29
    @staticmethod
    def parse(
        *,
        input_string: str,
        line_start: int,
        line_end: int,
        comment_line_start: int,
        comment_byte_range: Optional[ByteRange],
        filename: Optional[str] = None,
        entity_name: Optional[str] = None,
        col_offset: int = 0,
        custom_tags: Optional[set[str]] = None,
        default_scope: Optional[str] = None,
    ) -> SourceNode:
        """
        Turn one source code comment into a SourceNode with markers and fields.

        @relation markers are always extracted from input_string. When custom_tags
        is given, the comment is additionally scanned for source node fields, and
        the SourceNode.fields_locations offsets are computed relative to
        input_string. Callers must therefore not pre-strip the lines of
        input_string: the offsets would no longer match the actual file content
        and writing source nodes back would corrupt the source file. Comment
        symbols such as /** ... */, /// Doxygen comments or Python comments are
        instead replaced internally with spaces (the string length is preserved),
        so all byte offsets stay valid for both parsing and file write-back.

        line_start and line_end are 1-based hints. They are used as the highlight
        range when a marker of scope file, class or function is found, and are
        ignored for line and range markers. They should cover the item definition
        block, *including* leading comment lines if any.

        comment_line_start is the 1-based line of the first actual comment line.
        Lexing only yields positions relative to the comment start, so this value
        is the base offset for turning token positions into absolute positions in
        the file.

        comment_byte_range, if given, enables write-back of modified source nodes.
        A node is modified when a user edits it in the web server or when
        StrictDoc auto-assigns MID or HASH. The values are 0-based byte offsets of
        the exact start-to-end position of input_string inside the source file.

        filename should be given if input_string comes from a static source file.
        It is used to produce more helpful parsing error messages.

        entity_name is required for language item markers. It is the user-visible
        description of the marked range in the rendered document and should equal
        the related LanguageItem.description for consistency with forward markers.

        col_offset is passed on to the relation marker parsing, where it is added
        to the column reported for each marker.

        custom_tags is the set of valid tags if the comment is expected to contain
        key-value pairs for source node generation. The caller is responsible for
        determining the valid tags from the grammar element associated with the
        source code file.

        default_scope should be provided if the caller's language-aware parser can
        infer the scope from the semantic position of the comment (think of Rust
        doc comments). If given, users may omit the scope argument of a relation
        marker. A scope given by the user always takes precedence. If neither a
        default nor a user value is available, StrictDocSemanticError is raised.

        A SourceNode is returned in every case, even if no custom tags were found
        (fields is then empty), because SourceNode also acts as the container for
        markers.
        """

        result: SourceNode = SourceNode(
            entity_name=entity_name,
            comment_byte_range=comment_byte_range,
        )

        prepared_text = preprocess_source_code_comment(input_string)
        parse_tree: ParseTree = MarkerLexer.parse(
            prepared_text, custom_tags=custom_tags
        )

        collected_fields: Dict[str, str] = {}

        for child_ in parse_tree.children:
            # Plain tokens at the top level carry nothing of interest.
            if not isinstance(child_, Tree):
                continue

            rule_name = child_.data

            if rule_name == "relation_marker":
                found_markers = MarkerParser._parse_relation_marker(
                    child_,
                    line_start,
                    line_end,
                    comment_line_start,
                    filename=filename,
                    entity_name=entity_name,
                    col_offset=col_offset,
                    default_scope=default_scope,
                )
                result.markers.extend(found_markers)
                continue

            if rule_name != "node_field":
                # The lexer is expected to produce only the two rules above.
                raise AssertionError

            field_name, field_value = MarkerParser._parse_node_field(child_)
            collected_fields[field_name] = field_value

            # end_pos is exclusive, the stored location is an inclusive pair.
            result.fields_locations[field_name] = (
                child_.meta.start_pos,
                child_.meta.end_pos - 1,
            )

        if collected_fields:
            result.fields = collected_fields

        return result
140
 
141
    @staticmethod
    def _parse_relation_marker(
        element_: Tree[Token],
        line_start: int,
        line_end: int,
        comment_line_start: int,
        filename: Optional[str] = None,
        entity_name: Optional[str] = None,
        col_offset: int = 0,
        default_scope: Optional[str] = None,
    ) -> List[Union[LanguageItemMarker, RangeMarker, LineMarker]]:
        """
        Convert one "relation_marker" parse tree into a marker object.

        The children of element_ are sorted into node UIDs, an optional scope and
        an optional role. One Req is created per UID and all of them are attached
        to a single marker whose type depends on the scope:

        - file, class, function: LanguageItemMarker whose range is
          line_start..line_end as given by the caller.
        - range_start, range_end: RangeMarker whose range is taken from the
          position of the marker itself.
        - line: LineMarker.

        If the marker has no scope argument, default_scope is used instead.

        Positions found in the parse tree are relative to the parsed comment, so
        the 1-based comment_line_start is used to convert them to line numbers of
        the file, and col_offset is added to the column of the marker.

        filename is only used in error messages. entity_name is used to build the
        description of function and class markers.

        Returns a list holding the one marker that was created.

        Raises ValueError if the same UID is listed more than once,
        StrictDocSemanticError if neither a scope argument nor default_scope is
        available, and NotImplementedError for an unknown child rule or an
        unknown scope value.
        """
        markers: List[Union[LanguageItemMarker, RangeMarker, LineMarker]] = []

        # Sort the children of the marker by the grammar rule they come from.
        relation_uid_elements = []
        relation_scope_element: Optional[Tree[Token]] = None
        relation_role_element: Optional[Tree[Token]] = None
        for relation_marker_element_ in element_.children:
            assert isinstance(relation_marker_element_, Tree)
            if relation_marker_element_.data == "relation_node_uid":
                relation_uid_elements.append(relation_marker_element_)
            elif relation_marker_element_.data == "relation_scope":
                relation_scope_element = relation_marker_element_
            elif relation_marker_element_.data == "relation_role":
                relation_role_element = relation_marker_element_
            else:
                raise NotImplementedError

        assert len(relation_uid_elements) > 0

        # A scope written by the user wins over the default of the caller.
        if relation_scope_element is not None:
            assert isinstance(relation_scope_element.children[0], Token)
            relation_scope = relation_scope_element.children[0].value
        else:
            relation_scope = default_scope

        relation_role = None
        if relation_role_element is not None:
            assert isinstance(relation_role_element.children[0], Token)
            relation_role = relation_role_element.children[0].value

        requirements = []
        used_uids = set()

        for relation_uid_token_ in relation_uid_elements:
            uid_token = relation_uid_token_.children[0]
            assert isinstance(uid_token, Token)
            assert uid_token.line is not None

            # Token lines are relative to the comment; convert to a file line.
            uid_line = comment_line_start + uid_token.line - 1

            relation_uid = uid_token.value
            if relation_uid in used_uids:
                # Report the absolute file line, like all other positions
                # produced by this function, so that the location is usable
                # for comments that do not start on the first line of the file.
                raise ValueError(
                    f"@relation marker contains duplicate node UIDs: ['{relation_uid}']. "
                    f"Location: {filename}:{uid_line}."
                )
            used_uids.add(relation_uid)

            requirement = Req(None, relation_uid)
            requirement.ng_source_line = uid_line
            requirement.ng_source_column = uid_token.column
            requirements.append(requirement)

        if relation_scope in ("file", "class", "function"):
            language_item_marker = LanguageItemMarker(
                None, requirements, scope=relation_scope, role=relation_role
            )
            language_item_marker.ng_source_line_begin = (
                comment_line_start + element_.meta.line - 1
            )
            language_item_marker.ng_source_column_begin = (
                element_.meta.column + col_offset
            )
            # The highlighted range is the one provided by the caller.
            language_item_marker.ng_range_line_begin = line_start
            language_item_marker.ng_range_line_end = line_end
            if relation_scope == "file":
                language_item_marker.set_description("entire file")
            elif relation_scope == "function":
                language_item_marker.set_description(
                    f"function {entity_name}()"
                )
            elif relation_scope == "class":
                language_item_marker.set_description(f"class {entity_name}")
            markers.append(language_item_marker)
        elif relation_scope in ("range_start", "range_end"):
            range_marker = RangeMarker(
                None,
                requirements,
                scope=relation_scope,
                role=relation_role,
            )
            range_marker.ng_source_line_begin = (
                comment_line_start + element_.meta.line - 1
            )
            range_marker.ng_source_column_begin = (
                element_.meta.column + col_offset
            )
            # The range covers the lines occupied by the marker itself.
            range_marker.ng_range_line_begin = (
                comment_line_start + element_.meta.line - 1
            )
            range_marker.ng_range_line_end = (
                comment_line_start + element_.meta.end_line - 1
            )
            markers.append(range_marker)
        elif relation_scope == "line":
            line_marker = LineMarker(None, requirements, role=relation_role)
            line_marker.ng_source_line_begin = (
                comment_line_start + element_.meta.line - 1
            )
            line_marker.ng_source_column_begin = (
                element_.meta.column + col_offset
            )
            line_marker.ng_range_line_begin = (
                comment_line_start + element_.meta.line - 1
            )
            # NOTE(review): unlike for range markers there is no "- 1" here, so
            # the range ends one line after the marker. Presumably this makes
            # the marker cover the line that follows it; confirm.
            line_marker.ng_range_line_end = (
                comment_line_start + element_.meta.end_line
            )
            markers.append(line_marker)
        elif relation_scope is None:
            reqs = ",".join(sorted(used_uids))
            raise StrictDocSemanticError(
                f"@relation marker for requirements {reqs} misses scope argument.",
                hint="Scope can only be omitted if supported by language, as e.g. with Rust doc comments.",
                example=(
                    "Add a scope argument. Example:\n"
                    f"@relation({reqs}, scope=function)"
                ),
                line=comment_line_start + element_.meta.line - 1,
                filename=filename,
            )
        else:
            raise NotImplementedError

        return markers
277
 
278
    @staticmethod
    def _parse_node_field(
        element_: Tree[Token],
    ) -> Tuple[str, str]:
        """
        Extract the name and the dedented value from a "node_field" parse tree.

        element_ must have a "node_name" tree as first child and a
        "node_multiline_value" tree as second child. The value tree consists of
        tokens: one per line of text, plus NEWLINE tokens.

        The first value token is taken as is. All following lines are dedented by
        the smallest number of leading spaces found among those lines that have
        content. Whitespace-only lines are ignored when computing the dedent, no
        matter where they appear, so that a blank-looking line cannot reduce the
        amount by which the lines with content are dedented.

        Returns a tuple (name, value). Trailing whitespace is removed from value.
        """
        node_name_node = element_.children[0]
        assert isinstance(node_name_node, Tree)
        assert node_name_node.data == "node_name"
        assert isinstance(node_name_node.children[0], Token)
        node_name = node_name_node.children[0].value

        node_value_node = element_.children[1]
        assert isinstance(node_value_node, Tree)
        assert node_value_node.data == "node_multiline_value"

        # Find minimal indent in lines 1..n. It will be used to dedent the block.
        dedent: Optional[int] = None
        for node_value_component_ in node_value_node.children[1:]:
            assert isinstance(node_value_component_, Token)
            if node_value_component_.type == "NEWLINE":
                continue
            line_value = node_value_component_.value
            non_ws_len = len(line_value.lstrip(" "))
            if non_ws_len == 0:
                # A line made of spaces only says nothing about the indentation
                # of the block, also when it is the first continuation line.
                continue
            this_dedent = len(line_value) - non_ws_len
            dedent = this_dedent if dedent is None else min(this_dedent, dedent)
        if dedent is None:
            dedent = 0

        # Join and dedent.
        value_parts: List[str] = []
        for i, node_value_component_ in enumerate(node_value_node.children):
            assert isinstance(node_value_component_, Token)
            line_value = node_value_component_.value
            if i > 0 and node_value_component_.type != "NEWLINE":
                # Lines shorter than the dedent (whitespace-only ones) are
                # cut down to an empty string.
                line_value = line_value[min(dedent, len(line_value)) :]
            value_parts.append(line_value)

        node_value = "".join(value_parts).rstrip()

        return node_name, node_value