Path:
strictdoc/backend/sdoc_source_code/marker_parser.py
Lines:
324
Non-empty lines:
284
Non-empty lines covered with requirements:
284 / 284 (100.0%)
Functions:
4
Functions covered by requirements:
4 / 4 (100.0%)
- "7.6.1. Relation markers syntax" (REQUIREMENT)
- "7.7.1. Parse nodes from source code" (REQUIREMENT)
1
"""2
@relation(SDOC-SRS-34, SDOC-SRS-141, scope=file)3
"""4
5
from typing import Dict, List, Optional, Tuple, Union
6
7
from lark import ParseTree, Token, Tree
8
9
from strictdoc.backend.sdoc.error_handling import StrictDocSemanticError
10
from strictdoc.backend.sdoc_source_code.comment_parser.marker_lexer import (
11
MarkerLexer,
12
)13
from strictdoc.backend.sdoc_source_code.helpers.comment_preprocessor import (
14
preprocess_source_code_comment,
15
)16
from strictdoc.backend.sdoc_source_code.models.language_item_marker import (
17
LanguageItemMarker,
18
)19
from strictdoc.backend.sdoc_source_code.models.line_marker import LineMarker
20
from strictdoc.backend.sdoc_source_code.models.range_marker import (
21
RangeMarker,
22
)23
from strictdoc.backend.sdoc_source_code.models.requirement_marker import Req
24
from strictdoc.backend.sdoc_source_code.models.source_location import ByteRange
25
from strictdoc.backend.sdoc_source_code.models.source_node import SourceNode
26
27
28
class MarkerParser:
    """
    Turn the parse tree of a source code comment into a SourceNode.

    The comment text is lexed by MarkerLexer. This class walks the resulting
    tree and converts relation markers into marker objects and node fields
    into name/value pairs stored on the returned SourceNode.
    """

    @staticmethod
    def parse(
        *,
        input_string: str,
        line_start: int,
        line_end: int,
        comment_line_start: int,
        comment_byte_range: Optional[ByteRange],
        filename: Optional[str] = None,
        entity_name: Optional[str] = None,
        col_offset: int = 0,
        custom_tags: Optional[set[str]] = None,
        default_scope: Optional[str] = None,
    ) -> SourceNode:
        """
        Parse source nodes and relation markers from source file comments.

        The input_string is parsed for @relation markers. If custom_tags are
        given, input_string is additionally parsed for source nodes and
        SourceNode.fields_locations offsets are calculated relative to
        input_string. This implies that input_string lines must not be
        pre-stripped by the caller, otherwise offsets would mismatch with
        actual file content and source node write-back would corrupt source
        files. Comment symbols like /** ... */ or /// Doxygen comments or
        Python comments are instead replaced internally with spaces
        (preserving string length), so that all byte offsets remain valid for
        both parsing and file write-back.

        The 1-based line_start/line_end provide hints to the parser for the
        case markers of scope file, class or function are found, in which case
        the user values are set as highlight range. If the parser finds line
        or range markers, the user provided line start/end values are ignored.
        Should be set to the item definition block, *with* leading comment
        lines if any.

        The 1-based comment_line_start parameter is the first actual comment
        line. It is required as a base offset for some parser tokens to
        determine their absolute position in file, as lexing gives only a
        position relative to comment start.

        comment_byte_range, if given, enables write-back of modified source
        nodes. Modification happens when a user edits the source node in the
        web server, or when StrictDoc auto-assigns MID or HASH. Values are
        0-based byte-offsets specifying the exact input_string start-to-end
        position inside the source file.

        custom_tags is a set of valid tags if a comment is expected to contain
        key-value pairs for source node generation. The caller is responsible
        to determine valid custom tags from the grammar element associated
        with the source code file.

        filename should be given if input_string comes from a static source
        file. It will be used to create more helpful parsing error messages.

        entity_name is required for language item markers. It's the
        user-visible description of the marked range in the rendered document.
        Should be equal to the related LanguageItem.description for
        consistency with forward markers.

        col_offset is added to the column of every marker found in the
        comment.

        default_scope should be provided if the caller's language-aware parser
        can infer the scope from the semantic comment position. Think of Rust
        doc comments for example. If given, users are allowed to omit the
        scope argument in a relation marker. A user provided scope argument
        always takes preference. If neither default nor a user provided value
        is available, StrictDocSemanticError will be raised.

        The function returns a SourceNode. Note: This is also the case if no
        custom tags were found at all (in which case fields is empty) because
        SourceNode also acts as a container for markers.

        Raises StrictDocSemanticError if a relation marker has no scope and no
        default_scope was given, ValueError if a relation marker lists the
        same node UID twice, and AssertionError if the parse tree contains a
        subtree that is neither a relation marker nor a node field.
        """
        node_fields: Dict[str, str] = {}

        source_node: SourceNode = SourceNode(
            entity_name=entity_name,
            comment_byte_range=comment_byte_range,
        )
        input_string = preprocess_source_code_comment(input_string)

        tree: ParseTree = MarkerLexer.parse(
            input_string, custom_tags=custom_tags
        )

        for element_ in tree.children:
            # Only subtrees carry markers or fields; bare tokens are skipped.
            if not isinstance(element_, Tree):
                continue

            if element_.data == "relation_marker":
                relation_markers = MarkerParser._parse_relation_marker(
                    element_,
                    line_start,
                    line_end,
                    comment_line_start,
                    filename,
                    entity_name,
                    col_offset,
                    default_scope,
                )
                source_node.markers.extend(relation_markers)

            elif element_.data == "node_field":
                node_name, node_value = MarkerParser._parse_node_field(
                    element_,
                )
                # A later field with the same name overwrites an earlier one.
                node_fields[node_name] = node_value

                # Stored as (first position, last position) of the field
                # inside the preprocessed input_string.
                source_node.fields_locations[node_name] = (
                    element_.meta.start_pos,
                    element_.meta.end_pos - 1,
                )
            else:
                raise AssertionError(
                    f"Unexpected parse tree element: {element_.data}"
                )

        # Only replace the node's fields if at least one field was parsed.
        if node_fields:
            source_node.fields = node_fields

        return source_node

    @staticmethod
    def _parse_relation_marker(
        element_: Tree[Token],
        line_start: int,
        line_end: int,
        comment_line_start: int,
        filename: Optional[str] = None,
        entity_name: Optional[str] = None,
        col_offset: int = 0,
        default_scope: Optional[str] = None,
    ) -> List[Union[LanguageItemMarker, RangeMarker, LineMarker]]:
        """
        Convert one "relation_marker" subtree into a marker object.

        element_ is the relation marker subtree. Its children must be
        "relation_node_uid", "relation_scope" or "relation_role" subtrees.
        line_start/line_end become the highlight range of file, class and
        function markers. comment_line_start is the 1-based file line of the
        first comment line and is used to turn comment-relative token lines
        into file lines. col_offset is added to the marker column.
        default_scope is used when the marker has no scope argument.

        Returns a list with the single marker created for this subtree.

        Raises ValueError if a node UID is listed more than once,
        StrictDocSemanticError if neither the marker nor default_scope
        provides a scope, and NotImplementedError for an unknown child
        subtree or an unknown scope value.
        """
        markers: List[Union[LanguageItemMarker, RangeMarker, LineMarker]] = []

        relation_uid_elements = []
        relation_scope_element: Optional[Tree[Token]] = None
        relation_role_element: Optional[Tree[Token]] = None
        for relation_marker_element_ in element_.children:
            assert isinstance(relation_marker_element_, Tree)
            if relation_marker_element_.data == "relation_node_uid":
                relation_uid_elements.append(relation_marker_element_)
            elif relation_marker_element_.data == "relation_scope":
                relation_scope_element = relation_marker_element_
            elif relation_marker_element_.data == "relation_role":
                relation_role_element = relation_marker_element_
            else:
                raise NotImplementedError(
                    "Unexpected relation marker element: "
                    f"{relation_marker_element_.data}"
                )

        assert len(relation_uid_elements) > 0

        # A scope written by the user wins over the caller's default.
        relation_scope: Optional[str]
        if relation_scope_element is not None:
            assert isinstance(relation_scope_element.children[0], Token)
            relation_scope = relation_scope_element.children[0].value
        else:
            relation_scope = default_scope

        relation_role = None
        if relation_role_element is not None:
            assert isinstance(relation_role_element.children[0], Token)
            relation_role = relation_role_element.children[0].value

        requirements = []
        used_uids = set()

        for relation_uid_element_ in relation_uid_elements:
            uid_token = relation_uid_element_.children[0]
            assert isinstance(uid_token, Token)
            assert uid_token.line is not None

            # Token lines are relative to the comment start; convert to the
            # 1-based line in the file.
            uid_source_line = comment_line_start + uid_token.line - 1

            relation_uid = uid_token.value
            if relation_uid in used_uids:
                raise ValueError(
                    f"@relation marker contains duplicate node UIDs: ['{relation_uid}']. "
                    f"Location: {filename}:{uid_source_line}."
                )
            used_uids.add(relation_uid)

            requirement = Req(None, relation_uid)
            requirement.ng_source_line = uid_source_line
            requirement.ng_source_column = uid_token.column
            requirements.append(requirement)

        # Position of the marker itself, shared by all marker kinds below.
        marker_source_line = comment_line_start + element_.meta.line - 1
        marker_source_column = element_.meta.column + col_offset

        if relation_scope in ("file", "class", "function"):
            language_item_marker = LanguageItemMarker(
                None, requirements, scope=relation_scope, role=relation_role
            )
            language_item_marker.ng_source_line_begin = marker_source_line
            language_item_marker.ng_source_column_begin = marker_source_column
            # The highlighted range is the one supplied by the caller.
            language_item_marker.ng_range_line_begin = line_start
            language_item_marker.ng_range_line_end = line_end
            if relation_scope == "file":
                language_item_marker.set_description("entire file")
            elif relation_scope == "function":
                language_item_marker.set_description(
                    f"function {entity_name}()"
                )
            elif relation_scope == "class":
                language_item_marker.set_description(f"class {entity_name}")
            markers.append(language_item_marker)
        elif relation_scope in ("range_start", "range_end"):
            range_marker = RangeMarker(
                None,
                requirements,
                scope=relation_scope,
                role=relation_role,
            )
            range_marker.ng_source_line_begin = marker_source_line
            range_marker.ng_source_column_begin = marker_source_column
            # The range is the span of lines occupied by the marker itself.
            range_marker.ng_range_line_begin = marker_source_line
            range_marker.ng_range_line_end = (
                comment_line_start + element_.meta.end_line - 1
            )
            markers.append(range_marker)
        elif relation_scope == "line":
            line_marker = LineMarker(None, requirements, role=relation_role)
            line_marker.ng_source_line_begin = marker_source_line
            line_marker.ng_source_column_begin = marker_source_column
            line_marker.ng_range_line_begin = marker_source_line
            # No "- 1" here: the range ends one line after the marker's last
            # line. NOTE(review): presumably so that the code line following
            # the marker is covered - confirm.
            line_marker.ng_range_line_end = (
                comment_line_start + element_.meta.end_line
            )
            markers.append(line_marker)
        elif relation_scope is None:
            reqs = ",".join(sorted(used_uids))
            raise StrictDocSemanticError(
                f"@relation marker for requirements {reqs} misses scope argument.",
                hint="Scope can only be omitted if supported by language, as e.g. with Rust doc comments.",
                example=(
                    "Add a scope argument. Example:\n"
                    f"@relation({reqs}, scope=function)"
                ),
                line=marker_source_line,
                filename=filename,
            )
        else:
            raise NotImplementedError(
                f"Unsupported relation scope: {relation_scope}"
            )

        return markers

    @staticmethod
    def _parse_node_field(
        element_: Tree[Token],
    ) -> Tuple[str, str]:
        """
        Extract the name and the dedented value of a "node_field" subtree.

        element_ must have a "node_name" subtree as first child and a
        "node_multiline_value" subtree as second child. The value tokens are
        concatenated; every line after the first one has the common leading
        space indent removed, and trailing whitespace of the whole value is
        stripped.

        Returns a (name, value) tuple.
        """
        node_name_node = element_.children[0]
        assert isinstance(node_name_node, Tree)
        assert node_name_node.data == "node_name"
        assert isinstance(node_name_node.children[0], Token)
        node_name = node_name_node.children[0].value

        node_value_node = element_.children[1]
        assert isinstance(node_value_node, Tree)
        assert node_value_node.data == "node_multiline_value"

        # Find minimal indent in lines 1..n. It will be used to dedent the
        # block. Whitespace-only lines are ignored: they carry no content
        # whose indent could matter, and letting one of them define the
        # indent would leave the real content lines under-dedented.
        dedent: Optional[int] = None
        for node_value_component_ in node_value_node.children[1:]:
            assert isinstance(node_value_component_, Token)
            if node_value_component_.type == "NEWLINE":
                continue
            line_value = node_value_component_.value
            content_len = len(line_value.lstrip(" "))
            if content_len == 0:
                continue
            line_indent = len(line_value) - content_len
            dedent = (
                line_indent if dedent is None else min(line_indent, dedent)
            )
        if dedent is None:
            dedent = 0

        # Join and dedent. The first component is kept as is; slicing past
        # the end of a short (e.g. blank) line simply yields "".
        value_parts: List[str] = []
        for i, node_value_component_ in enumerate(node_value_node.children):
            assert isinstance(node_value_component_, Token)
            line_value = node_value_component_.value
            if i > 0 and node_value_component_.type != "NEWLINE":
                line_value = line_value[dedent:]
            value_parts.append(line_value)

        node_value = "".join(value_parts).rstrip()

        return node_name, node_value