Path:
strictdoc/backend/sdoc_source_code/reader_rust.py
Lines:
610
Non-empty lines:
563
Non-empty lines covered with requirements:
563 / 563 (100.0%)
Functions:
16
Functions covered by requirements:
16 / 16 (100.0%)
1
"""2
@relation(SDOC-SRS-142, scope=file)3
"""4
5
from enum import IntEnum
6
from pathlib import Path
7
from typing import Optional, Union
8
9
import tree_sitter_rust as ts_rust
10
from tree_sitter import Language, Node, Parser, Query, QueryCursor
11
12
from strictdoc.backend.sdoc_source_code.constants import FunctionAttribute
13
from strictdoc.backend.sdoc_source_code.marker_parser import MarkerParser
14
from strictdoc.backend.sdoc_source_code.models.language import LanguageItem
15
from strictdoc.backend.sdoc_source_code.models.language_item_marker import (
16
LanguageItemMarker,
17
RangeMarkerType,
18
)19
from strictdoc.backend.sdoc_source_code.models.line_marker import LineMarker
20
from strictdoc.backend.sdoc_source_code.models.range_marker import (
21
ForwardRangeMarker,
22
RangeMarker,
23
)24
from strictdoc.backend.sdoc_source_code.models.source_file_info import (
25
SourceFileTraceabilityInfo,
26
)27
from strictdoc.backend.sdoc_source_code.models.source_location import ByteRange
28
from strictdoc.backend.sdoc_source_code.parse_context import ParseContext
29
from strictdoc.backend.sdoc_source_code.processors.general_language_marker_processors import (
30
language_item_marker_processor,
31
line_marker_processor,
32
range_marker_processor,
33
source_file_traceability_info_processor,
34
)35
from strictdoc.helpers.cast import assert_cast
36
from strictdoc.helpers.file_stats import SourceFileStats
37
from strictdoc.helpers.file_system import file_open_read_bytes
38
- "3.1.14. Valid positions of doc comments" (REQUIREMENT)
- "3.1.8. File, line and range markers" (REQUIREMENT)
- "3.1.10. Forward relations to Rust items" (REQUIREMENT)
39
# Tree-sitter query source for Rust files, compiled by ParserRun.
#
# The text holds five top-level patterns. The zero-based index of the pattern
# that produced a match is compared against RustTsQuery, so the order of the
# patterns below has to stay in sync with that enum.
#
# Captures read by ParserRun:
#   doc.comment          one node per doc comment part (can occur repeatedly)
#   doc.item             the node the doc comment is attached to
#   doc.item_identifier  the node whose text names the item (not always set)
#   normal_comment       line/block comments that carry no doc part
# The _attribute_id capture only feeds the #eq? predicate.
#
# @relation(SDOC-LLR-177, SDOC-LLR-171, SDOC-LLR-173, scope=line)
TS_QUERY = """
; Query 0: Outer doc attribute, line doc, or block doc in allowed positions
(
  [
    (attribute_item
      (attribute
        (identifier) @_attribute_id (#eq? @_attribute_id "doc")
        value: (string_literal (string_content) @doc.comment)))+
    (line_comment
      outer: (outer_doc_comment_marker)
      doc: (doc_comment) @doc.comment)+
    (block_comment
      outer: (outer_doc_comment_marker)
      doc: (doc_comment) @doc.comment)
  ]
  .
  (attribute_item)*
  .
  [
    ; any identifiable item, most notably functions
    (_ name: [(identifier)(field_identifier)(type_identifier)(lifetime)] @doc.item_identifier)

    ; impl MyStruct
    (impl_item type: (type_identifier) @doc.item_identifier)

    ; extern "C"
    (foreign_mod_item (extern_modifier) @doc.item_identifier)

    ; match arm
    (match_arm (match_pattern) @doc.item_identifier)

    ; assignment inside struct initializer
    (field_initializer field: (field_identifier) @doc.item_identifier)

    ; Statement like 1;
    (expression_statement) @doc.item_identifier

    ; Expression like x + y
    (binary_expression) @doc.item_identifier

    ; Expression like (x + y)
    (parenthesized_expression) @doc.item_identifier

    ; Named "type" field of any enclosing node (usually body), e.g. type within tuple struct.
    type: (_)
  ] @doc.item
)

; Query 1: Inner doc attribute, line doc or block doc in allowed positions.
; Note: We have to repeat the identical inner pattern, alternations don't help here.
; See https://github.com/tree-sitter/tree-sitter/issues/3480.
[
  (function_item
    name: (identifier) @doc.item_identifier
    body: (block
      [
        (inner_attribute_item
          (attribute
            (identifier) @_attribute_id (#eq? @_attribute_id "doc")
            value: (string_literal (string_content) @doc.comment)))+
        (line_comment
          inner: (inner_doc_comment_marker)
          doc: (doc_comment) @doc.comment)+
        (block_comment
          inner: (inner_doc_comment_marker)
          doc: (doc_comment) @doc.comment)
      ]
    )
  )
  (mod_item
    name: (identifier) @doc.item_identifier
    body: (declaration_list
      [
        (inner_attribute_item
          (attribute
            (identifier) @_attribute_id (#eq? @_attribute_id "doc")
            value: (string_literal (string_content) @doc.comment)))+
        (line_comment
          inner: (inner_doc_comment_marker)
          doc: (doc_comment) @doc.comment)+
        (block_comment
          inner: (inner_doc_comment_marker)
          doc: (doc_comment) @doc.comment)
      ]
    )
  )
  (impl_item
    type: (type_identifier) @doc.item_identifier
    body: (declaration_list
      [
        (inner_attribute_item
          (attribute
            (identifier) @_attribute_id (#eq? @_attribute_id "doc")
            value: (string_literal (string_content) @doc.comment)))+
        (line_comment
          inner: (inner_doc_comment_marker)
          doc: (doc_comment) @doc.comment)+
        (block_comment
          inner: (inner_doc_comment_marker)
          doc: (doc_comment) @doc.comment)
      ]
    )
  )
  (foreign_mod_item (extern_modifier) @doc.item_identifier
    body: (declaration_list
      [
        (inner_attribute_item
          (attribute
            (identifier) @_attribute_id (#eq? @_attribute_id "doc")
            value: (string_literal (string_content) @doc.comment)))+
        (line_comment
          inner: (inner_doc_comment_marker)
          doc: (doc_comment) @doc.comment)+
        (block_comment
          inner: (inner_doc_comment_marker)
          doc: (doc_comment) @doc.comment)
      ]
    )
  )
] @doc.item

; Query 2: Inner line or block doc comment of file-level module.
(source_file
  [
    (line_comment
      inner: (inner_doc_comment_marker)
      doc: (doc_comment) @doc.comment)+
    (block_comment
      inner: (inner_doc_comment_marker)
      doc: (doc_comment) @doc.comment)
  ]
) @doc.item

; Query 3: normal line or block comment
[(line_comment !doc)+
 (block_comment !doc)] @normal_comment

; Query 4: Identifiable items. Those where it's clear how to link by forward relations.
[
  (const_item name: (identifier) @doc.item_identifier) @doc.item
  (enum_item name: (type_identifier) @doc.item_identifier) @doc.item
  (function_item name: (identifier) @doc.item_identifier) @doc.item
  (mod_item name: (identifier) @doc.item_identifier) @doc.item
  (static_item name: (identifier) @doc.item_identifier) @doc.item
  (struct_item name: (type_identifier) @doc.item_identifier) @doc.item
  (trait_item name: (type_identifier) @doc.item_identifier) @doc.item
  (type_item name: (type_identifier) @doc.item_identifier) @doc.item
  (union_item name: (type_identifier) @doc.item_identifier) @doc.item
]
"""
191
192
class RustTsQuery(IntEnum):
    """
    Readable names for the pattern indices of TS_QUERY.

    Every member's value is the zero-based position of one top-level pattern
    in the query text, so members and patterns have to be kept in the same
    order.
    """

    # "Query 0": outer doc comments and doc attributes.
    OUTER_DOC_COMMENT = 0
    # "Query 1": inner doc comments and doc attributes inside a body.
    INNER_DOC_COMMENT = 1
    # "Query 2": inner doc comments directly below the source file node.
    INNER_DOC_COMMENT_FILEMODULE = 2
    # "Query 3": line and block comments without a doc part.
    NORMAL_COMMENT = 3
    # "Query 4": named items that forward relations can point to.
    IDENTIFIABLE_ITEM = 4
200
201
- "3.1.12. Collapse doc comments" (REQUIREMENT)
202
def comments_text_from_comment_nodes(comments: list[Node]) -> str:
    """
    Merge the text of several comment nodes into a single string.

    The decoded text of the first node comes first. Before the text of every
    following node, one newline is inserted per row that lies between its
    start row and the start row of the node before it (none if the
    difference is zero or negative).

    The list must hold at least one node, otherwise an IndexError is raised.
    @relation(SDOC-LLR-175, scope=function)
    """
    head = comments[0]
    pieces = [assert_cast(head.text, bytes).decode("utf-8")]
    previous_row = head.start_point.row
    for node in comments[1:]:
        current_row = node.start_point.row
        # Multiplying a string by a non-positive count yields "".
        pieces.append("\n" * (current_row - previous_row))
        pieces.append(assert_cast(node.text, bytes).decode("utf-8"))
        previous_row = current_row
    return "".join(pieces)
216
217
218
# Label put in front of the identifier, keyed by tree-sitter node type.
# function_item is absent on purpose: its description has a "()" suffix and
# is built separately.
_RUST_ITEM_LABELS = {
    "associated_type": "associated type",
    "binary_expression": "expression",
    "parenthesized_expression": "expression",
    "const_item": "const",
    "const_parameter": "const parameter",
    "enum_item": "enum",
    "enum_variant": "enum variant",
    "expression_statement": "statement",
    "extern_crate_declaration": "crate",
    "field_declaration": "field",
    "field_initializer": "field initializer",
    "foreign_mod_item": "foreign module",
    "impl_item": "impl",
    "match_arm": "match arm",
    "macro_definition": "macro",
    "mod_item": "module",
    "lifetime_parameter": "lifetime",
    "static_item": "static",
    "struct_item": "struct",
    "trait_item": "trait",
    "type_item": "type",
    "type_parameter": "type parameter",
    "union_item": "union",
    "primitive_type": "type",
    "type_identifier": "type",
}


def special_description(item: Node, identifier_text: str) -> Optional[str]:
    """
    Build a Rust flavoured description for a node, based on its node type.

    The Rust reader stores many kinds of Rust constructs in objects that were
    designed for functions, so the generic function wording does not fit
    them. This helper returns a short "<kind> <identifier>" text instead;
    functions are rendered as "fn <identifier>()".

    Returns None when the node type has no dedicated description.
    """
    node_type = item.type
    if node_type == "function_item":
        return f"fn {identifier_text}()"

    label = _RUST_ITEM_LABELS.get(node_type)
    if label is None:
        return None
    return f"{label} {identifier_text}"
275
276
277
class SourceFileTraceabilityReader_Rust:
    """
    Reader that produces a SourceFileTraceabilityInfo for Rust source code.

    The parsing itself is delegated to ParserRun; this class only prepares
    the objects a run needs and triggers the file-level post-processing.
    """

    @staticmethod
    def supported_elements() -> list[str]:
        """Return the element names supported by this reader (none)."""
        return []

    def __init__(self, custom_tags: Optional[set[str]] = None) -> None:
        """Remember the custom tags that are handed on to every parser run."""
        self.custom_tags: Optional[set[str]] = custom_tags

    def read(
        self,
        input_buffer: bytes,
        file_path: Optional[str] = None,
    ) -> SourceFileTraceabilityInfo:
        """
        Parse Rust source given as bytes and return its traceability info.

        A fresh ParseContext and SourceFileTraceabilityInfo are created for
        the call, filled by one ParserRun and then passed to
        source_file_traceability_info_processor before being returned.
        """
        stats = SourceFileStats.create(input_buffer)
        context = ParseContext(file_path, stats)
        info = SourceFileTraceabilityInfo([])

        run = ParserRun(input_buffer, context, info, self.custom_tags)
        run()

        source_file_traceability_info_processor(info, context)
        return info

    def read_from_file(self, file_path: str) -> SourceFileTraceabilityInfo:
        """
        Generate the source file traceability info for one particular Rust file.

        The created SourceFileTraceabilityInfo is only partially filled, with
        information that is local to this file:
        - functions: Markers are associated, but only those resulting from local markers.
        - markers: Markers that stem from markup in this file.
        - ng_map_reqs_to_markers: Mapping of requirement IDs to Marker objects for markers directly defined in the source file.
        """
        with file_open_read_bytes(file_path) as file:
            raw_content = file.read()
            return self.read(raw_content, file_path=file_path)
315
316
317
class ParserRun:
    """
    One tree-sitter pass over a single Rust source buffer.

    Creating an instance sets up the Rust parser and compiles TS_QUERY.
    Calling the instance parses the buffer, evaluates the query and records
    the results in the SourceFileTraceabilityInfo and ParseContext that were
    passed to the constructor.
    """

    def __init__(
        self,
        input_buffer: bytes,
        parse_context: ParseContext,
        traceability_info: SourceFileTraceabilityInfo,
        custom_tags: Optional[set[str]],
    ) -> None:
        """
        Store the inputs of the run and prepare parser and query.

        input_buffer is the raw Rust source, parse_context and
        traceability_info are filled while the run executes, custom_tags is
        forwarded to MarkerParser for doc comments.
        """
        rust_language = Language(ts_rust.language())
        self.parser = Parser(rust_language)  # type: ignore[call-arg, unused-ignore]
        self.TS_QUERY = Query(rust_language, TS_QUERY)
        self.input_buffer: bytes = input_buffer
        self.parse_context = parse_context
        self.traceability_info = traceability_info
        self.custom_tags: Optional[set[str]] = custom_tags

    def __call__(self) -> None:
        """
        Parse the buffer and dispatch every query match to its handler.

        Doc comment and normal comment matches are handled in the order the
        cursor yields them. Matches of the identifiable-item pattern are
        collected and handled afterwards, and only for nodes that did not
        already get an item through a doc comment.
        """
        tree = self.parser.parse(self.input_buffer)
        cursor = QueryCursor(self.TS_QUERY)
        matches = cursor.matches(tree.root_node)

        # Ids of the nodes for which _process_doc_comment created an item.
        seen_nodes: set[int] = set()
        deferred_matches: list[dict[str, list[Node]]] = []
        for query_index, captures in matches:
            if query_index == RustTsQuery.IDENTIFIABLE_ITEM:
                # The query for identifiable items overlaps with comment based queries. Move results for identifiable
                # items last, so that a result can be skipped if a LanguageItem was already created.
                deferred_matches.append(captures)
            elif query_index in (
                RustTsQuery.OUTER_DOC_COMMENT,
                RustTsQuery.INNER_DOC_COMMENT,
                RustTsQuery.INNER_DOC_COMMENT_FILEMODULE,
            ):
                assert len(captures["doc.item"]) == 1
                item = captures["doc.item"][0]
                doc_comment = captures["doc.comment"]
                if (
                    item.type == "source_file"
                    and "doc.item_identifier" not in captures
                ):
                    self._process_anonymous_module_comment(
                        doc_comment,
                        item,
                    )
                else:
                    if "doc.item_identifier" in captures:
                        # doc comment on named item
                        assert len(captures["doc.item_identifier"]) == 1
                        identifier = assert_cast(
                            captures["doc.item_identifier"][0].text, bytes
                        ).decode()
                    else:
                        # doc comment on anonymous item
                        identifier = assert_cast(item.text, bytes).decode()
                    self._process_doc_comment(
                        doc_comment,
                        item,
                        identifier,
                    )
                    seen_nodes.add(item.id)
            elif query_index == RustTsQuery.NORMAL_COMMENT:
                self._process_normal_comment(captures["normal_comment"])

        for captures in deferred_matches:
            assert len(captures["doc.item"]) == 1
            assert len(captures["doc.item_identifier"]) == 1
            item = captures["doc.item"][0]
            if item.id not in seen_nodes:
                identifier = assert_cast(
                    captures["doc.item_identifier"][0].text, bytes
                ).decode()
                self._process_item_for_forward_relation(item, identifier)

    def _process_anonymous_module_comment(
        self, comments: list[Node], module: Node
    ) -> None:
        """
        Register the scope=file markers of a file-level inner doc comment.

        The comment text is parsed with "file" as default scope and a range
        that spans the whole file. Markers that are not language item markers
        are skipped silently; language item markers with any other scope are
        skipped with a warning.

        module is the source file node the comment was matched on. It is not
        needed to compute positions and is only kept so that the signature
        stays stable.
        @relation(SDOC-LLR-164, SDOC-LLR-172, scope=function)
        """
        comment_text = comments_text_from_comment_nodes(comments)
        source_node = MarkerParser.parse(
            input_string=comment_text,
            line_start=1,
            line_end=self.parse_context.file_stats.lines_total,
            # Use the row of the comment itself, like the other comment
            # handlers do. The start row of the source file node only equals
            # it when nothing precedes the doc comment.
            comment_line_start=comments[0].start_point.row + 1,
            comment_byte_range=ByteRange.create_from_ts_nodes(
                comments[0], comments[-1]
            ),
            custom_tags=self.custom_tags,
            default_scope="file",
        )
        for marker_ in source_node.markers:
            if not isinstance(marker_, LanguageItemMarker):
                continue
            # At the top level, only accept the scope=file markers.
            # Everything else will be handled by functions and classes.
            if marker_.scope != RangeMarkerType.FILE:
                print(  # noqa: T201
                    "warning: comment to top-level module is not scope=file, ignoring"
                )
                continue
            language_item_marker_processor(marker_, self.parse_context)
            self.traceability_info.markers.append(marker_)

    def _process_doc_comment(
        self,
        comments: list[Node],
        item: Node,
        identifier_text: str,
    ) -> None:
        """
        Create markers, an item and a source node from a doc comment.

        comments are the doc comment parts, item is the node they document
        and identifier_text is the text used to name that node. A
        LanguageItem is always appended to the traceability info; the parsed
        source node is only kept if it carries fields.
        @relation(SDOC-LLR-164, SDOC-LLR-172, scope=function)
        """
        assert len(comments) >= 1
        comment_text = comments_text_from_comment_nodes(comments)

        # The marker range covers the comment together with the item.
        line_start_0_based = min(
            item.start_point[0], comments[0].start_point[0]
        )
        line_end_0_based = max(item.end_point[0], comments[-1].end_point[0])
        # NOTE(review): the end line is only converted to 1-based when the
        # buffer ends with a newline, and that applies to every item, not
        # just the last one in the file - confirm this is intended.
        if self.input_buffer.endswith(b"\n"):
            line_end = line_end_0_based + 1
        else:
            line_end = line_end_0_based

        source_node = MarkerParser.parse(
            input_string=comment_text,
            line_start=line_start_0_based + 1,
            line_end=line_end,
            comment_line_start=comments[0].start_point[0] + 1,
            comment_byte_range=ByteRange.create_from_ts_nodes(
                comments[0], comments[-1]
            ),
            custom_tags=self.custom_tags,
            entity_name=identifier_text,
            default_scope="function",
        )

        function_markers: list[
            Union[
                LanguageItemMarker, LineMarker, RangeMarker, ForwardRangeMarker
            ]
        ] = []
        for marker_ in source_node.markers:
            # The walrus only gives the narrowed marker its own name.
            if isinstance(marker_, LanguageItemMarker) and (
                language_item_marker_ := marker_
            ):
                if (
                    description := special_description(item, identifier_text)
                ) is not None:
                    language_item_marker_.set_description(description)

                # adds marker to context, and connects context requirements with marker
                language_item_marker_processor(
                    language_item_marker_, self.parse_context
                )
                self.traceability_info.markers.append(language_item_marker_)
                function_markers.append(marker_)

        name = self.canonical_path(item.parent, identifier_text)
        new_function_for_rust_item = LanguageItem(
            parent=self.traceability_info,
            name=name,
            display_name=name,
            line_begin=item.start_point[0] + 1,
            line_end=item.end_point[0] + 1,
            code_byte_range=ByteRange.create_from_ts_node(item),
            child_functions=[],
            markers=[],
            attributes={FunctionAttribute.DEFINITION},
        )
        if len(source_node.fields) > 0:
            source_node.function = new_function_for_rust_item
            self.traceability_info.source_nodes.append(source_node)
        self.traceability_info.functions.append(new_function_for_rust_item)
        # The map is keyed by the plain identifier, not by the canonical path.
        self.traceability_info.ng_map_names_to_markers[identifier_text] = (
            function_markers
        )

    def _process_normal_comment(self, comments: list[Node]) -> None:
        """
        Process the markers found in normal (non-doc) comments.

        File-scoped language item markers get the whole file as range before
        they are processed. Range markers and line markers go to their
        respective processors. Any other marker is ignored with a warning.
        @relation(SDOC-LLR-171, scope=function)
        """
        comment_text = comments_text_from_comment_nodes(comments)
        line_start_0_based = comments[0].start_point.row
        line_end_0_based = comments[-1].end_point.row
        source_node = MarkerParser.parse(
            input_string=comment_text,
            line_start=line_start_0_based + 1,
            line_end=line_end_0_based + 1,
            comment_line_start=line_start_0_based + 1,
            comment_byte_range=ByteRange.create_from_ts_nodes(
                comments[0], comments[-1]
            ),
        )
        for marker_ in source_node.markers:
            if (
                isinstance(marker_, LanguageItemMarker)
                and (marker_.scope is RangeMarkerType.FILE)
                and (language_item_marker := marker_)
            ):
                language_item_marker.ng_range_line_begin = 1
                language_item_marker.ng_range_line_end = (
                    self.parse_context.file_stats.lines_total
                )
                language_item_marker_processor(
                    language_item_marker, self.parse_context
                )
            elif isinstance(marker_, RangeMarker) and (range_marker := marker_):
                range_marker_processor(range_marker, self.parse_context)
            elif isinstance(marker_, LineMarker) and (line_marker := marker_):
                line_marker_processor(line_marker, self.parse_context)
            else:
                print(  # noqa: T201
                    "warning: Ignoring @relation. Only scope=file|line|range_start is supported in regular "
                    "Rust comments. Use doc comments otherwise."
                )

    def _process_item_for_forward_relation(
        self, item: Node, identifier: str
    ) -> None:
        """
        Create an item object for a named Rust item to support forward relations.

        Corresponding markers will be created and resolved later by FileTraceabilityIndex,
        see validate_and_resolve.

        @relation(SDOC-LLR-173, scope=function)
        """
        name = self.canonical_path(item.parent, identifier)
        function = LanguageItem(
            parent=self.traceability_info,
            name=name,
            display_name=name,
            line_begin=item.start_point[0] + 1,
            # The reported end line is at least one line after the begin line.
            line_end=max(item.end_point[0] + 1, item.start_point[0] + 2),
            code_byte_range=ByteRange.create_from_ts_node(item),
            child_functions=[],
            markers=[],
            attributes={FunctionAttribute.DEFINITION},
        )
        self.traceability_info.functions.append(function)

    def canonical_path(
        self, parent_scope: Optional[Node], item_path_segment: str
    ) -> str:
        """
        Construct a canonical path in best-effort.

        parent_scope is the node that encloses the item (None for no
        enclosing scope) and item_path_segment is the segment the item itself
        contributes. The result is "<path prefix>::<item_path_segment>".
        @relation(SDOC-LLR-174, scope=function)
        """
        cursor: Optional[Node] = parent_scope

        if (
            cursor is not None
            and cursor.type == "declaration_list"
            and cursor.parent is not None
            and cursor.parent.type == "impl_item"
        ):
            # The item lives directly in the body of an impl block.
            cursor = cursor.parent
            item_being_implemented = cursor.child_by_field_name("type")
            assert item_being_implemented is not None
            canonical_path_item_being_implemented = self.canonical_path(
                cursor,
                assert_cast(item_being_implemented.text, bytes).decode("utf-8"),
            )
            impl_trait_node = cursor.child_by_field_name("trait")
            if impl_trait_node is not None:
                # rust-lang.org: For trait implementations, [the path prefix] is the canonical path of the item being
                # implemented followed by as followed by the canonical path to the trait all surrounded in angle (<>)
                # brackets.
                trait = self.canonical_path(
                    None,
                    assert_cast(impl_trait_node.text, bytes).decode("utf-8"),
                )
                path_prefix = (
                    f"<{canonical_path_item_being_implemented} as {trait}>"
                )
            else:
                # rust-lang.org: For bare implementations, [the path prefix] is the canonical path of the item being
                # implemented surrounded by angle (<>) brackets.
                path_prefix = f"<{canonical_path_item_being_implemented}>"
        else:
            # Walk towards the root and collect the name of every ancestor
            # that has a "name" field, innermost first.
            path_prefix_segments = []
            while cursor is not None:
                name_node = cursor.child_by_field_name("name")
                if name_node is not None:
                    name = assert_cast(name_node.text, bytes).decode("utf-8")
                    path_prefix_segments.append(name)
                cursor = cursor.parent
            # The file name without its suffix becomes the outermost segment.
            # NOTE(review): assumes parse_context.filename is set - confirm
            # for callers that pass no file path.
            path_prefix_segments.append(Path(self.parse_context.filename).stem)
            path_prefix = "::".join(reversed(path_prefix_segments))

        # rust-lang.org: The canonical path is defined as a path prefix appended by the path segment the item itself
        # defines.
        return f"{path_prefix}::{item_path_segment}"