StrictDoc Documentation
strictdoc/backend/sdoc_source_code/reader_rust.py
Source file coverage
Path:
strictdoc/backend/sdoc_source_code/reader_rust.py
Lines:
610
Non-empty lines:
563
Non-empty lines covered with requirements:
563 / 563 (100.0%)
Functions:
16
Functions covered by requirements:
16 / 16 (100.0%)
1
"""
2
@relation(SDOC-SRS-142, scope=file)
3
"""
4
 
5
from enum import IntEnum
6
from pathlib import Path
7
from typing import Optional, Union
8
 
9
import tree_sitter_rust as ts_rust
10
from tree_sitter import Language, Node, Parser, Query, QueryCursor
11
 
12
from strictdoc.backend.sdoc_source_code.constants import FunctionAttribute
13
from strictdoc.backend.sdoc_source_code.marker_parser import MarkerParser
14
from strictdoc.backend.sdoc_source_code.models.language import LanguageItem
15
from strictdoc.backend.sdoc_source_code.models.language_item_marker import (
16
    LanguageItemMarker,
17
    RangeMarkerType,
18
)
19
from strictdoc.backend.sdoc_source_code.models.line_marker import LineMarker
20
from strictdoc.backend.sdoc_source_code.models.range_marker import (
21
    ForwardRangeMarker,
22
    RangeMarker,
23
)
24
from strictdoc.backend.sdoc_source_code.models.source_file_info import (
25
    SourceFileTraceabilityInfo,
26
)
27
from strictdoc.backend.sdoc_source_code.models.source_location import ByteRange
28
from strictdoc.backend.sdoc_source_code.parse_context import ParseContext
29
from strictdoc.backend.sdoc_source_code.processors.general_language_marker_processors import (
30
    language_item_marker_processor,
31
    line_marker_processor,
32
    range_marker_processor,
33
    source_file_traceability_info_processor,
34
)
35
from strictdoc.helpers.cast import assert_cast
36
from strictdoc.helpers.file_stats import SourceFileStats
37
from strictdoc.helpers.file_system import file_open_read_bytes
38
 
39
# @relation(SDOC-LLR-177, SDOC-LLR-171, SDOC-LLR-173, scope=line)
40
TS_QUERY = """
41
; Query 0: Outer doc attribute, line doc, or block doc in allowed positions
42
(
43
  [
44
    (attribute_item
45
      (attribute
46
        (identifier) @_attribute_id (#eq? @_attribute_id "doc")
47
          value: (string_literal (string_content) @doc.comment)))+
48
    (line_comment
49
      outer: (outer_doc_comment_marker)
50
      doc: (doc_comment) @doc.comment)+
51
    (block_comment
52
      outer: (outer_doc_comment_marker)
53
      doc: (doc_comment) @doc.comment)
54
  ]
55
  .
56
  (attribute_item)*
57
  .
58
  [
59
    ; any identifiable item, most notably functions
60
    (_ name: [(identifier)(field_identifier)(type_identifier)(lifetime)] @doc.item_identifier)
61
 
62
    ; impl MyStruct
63
    (impl_item type: (type_identifier) @doc.item_identifier)
64
 
65
    ; extern "C"
66
    (foreign_mod_item (extern_modifier) @doc.item_identifier)
67
 
68
    ; match arm
69
    (match_arm (match_pattern) @doc.item_identifier)
70
 
71
    ; assignment inside struct initializer
72
    (field_initializer field: (field_identifier) @doc.item_identifier)
73
 
74
    ; Statement like 1;
75
    (expression_statement) @doc.item_identifier
76
 
77
    ; Expression like x + y
78
    (binary_expression) @doc.item_identifier
79
 
80
    ; Expression like (x + y)
81
    (parenthesized_expression) @doc.item_identifier
82
 
83
    ; Named "type" field of any enclosing node (usually body), e.g. type within tuple struct.
84
    type: (_)
85
  ] @doc.item
86
)
87
 
88
; Query 1: Inner doc attribute, line doc or block doc in allowed positions.
89
; Note: We have to repeat the identical inner pattern, alternations don't help here.
90
;       See https://github.com/tree-sitter/tree-sitter/issues/3480.
91
[
92
  (function_item
93
    name: (identifier) @doc.item_identifier
94
    body: (block
95
      [
96
        (inner_attribute_item
97
          (attribute
98
            (identifier) @_attribute_id (#eq? @_attribute_id "doc")
99
            value: (string_literal (string_content) @doc.comment)))+
100
        (line_comment
101
          inner: (inner_doc_comment_marker)
102
          doc: (doc_comment) @doc.comment)+
103
        (block_comment
104
          inner: (inner_doc_comment_marker)
105
          doc: (doc_comment) @doc.comment)
106
      ]
107
    )
108
  )
109
  (mod_item
110
    name: (identifier) @doc.item_identifier
111
    body: (declaration_list
112
      [
113
        (inner_attribute_item
114
          (attribute
115
            (identifier) @_attribute_id (#eq? @_attribute_id "doc")
116
            value: (string_literal (string_content) @doc.comment)))+
117
        (line_comment
118
          inner: (inner_doc_comment_marker)
119
          doc: (doc_comment) @doc.comment)+
120
        (block_comment
121
          inner: (inner_doc_comment_marker)
122
          doc: (doc_comment) @doc.comment)
123
      ]
124
    )
125
  )
126
  (impl_item
127
    type: (type_identifier) @doc.item_identifier
128
    body: (declaration_list
129
      [
130
        (inner_attribute_item
131
          (attribute
132
            (identifier) @_attribute_id (#eq? @_attribute_id "doc")
133
            value: (string_literal (string_content) @doc.comment)))+
134
        (line_comment
135
          inner: (inner_doc_comment_marker)
136
          doc: (doc_comment) @doc.comment)+
137
        (block_comment
138
          inner: (inner_doc_comment_marker)
139
          doc: (doc_comment) @doc.comment)
140
      ]
141
    )
142
  )
143
  (foreign_mod_item (extern_modifier) @doc.item_identifier
144
    body: (declaration_list
145
      [
146
        (inner_attribute_item
147
          (attribute
148
            (identifier) @_attribute_id (#eq? @_attribute_id "doc")
149
            value: (string_literal (string_content) @doc.comment)))+
150
        (line_comment
151
          inner: (inner_doc_comment_marker)
152
          doc: (doc_comment) @doc.comment)+
153
        (block_comment
154
          inner: (inner_doc_comment_marker)
155
          doc: (doc_comment) @doc.comment)
156
      ]
157
    )
158
  )
159
] @doc.item
160
 
161
; Query 2: Inner line or block doc comment of file-level module.
162
(source_file
163
  [
164
    (line_comment
165
      inner: (inner_doc_comment_marker)
166
        doc: (doc_comment) @doc.comment)+
167
    (block_comment
168
      inner: (inner_doc_comment_marker)
169
        doc: (doc_comment) @doc.comment)
170
  ]
171
) @doc.item
172
 
173
; Query 3: normal line or block comment
174
[(line_comment !doc)+
175
 (block_comment !doc)] @normal_comment
176
 
177
; Query 4: Identifiable items. Those where it's clear how to link by forward relations.
178
[
179
  (const_item name: (identifier) @doc.item_identifier) @doc.item
180
  (enum_item name: (type_identifier) @doc.item_identifier) @doc.item
181
  (function_item name: (identifier) @doc.item_identifier) @doc.item
182
  (mod_item name: (identifier) @doc.item_identifier) @doc.item
183
  (static_item name: (identifier) @doc.item_identifier) @doc.item
184
  (struct_item name: (type_identifier) @doc.item_identifier) @doc.item
185
  (trait_item name: (type_identifier) @doc.item_identifier) @doc.item
186
  (type_item name: (type_identifier) @doc.item_identifier) @doc.item
187
  (union_item name: (type_identifier) @doc.item_identifier) @doc.item
188
]
189
"""
190
 
191
 
192
class RustTsQuery(IntEnum):
    """
    Give the queries from TS_QUERY a friendly name.

    ParserRun.__call__ compares the pattern index of every query match
    against these members, so each value equals the "Query N" number used in
    the comments inside TS_QUERY.
    """

    # "Query 0": outer doc attribute, line doc or block doc comment.
    OUTER_DOC_COMMENT = 0
    # "Query 1": inner doc attribute, line doc or block doc comment inside a
    # function, module, impl or foreign module body.
    INNER_DOC_COMMENT = 1
    # "Query 2": inner line or block doc comment of the file-level module.
    INNER_DOC_COMMENT_FILEMODULE = 2
    # "Query 3": normal (non-doc) line or block comment.
    NORMAL_COMMENT = 3
    # "Query 4": identifiable items that can be targeted by forward relations.
    IDENTIFIABLE_ITEM = 4
200
 
201
 
202
def comments_text_from_comment_nodes(comments: list[Node]) -> str:
    """
    Concatenate the text of several comment nodes into one string.

    Between two consecutive nodes, one newline is inserted for every row that
    lies between their start rows. Nodes that start on the same row are
    therefore joined without any separator.

    @relation(SDOC-LLR-175, scope=function)
    """
    first_node = comments[0]
    pieces = [assert_cast(first_node.text, bytes).decode("utf-8")]
    previous_row = first_node.start_point.row
    for node in comments[1:]:
        current_row = node.start_point.row
        # A non-positive row distance multiplies out to an empty separator.
        pieces.append("\n" * (current_row - previous_row))
        pieces.append(assert_cast(node.text, bytes).decode("utf-8"))
        previous_row = current_row
    return "".join(pieces)
216
 
217
 
218
# Label that precedes the identifier in the description, keyed by the
# tree-sitter node type of the item. "function_item" is not listed because
# its description additionally carries a "()" suffix.
_RUST_ITEM_DESCRIPTION_LABELS = {
    "associated_type": "associated type",
    "binary_expression": "expression",
    "parenthesized_expression": "expression",
    "const_item": "const",
    "const_parameter": "const parameter",
    "enum_item": "enum",
    "enum_variant": "enum variant",
    "expression_statement": "statement",
    "extern_crate_declaration": "crate",
    "field_declaration": "field",
    "field_initializer": "field initializer",
    "foreign_mod_item": "foreign module",
    "impl_item": "impl",
    "match_arm": "match arm",
    "macro_definition": "macro",
    "mod_item": "module",
    "lifetime_parameter": "lifetime",
    "static_item": "static",
    "struct_item": "struct",
    "trait_item": "trait",
    "type_item": "type",
    "type_parameter": "type parameter",
    "union_item": "union",
    "primitive_type": "type",
    "type_identifier": "type",
}


def special_description(item: Node, identifier_text: str) -> Optional[str]:
    """
    Build a Rust specific description for the language construct behind item.

    The generic description assumes that an item is a function, but the Rust
    reader uses the same item class for many other kinds of Rust constructs.
    The text returned here names the actual kind, e.g. "struct Foo".

    Returns None when the node type of item has no dedicated description.
    """
    node_type = item.type
    if node_type == "function_item":
        return f"fn {identifier_text}()"
    label = _RUST_ITEM_DESCRIPTION_LABELS.get(node_type)
    if label is None:
        return None
    return f"{label} {identifier_text}"
275
 
276
 
277
class SourceFileTraceabilityReader_Rust:
    """Entry point for collecting traceability information from Rust code."""

    @staticmethod
    def supported_elements() -> list[str]:
        """Return an empty list: this reader declares no element names."""
        return []

    def __init__(self, custom_tags: Optional[set[str]] = None) -> None:
        # Handed to every ParserRun that this reader starts.
        self.custom_tags: Optional[set[str]] = custom_tags

    def read(
        self,
        input_buffer: bytes,
        file_path: Optional[str] = None,
    ) -> SourceFileTraceabilityInfo:
        """
        Parse Rust source given as bytes and return its traceability info.

        A ParserRun fills a fresh SourceFileTraceabilityInfo, which is then
        post-processed by source_file_traceability_info_processor.
        """
        context = ParseContext(file_path, SourceFileStats.create(input_buffer))
        result = SourceFileTraceabilityInfo([])
        run_parser = ParserRun(input_buffer, context, result, self.custom_tags)
        run_parser()
        source_file_traceability_info_processor(result, context)
        return result

    def read_from_file(self, file_path: str) -> SourceFileTraceabilityInfo:
        """
        Generate the source file traceability info for one particular Rust file.

        The created SourceFileTraceabilityInfo is only partially filled, with
        information that is local to this file:
        - functions: Markers are associated, but only those resulting from local markers.
        - markers: Markers that stem from markup in this file.
        - ng_map_reqs_to_markers: Mapping of requirement IDs to Marker objects for markers directly defined in the source file.
        """
        with file_open_read_bytes(file_path) as rust_file:
            return self.read(rust_file.read(), file_path=file_path)
315
 
316
 
317
class ParserRun:
    """
    A single parse of one Rust source buffer.

    The constructor prepares the tree-sitter parser and compiles TS_QUERY.
    Calling the instance parses the buffer, runs the query and records the
    markers, source nodes and language items that result from the matches in
    the given traceability info and parse context.
    """

    def __init__(
        self,
        input_buffer: bytes,
        parse_context: ParseContext,
        traceability_info: SourceFileTraceabilityInfo,
        custom_tags: Optional[set[str]],
    ):
        rust_language = Language(ts_rust.language())
        self.parser = Parser(rust_language)  # type: ignore[call-arg, unused-ignore]
        self.TS_QUERY = Query(rust_language, TS_QUERY)
        self.input_buffer: bytes = input_buffer
        self.parse_context = parse_context
        self.traceability_info = traceability_info
        self.custom_tags: Optional[set[str]] = custom_tags

    @staticmethod
    def _node_text(node: Node) -> str:
        """Return the source text covered by a tree-sitter node, decoded as UTF-8."""
        return assert_cast(node.text, bytes).decode("utf-8")

    def __call__(self) -> None:
        """
        Parse the input buffer and process every match of TS_QUERY.

        Matches of the comment based patterns are processed in the order in
        which the query cursor reports them. Matches of the identifiable-item
        pattern are processed afterwards, and only for items that no doc
        comment match has already handled.
        """
        tree = self.parser.parse(self.input_buffer)
        cursor = QueryCursor(self.TS_QUERY)

        # Ids of the item nodes that already went through a doc comment match.
        seen_nodes = set()
        deferred_matches = []
        for query_index, captures in cursor.matches(tree.root_node):
            if query_index == RustTsQuery.IDENTIFIABLE_ITEM:
                # The query for identifiable items overlaps with comment based queries. Move results for identifiable
                # items last, so that a result can be skipped if a LanguageItem was already created.
                deferred_matches.append(captures)
            elif query_index in (
                RustTsQuery.OUTER_DOC_COMMENT,
                RustTsQuery.INNER_DOC_COMMENT,
                RustTsQuery.INNER_DOC_COMMENT_FILEMODULE,
            ):
                assert len(captures["doc.item"]) == 1
                item = captures["doc.item"][0]
                doc_comment = captures["doc.comment"]
                has_identifier = "doc.item_identifier" in captures
                if item.type == "source_file" and not has_identifier:
                    self._process_anonymous_module_comment(doc_comment, item)
                else:
                    if has_identifier:
                        # doc comment on named item
                        assert len(captures["doc.item_identifier"]) == 1
                        identifier = self._node_text(
                            captures["doc.item_identifier"][0]
                        )
                    else:
                        # doc comment on anonymous item
                        identifier = self._node_text(item)
                    self._process_doc_comment(doc_comment, item, identifier)
                seen_nodes.add(item.id)
            elif query_index == RustTsQuery.NORMAL_COMMENT:
                self._process_normal_comment(captures["normal_comment"])

        for captures in deferred_matches:
            assert len(captures["doc.item"]) == 1
            assert len(captures["doc.item_identifier"]) == 1
            item = captures["doc.item"][0]
            if item.id in seen_nodes:
                continue
            self._process_item_for_forward_relation(
                item, self._node_text(captures["doc.item_identifier"][0])
            )

    def _process_anonymous_module_comment(
        self, comments: list[Node], module: Node
    ) -> None:
        """
        Create marker, item and source nodes for file-level module from tree-sitter doc comment nodes.

        The comment is parsed with the whole file as its line range. Only
        file-scoped markers are accepted; other language item markers are
        reported with a warning and dropped.

        @relation(SDOC-LLR-164, SDOC-LLR-172, scope=function)
        """
        comment_text = comments_text_from_comment_nodes(comments)
        source_node = MarkerParser.parse(
            input_string=comment_text,
            line_start=1,
            line_end=self.parse_context.file_stats.lines_total,
            # The comment starts where its first node starts. The module node
            # may start earlier, e.g. when other comments precede the doc
            # comment, so the module start is not a valid substitute. This
            # also matches the byte range below and the sibling methods.
            comment_line_start=comments[0].start_point.row + 1,
            comment_byte_range=ByteRange.create_from_ts_nodes(
                comments[0], comments[-1]
            ),
            custom_tags=self.custom_tags,
            default_scope="file",
        )
        for marker_ in source_node.markers:
            if not isinstance(marker_, LanguageItemMarker):
                continue
            # At the top level, only accept the scope=file markers.
            # Everything else will be handled by functions and classes.
            if marker_.scope != RangeMarkerType.FILE:
                print(  # noqa: T201
                    "warning: comment to top-level module is not scope=file, ignoring"
                )
                continue
            language_item_marker_processor(marker_, self.parse_context)
            self.traceability_info.markers.append(marker_)

    def _process_doc_comment(
        self,
        comments: list[Node],
        item: Node,
        identifier_text: str,
    ) -> None:
        """
        Create markers, items and source nodes from tree-sitter doc comment nodes.

        The parsed line range spans both the comment and the documented item.
        A LanguageItem is created for the item even when the comment holds no
        markers.

        @relation(SDOC-LLR-164, SDOC-LLR-172, scope=function)
        """
        assert len(comments) >= 1
        comment_text = comments_text_from_comment_nodes(comments)
        line_start_0_based = min(
            item.start_point[0], comments[0].start_point[0]
        )
        line_end_0_based = max(item.end_point[0], comments[-1].end_point[0])
        # NOTE(review): the 1-based conversion of the end line is only applied
        # when the buffer ends with a newline; the reason is not visible in
        # this file, so the behavior is kept as it was. endswith() also copes
        # with an empty buffer, where indexing the last byte would raise.
        if self.input_buffer.endswith(b"\n"):
            line_end = line_end_0_based + 1
        else:
            line_end = line_end_0_based
        source_node = MarkerParser.parse(
            input_string=comment_text,
            line_start=line_start_0_based + 1,
            line_end=line_end,
            comment_line_start=comments[0].start_point[0] + 1,
            comment_byte_range=ByteRange.create_from_ts_nodes(
                comments[0], comments[-1]
            ),
            custom_tags=self.custom_tags,
            entity_name=identifier_text,
            default_scope="function",
        )

        # The description depends on the item only, not on the marker.
        description = special_description(item, identifier_text)

        function_markers: list[
            Union[
                LanguageItemMarker, LineMarker, RangeMarker, ForwardRangeMarker
            ]
        ] = []
        for marker_ in source_node.markers:
            if not isinstance(marker_, LanguageItemMarker):
                continue
            if description is not None:
                marker_.set_description(description)

            # adds marker to context, and connects context requirements with marker
            language_item_marker_processor(marker_, self.parse_context)
            self.traceability_info.markers.append(marker_)
            function_markers.append(marker_)

        name = self.canonical_path(item.parent, identifier_text)
        new_function_for_rust_item = LanguageItem(
            parent=self.traceability_info,
            name=name,
            display_name=name,
            line_begin=item.start_point[0] + 1,
            line_end=item.end_point[0] + 1,
            code_byte_range=ByteRange.create_from_ts_node(item),
            child_functions=[],
            markers=[],
            attributes={FunctionAttribute.DEFINITION},
        )
        if len(source_node.fields) > 0:
            source_node.function = new_function_for_rust_item
        self.traceability_info.source_nodes.append(source_node)
        self.traceability_info.functions.append(new_function_for_rust_item)
        # NOTE(review): the map is keyed by the bare identifier, not by the
        # canonical name, so a later item with the same identifier replaces
        # the entry of an earlier one - confirm that this is intended.
        self.traceability_info.ng_map_names_to_markers[identifier_text] = (
            function_markers
        )

    def _process_normal_comment(self, comments: list[Node]) -> None:
        """
        Create markers and items from tree-sitter normal comment nodes.

        File-scoped language item markers are extended to the whole file,
        range and line markers go to their processors, and any other marker
        is reported with a warning and ignored.

        @relation(SDOC-LLR-171, scope=function)
        """
        comment_text = comments_text_from_comment_nodes(comments)
        line_start_0_based = comments[0].start_point.row
        line_end_0_based = comments[-1].end_point.row
        source_node = MarkerParser.parse(
            input_string=comment_text,
            line_start=line_start_0_based + 1,
            line_end=line_end_0_based + 1,
            comment_line_start=line_start_0_based + 1,
            comment_byte_range=ByteRange.create_from_ts_nodes(
                comments[0], comments[-1]
            ),
        )
        for marker_ in source_node.markers:
            if (
                isinstance(marker_, LanguageItemMarker)
                and marker_.scope is RangeMarkerType.FILE
            ):
                marker_.ng_range_line_begin = 1
                marker_.ng_range_line_end = (
                    self.parse_context.file_stats.lines_total
                )
                language_item_marker_processor(marker_, self.parse_context)
            elif isinstance(marker_, RangeMarker):
                range_marker_processor(marker_, self.parse_context)
            elif isinstance(marker_, LineMarker):
                line_marker_processor(marker_, self.parse_context)
            else:
                print(  # noqa: T201
                    "warning: Ignoring @relation. Only scope=file|line|range_start is supported in regular "
                    "Rust comments. Use doc comments otherwise."
                )

    def _process_item_for_forward_relation(
        self, item: Node, identifier: str
    ) -> None:
        """
        Create item objects from tree-sitter doc comment nodes to support forward relations.

        Corresponding markers will be created and resolved later by FileTraceabilityIndex,
        see validate_and_resolve.

        @relation(SDOC-LLR-173, scope=function)
        """
        name = self.canonical_path(item.parent, identifier)
        function = LanguageItem(
            parent=self.traceability_info,
            name=name,
            display_name=name,
            line_begin=item.start_point[0] + 1,
            # NOTE(review): the end line is at least one line past the begin
            # line, even for an item that fits on one line; the reason is not
            # visible in this file.
            line_end=max(item.end_point[0] + 1, item.start_point[0] + 2),
            code_byte_range=ByteRange.create_from_ts_node(item),
            child_functions=[],
            markers=[],
            attributes={FunctionAttribute.DEFINITION},
        )
        self.traceability_info.functions.append(function)

    def canonical_path(
        self, parent_scope: Optional[Node], item_path_segment: str
    ) -> str:
        """
        Construct a canonical path in best-effort.

        parent_scope is the syntax node that directly encloses the item, or
        None if there is none. item_path_segment is the path segment that the
        item itself defines. The result is the path prefix derived from the
        enclosing nodes, followed by "::" and item_path_segment.

        @relation(SDOC-LLR-174, scope=function)
        """
        cursor: Optional[Node] = parent_scope

        if (
            cursor is not None
            and cursor.type == "declaration_list"
            and cursor.parent is not None
            and cursor.parent.type == "impl_item"
        ):
            # The item lives directly in the body of an impl block.
            cursor = cursor.parent
            item_being_implemented = cursor.child_by_field_name("type")
            assert item_being_implemented is not None
            canonical_path_item_being_implemented = self.canonical_path(
                cursor, self._node_text(item_being_implemented)
            )
            impl_trait_node = cursor.child_by_field_name("trait")
            if impl_trait_node is not None:
                # rust-lang.org: For trait implementations, [the path prefix] is the canonical path of the item being
                # implemented followed by as followed by the canonical path to the trait all surrounded in angle (<>)
                # brackets.
                trait = self.canonical_path(
                    None, self._node_text(impl_trait_node)
                )
                path_prefix = (
                    f"<{canonical_path_item_being_implemented} as {trait}>"
                )
            else:
                # rust-lang.org: For bare implementations, [the path prefix] is the canonical path of the item being
                # implemented surrounded by angle (<>) brackets.
                path_prefix = f"<{canonical_path_item_being_implemented}>"
        else:
            # Collect the names of all enclosing nodes, innermost first.
            path_prefix_segments = []
            while cursor is not None:
                name_node = cursor.child_by_field_name("name")
                if name_node is not None:
                    path_prefix_segments.append(self._node_text(name_node))
                cursor = cursor.parent
            # The file name without its suffix serves as the outermost segment.
            # NOTE(review): assumes parse_context.filename is set; Path()
            # raises a TypeError for None - confirm against callers that read
            # a buffer without a file path.
            path_prefix_segments.append(Path(self.parse_context.filename).stem)
            path_prefix = "::".join(reversed(path_prefix_segments))

        # rust-lang.org: The canonical path is defined as a path prefix appended by the path segment the item itself
        # defines.
        return f"{path_prefix}::{item_path_segment}"