StrictDoc Documentation
strictdoc/commands/manage_autouid_command.py
Source file coverage
Path:
strictdoc/commands/manage_autouid_command.py
Lines:
322
Non-empty lines:
277
Non-empty lines covered with requirements:
277 / 277 (100.0%)
Functions:
6
Functions covered by requirements:
6 / 6 (100.0%)
1
import argparse
2
import sys
3
from typing import Dict, Optional
4
 
5
from strictdoc.backend.sdoc.errors.document_tree_error import DocumentTreeError
6
from strictdoc.backend.sdoc.models.node import SDocNode
7
from strictdoc.backend.sdoc.writer import SDWriter
8
from strictdoc.backend.sdoc_source_code.marker_writer import MarkerWriter
9
from strictdoc.backend.sdoc_source_code.models.source_file_info import (
10
    SourceFileTraceabilityInfo,
11
)
12
from strictdoc.backend.sdoc_source_code.source_writer import SourceWriter
13
from strictdoc.cli.base_command import BaseCommand, CLIValidationError
14
from strictdoc.commands.manage_autouid_config import ManageAutoUIDCommandConfig
15
from strictdoc.core.analyzers.document_stats import (
16
    DocumentStats,
17
    DocumentTreeStats,
18
)
19
from strictdoc.core.analyzers.document_uid_analyzer import DocumentUIDAnalyzer
20
from strictdoc.core.project_config import ProjectConfig, ProjectConfigLoader
21
from strictdoc.core.traceability_index import TraceabilityIndex
22
from strictdoc.core.traceability_index_builder import TraceabilityIndexBuilder
23
from strictdoc.helpers.parallelizer import Parallelizer
24
from strictdoc.helpers.sha256 import get_random_sha256, get_sha256, is_sha256
25
from strictdoc.helpers.string import (
26
    create_safe_acronym,
27
)
28
 
29
 
30
def generate_code_hash(
    *, project: bytes, file_path: bytes, instance: bytes, code: bytes
) -> bytes:
    """
    Compute the drift detection hash for a requirement/code pair.

    The scheme is the one suggested by the Linux kernel requirements template,
    i.e., the SHA256 of the plain concatenation of the four parts:

    "${PROJECT}${FILE_PATH}${INSTANCE}${CODE}" | sha256sum

    All arguments are keyword-only and must be bytes. The digest string
    produced by get_sha256() is returned encoded as UTF-8 bytes.
    """

    # Every part must already be encoded by the caller. The parts are checked
    # in the same order in which they are concatenated below.
    for part_ in (project, file_path, instance, code):
        assert isinstance(part_, bytes)

    digest = get_sha256(project + file_path + instance + code)
    return bytes(digest, encoding="utf8")
46
 
47
 
48
class ManageAutoUIDCommand(BaseCommand):
    """
    The 'manage auto-uid' command.

    The command builds the traceability index of a project tree, assigns the
    missing requirement UIDs (and, optionally, section UIDs), patches the
    identifiers in the referenced source files, and writes the changed SDoc
    documents back to disk.
    """

    HELP = "Generates missing requirements UIDs automatically."
    DETAILED_HELP = """\
This command generates missing requirement UID automatically.
The UIDs are generated based on the nearest section PREFIX (if provided) or
the document's PREFIX (if provided or "REQ-" by default).
"""

    @classmethod
    def add_arguments(cls, parser: argparse.ArgumentParser) -> None:
        """
        Register the arguments of this command on the given parser.
        """

        parser.add_argument(
            "input_path",
            type=str,
            help="Path to the project tree.",
        )
        parser.add_argument(
            "--include-sections",
            action="store_true",
            help=(
                "By default, the command only generates the UID for "
                "requirements. This option enables the generation of UID for "
                "sections."
            ),
        )
        parser.add_argument(
            "--config",
            type=str,
            help="Path to the StrictDoc TOML config file.",
        )

    def __init__(self, args: argparse.Namespace) -> None:
        """
        Store the parsed arguments and build the command config from them.

        All attributes of the parsed namespace are forwarded to
        ManageAutoUIDCommandConfig as keyword arguments.
        """

        self.args = args
        self.config: ManageAutoUIDCommandConfig = ManageAutoUIDCommandConfig(
            **vars(args)
        )

    def run(self, parallelizer: Parallelizer) -> None:
        """
        Generate the missing identifiers and write the results back to disk.

        The steps are:
        - Validate the command config and load the project config.
        - Build the traceability index. If a DocumentTreeError is raised, its
          message is printed and the process exits with the status code 1.
        - Reserve UIDs for the sections without UID (only if enabled in the
          project config).
        - Assign UIDs to the requirements without UID, per prefix.
        - Rewrite the source files known to the file traceability index.
        - Write all non-autogenerated .sdoc documents back to their files.

        @relation(SDOC-SRS-85, scope=function)
        """

        manage_config: ManageAutoUIDCommandConfig = self.config
        # A CLIValidationError raised by the validation propagates to the
        # caller as is.
        manage_config.validate()

        project_config = ProjectConfigLoader.load_using_manage_autouid_config(
            manage_config
        )

        try:
            traceability_index: TraceabilityIndex = (
                TraceabilityIndexBuilder.create(
                    project_config=project_config,
                    parallelizer=parallelizer,
                )
            )
        except DocumentTreeError as exc:
            print(exc.to_print_message())  # noqa: T201
            sys.exit(1)

        document_tree_stats: DocumentTreeStats = (
            DocumentUIDAnalyzer.analyze_document_tree(traceability_index)
        )

        if project_config.autouuid_include_sections:
            document_stats_: DocumentStats
            for document_stats_ in document_tree_stats.single_document_stats:
                # The section UIDs are derived from an acronym of the title of
                # the document that contains the sections.
                document_acronym = create_safe_acronym(
                    document_stats_.document.title
                )
                for section in document_stats_.sections_without_uid:
                    auto_uid = document_tree_stats.get_auto_section_uid(
                        document_acronym, section
                    )
                    section.reserved_uid = auto_uid

        for (
            prefix,
            prefix_requirements,
        ) in document_tree_stats.requirements_per_prefix.items():
            # The numbering continues from the next free number of the prefix.
            next_number = document_tree_stats.get_next_requirement_uid_number(
                prefix
            )

            for requirement in prefix_requirements.requirements_no_uid:
                requirement_prefix = requirement.get_prefix()
                requirement_uid = f"{requirement_prefix}{next_number}"
                requirement.set_field_value(
                    field_name="UID",
                    form_field_index=0,
                    value=requirement_uid,
                )
                next_number += 1

        for (
            trace_info_
        ) in traceability_index.get_file_traceability_index().trace_infos:
            ManageAutoUIDCommand._rewrite_source_file(
                trace_info_,
                project_config,
                traceability_index=traceability_index,
            )

        for document in traceability_index.document_tree.document_list:
            assert document.meta is not None

            # Most recently, we parse JUnit XML, Gcov JSON files or SDoc
            # documents generated from source code comments.
            # These must not be written back.
            if (
                not document.meta.document_filename.endswith(".sdoc")
                or document.autogen
            ):
                continue

            document_content = SDWriter(project_config).write(document)
            document_meta = document.meta
            with open(
                document_meta.input_doc_full_path, "w", encoding="utf8"
            ) as output_file:
                output_file.write(document_content)

    @staticmethod
    def _resolve_mid_field_name(
        project_config: ProjectConfig, source_file_rel_path: str
    ) -> str:
        """
        Return the name under which the MID field appears in a source file.

        The name is "MID" unless the project config has a source nodes entry
        for the given path that remaps "MID" to another name.
        """

        relevant_source_node_config = (
            project_config.get_relevant_source_nodes_entry(source_file_rel_path)
        )
        if relevant_source_node_config is None:
            return "MID"
        return relevant_source_node_config.sdoc_to_source_map.get("MID", "MID")

    @staticmethod
    def _get_existing_hash(sdoc_node: SDocNode) -> Optional[str]:
        """
        Return the text of the node's HASH field or None if there is no HASH.
        """

        if "HASH" not in sdoc_node.ordered_fields_lookup:
            return None
        return sdoc_node.get_field_by_name("HASH").get_text_value()

    @staticmethod
    def _collect_instance_bytes(sdoc_node: SDocNode) -> bytes:
        """
        Build the INSTANCE part of the hash input for an SDoc node.

        INSTANCE: The requirement template instance, minus tags with hash
        strings. The text values of all fields except MID and HASH are
        concatenated in the order of the node's fields and encoded as UTF-8.
        """

        instance_bytes = bytearray()
        for (
            field_name_,
            field_values_,
        ) in sdoc_node.ordered_fields_lookup.items():
            if field_name_ in ("MID", "HASH"):
                continue
            for field_value_ in field_values_:
                instance_bytes += bytes(
                    field_value_.get_text_value(), encoding="utf8"
                )

        # This is important for Windows. Otherwise, the hash key will be
        # calculated incorrectly.
        return bytes(instance_bytes.replace(b"\r\n", b"\n"))

    @staticmethod
    def _rewrite_source_file(
        trace_info: SourceFileTraceabilityInfo,
        project_config: ProjectConfig,
        traceability_index: TraceabilityIndex,
    ) -> None:
        """
        Patch the identifiers of the source nodes of a single source file.

        Only referenced files with the ".c" extension are processed. For each
        source node that has a function with a code byte range, a comment byte
        range and the MID field:
        - If the MID is not a valid SHA256 string, a new random one is
          generated and the comment of the node is patched with it.
        - If the node has an SDoc node attached and that node has no valid
          HASH, the HASH field of the SDoc node is calculated and set.

        The file is then written back with the patched comments.

        NOTE: This updates:
              - The links in graph database.
              - The source code with the new calculated value.
              This DOES NOT update MID in the search index built for each
              document in SDocDocument.build_search_index(). The assumption is
              that the search index is not used by the 'manage autouid' command,
              so updating of the document search indexes can be skipped.
        """

        assert trace_info.source_file is not None
        source_file = trace_info.source_file
        source_file_rel_path = source_file.in_doctree_source_file_rel_path_posix

        # FIXME: These conditions for skipping the writes may be insufficient.
        if (
            not source_file_rel_path.endswith(".c")
            or not source_file.is_referenced
        ):
            return

        with open(source_file.full_path, "rb") as source_file_:
            file_bytes = source_file_.read()

        field_remapped_mid = ManageAutoUIDCommand._resolve_mid_field_name(
            project_config, source_file_rel_path
        )

        file_rewrites = {}
        for source_node_ in trace_info.source_nodes:
            function = source_node_.function
            if function is None or function.code_byte_range is None:
                continue
            code_byte_range = function.code_byte_range

            # Not all source readers create rewritable byte ranges. There is
            # nothing to rewrite for such nodes. Skipping them here.
            if source_node_.comment_byte_range is None:
                continue
            comment_byte_range = source_node_.comment_byte_range

            if field_remapped_mid not in source_node_.fields:
                continue

            node_rewrites: Dict[str, bytes] = {}

            # If the source node has the MID (SPDX-REQ-ID), but it is not yet a
            # valid SHA256 identifier, create one and patch the node.
            existing_req_id = source_node_.fields[field_remapped_mid]
            if not is_sha256(existing_req_id):
                hash_spdx_id_str = get_random_sha256()
                hash_spdx_id = bytes(hash_spdx_id_str, encoding="utf8")

                # Keep the traceability index in sync with the new MID.
                if (sdoc_node_ := source_node_.sdoc_node) is not None:
                    traceability_index.update_node_mid(
                        sdoc_node_, hash_spdx_id_str
                    )
                node_rewrites[field_remapped_mid] = hash_spdx_id

                patched_node = MarkerWriter().write(
                    source_node_,
                    rewrites=node_rewrites,
                    comment_file_bytes=file_bytes[
                        comment_byte_range.start : comment_byte_range.end
                    ],
                )
                file_rewrites[source_node_] = patched_node

            # If a source node has no sidecar SDoc node attached, there is
            # nothing else to do.
            if source_node_.sdoc_node is None:
                continue

            #
            # The following is only applicable to the Linux Kernel Requirements
            # Template proposal:
            #
            # Generate HASH field if it is not present. The HASH field is only
            # generated for SDoc nodes, the source code nodes are not modified.
            #
            sdoc_node: SDocNode = source_node_.sdoc_node

            existing_req_hash = ManageAutoUIDCommand._get_existing_hash(
                sdoc_node
            )
            if existing_req_hash is not None and is_sha256(existing_req_hash):
                continue

            # FILE_PATH: The file the code resides in, relative to the root of
            # the project repository.
            file_path = bytes(source_file_rel_path, encoding="utf8")

            # INSTANCE: The requirement template instance, minus tags with hash
            # strings.
            instance = ManageAutoUIDCommand._collect_instance_bytes(sdoc_node)

            # CODE: The code that the node hash applies to. The line endings
            # are normalized, which is important for Windows. Otherwise, the
            # hash key will be calculated incorrectly.
            code = file_bytes[
                code_byte_range.start : code_byte_range.end
            ].replace(b"\r\n", b"\n")

            hash_spdx_hash = generate_code_hash(
                project=bytes(project_config.project_title, encoding="utf8"),
                file_path=file_path,
                instance=instance,
                code=code,
            )
            sdoc_node.set_field_value(
                field_name="HASH",
                form_field_index=0,
                value=hash_spdx_hash.decode("utf8"),
            )

        source_writer = SourceWriter()
        output_string = source_writer.write(
            trace_info, rewrites=file_rewrites, file_bytes=file_bytes
        )

        with open(source_file.full_path, "wb") as source_file_:
            source_file_.write(output_string)