StrictDoc Documentation
strictdoc/core/file_system/document_finder.py
Source file coverage
Path:
strictdoc/core/file_system/document_finder.py
Lines:
325
Non-empty lines:
286
Non-empty lines covered with requirements:
286 / 286 (100.0%)
Functions:
5
Functions covered by requirements:
5 / 5 (100.0%)
1
"""
2
@relation(SDOC-SRS-104, SDOC-SRS-115, scope=file)
3
"""
4
 
5
import os
6
import sys
7
from functools import partial
8
from typing import Dict, List, Tuple, Union
9
 
10
from strictdoc.backend.markdown.reader import SDMarkdownReader
11
from strictdoc.backend.reqif.reqif_reader import ReqIFReader
12
from strictdoc.backend.sdoc.grammar_reader import SDocGrammarReader
13
from strictdoc.backend.sdoc.models.document import SDocDocument
14
from strictdoc.backend.sdoc.models.document_grammar import DocumentGrammar
15
from strictdoc.backend.sdoc.reader import SDReader
16
from strictdoc.backend.sdoc_source_code.coverage_reports.gcov import (
17
    GCovJSONReader,
18
)
19
from strictdoc.backend.sdoc_source_code.test_reports.junit_xml_reader import (
20
    JUnitXMLReader,
21
)
22
from strictdoc.backend.sdoc_source_code.test_reports.robot_xml_reader import (
23
    RobotOutputXMLReader,
24
)
25
from strictdoc.core.asset_manager import AssetManager
26
from strictdoc.core.document_meta import DocumentMeta
27
from strictdoc.core.document_tree import DocumentTree
28
from strictdoc.core.file_system.file_tree import (
29
    File,
30
    FileFinder,
31
    FileTree,
32
    Folder,
33
    PathFinder,
34
)
35
from strictdoc.core.project_config import ProjectConfig
36
from strictdoc.helpers.exception import StrictDocException
37
from strictdoc.helpers.parallelizer import Parallelizer
38
from strictdoc.helpers.paths import SDocRelativePath
39
from strictdoc.helpers.textx import drop_textx_meta
40
from strictdoc.helpers.timing import measure_performance, timing_decorator
41
 
42
 
43
class DocumentFinder:
44
    @staticmethod
    @timing_decorator("Find and read SDoc files")
    def find_sdoc_content(
        project_config: ProjectConfig, parallelizer: Parallelizer
    ) -> Tuple[DocumentTree, AssetManager]:
        """
        Find and parse all input content configured for a project.

        Every entry of ``project_config.input_paths`` is first checked for
        existence. After that, the file trees and the asset manager are
        built, and the found files are parsed into a document tree.

        :param project_config: Project configuration; its ``input_paths``
            must not be None.
        :param parallelizer: Passed through to the document tree builder,
            which uses it to parse the found files.
        :return: The document tree and the asset manager.
        :raises StrictDocException: If one of the input paths does not exist.
        """
        input_paths = project_config.input_paths
        assert input_paths is not None

        for input_path in input_paths:
            if os.path.exists(input_path):
                continue

            # Flush stdout before failing.
            # NOTE(review): presumably so that already buffered output is
            # printed before the error message - confirm.
            sys.stdout.flush()
            message = (
                "error: "
                "Provided path is neither a single file or a folder: "
                f"'{input_path}'"
            )
            raise StrictDocException(message)

        with measure_performance("Completed finding SDoc and assets"):
            file_trees_and_assets = DocumentFinder._build_file_tree(
                project_config=project_config
            )
            file_trees, asset_manager = file_trees_and_assets

        with measure_performance("Completed building document tree"):
            document_tree = DocumentFinder._build_document_tree(
                file_trees, project_config, parallelizer
            )

        return document_tree, asset_manager
69
 
70
    @staticmethod
    def _process_worker_parse_document(
        document_triple: Tuple[Union[Folder, File], File, str],
        project_config: ProjectConfig,
    ) -> Tuple[File, str, Union[SDocDocument, DocumentGrammar]]:
        """
        Parse a single input file with the reader that matches its extension.

        The reader is selected by the suffix of the file's full path:
        .sdoc, .md/.markdown, .sgra, .reqif, .junit.xml, .gcov.json and
        .robot.xml are supported.

        :param document_triple: A triple as stored in the file tree list.
            Only the second element (the file to parse) and the third element
            (the mount folder of its file tree) are used here.
        :param project_config: Project configuration passed to the readers.
        :return: The parsed file, its mount folder, and the parsed document
            or grammar, after ``drop_textx_meta`` has been applied to it.
        :raises StrictDocException: If a ReqIF file yields no documents.
        :raises NotImplementedError: If the file extension is not supported.
        """
        _, doc_file, file_tree_mount_folder = document_triple
        doc_full_path = doc_file.full_path

        with measure_performance(
            f"Reading SDOC: {os.path.basename(doc_full_path)}"
        ):
            document_or_grammar: Union[SDocDocument, DocumentGrammar]

            # @relation(SDOC-SRS-104, scope=range_start)
            if doc_full_path.endswith(".sdoc"):
                sdoc_reader: SDReader = SDReader()
                document_or_grammar = sdoc_reader.read_from_file(
                    doc_full_path, project_config
                )
                assert isinstance(document_or_grammar, SDocDocument)
            elif doc_full_path.endswith((".md", ".markdown")):
                markdown_reader = SDMarkdownReader()
                document_or_grammar = markdown_reader.read_from_file(
                    doc_full_path,
                    project_config,
                )
                assert isinstance(document_or_grammar, SDocDocument)
            # @relation(SDOC-SRS-104, scope=range_end)

            elif doc_full_path.endswith(".sgra"):
                sgra_reader = SDocGrammarReader()
                document_or_grammar = sgra_reader.read_from_file(
                    doc_full_path, project_config
                )
                assert isinstance(document_or_grammar, DocumentGrammar)
            elif doc_full_path.endswith(".reqif"):
                reqif_reader = ReqIFReader()
                reqif_documents = reqif_reader.read_from_file(doc_full_path)
                # An empty result must be rejected explicitly: indexing it
                # below would otherwise fail with a bare IndexError.
                if len(reqif_documents) == 0:
                    raise StrictDocException(
                        "error: "
                        "ReqIF file did not produce any documents: "
                        f"'{doc_full_path}'"
                    )
                # Only the first document of a ReqIF file is used.
                document_or_grammar = reqif_documents[0]
                assert isinstance(document_or_grammar, SDocDocument)
            elif doc_full_path.endswith(".junit.xml"):
                junit_xml_reader = JUnitXMLReader()
                document_or_grammar = junit_xml_reader.read_from_file(
                    doc_file, project_config
                )
                assert isinstance(document_or_grammar, SDocDocument)
            elif doc_full_path.endswith(".gcov.json"):
                gcov_json_reader = GCovJSONReader()
                document_or_grammar = gcov_json_reader.read_from_file(
                    doc_file, project_config
                )
                assert isinstance(document_or_grammar, SDocDocument)
            elif doc_full_path.endswith(".robot.xml"):
                robot_reader = RobotOutputXMLReader()
                document_or_grammar = robot_reader.read_from_file(
                    doc_file, project_config
                )
            else:
                raise NotImplementedError
        drop_textx_meta(document_or_grammar)

        return doc_file, file_tree_mount_folder, document_or_grammar
135
 
136
    @staticmethod
    def _build_document_tree(
        file_trees: List[FileTree],
        project_config: ProjectConfig,
        parallelizer: Parallelizer,
    ) -> DocumentTree:
        """
        Parse all files of the given file trees and build a DocumentTree.

        The files are parsed through ``parallelizer.run_parallel``. Parsed
        grammars are registered under the POSIX relative path of their file.
        Every parsed document gets a DocumentMeta assigned and is registered
        both under its input full path and under the relative path of its
        output HTML file.

        :param file_trees: File trees whose files are to be parsed.
        :param project_config: Project configuration; its
            ``export_output_html_root`` must not be None.
        :param parallelizer: Runs the per-file parsing.
        :return: The document tree with all documents, lookup maps and
            grammars.

        @relation(SDOC-SRS-48, scope=function)
        """

        assert isinstance(file_trees, list)

        output_root_html = project_config.export_output_html_root
        assert output_root_html is not None

        document_list: List[SDocDocument] = []
        map_docs_by_paths = {}
        map_docs_by_rel_paths: Dict[str, SDocDocument] = {}
        map_grammars_by_filenames = {}

        file_tree_list: List[Tuple[Union[Folder, File], File, str]] = []
        for file_tree in file_trees:
            file_tree_list.extend(file_tree.iterate())

        process_document_binding = partial(
            DocumentFinder._process_worker_parse_document,
            project_config=project_config,
        )

        with measure_performance("Completed parsing all documents"):
            found_documents = parallelizer.run_parallel(
                file_tree_list, process_document_binding
            )

        doc_file: File
        for doc_file, file_tree_mount_folder, document in found_documents:
            assert isinstance(file_tree_mount_folder, str), (
                file_tree_mount_folder
            )

            # Grammars are only registered by file name; they get no
            # DocumentMeta and are not part of the document list.
            if isinstance(document, DocumentGrammar):
                map_grammars_by_filenames[
                    doc_file.rel_path.relative_path_posix
                ] = document
                continue

            input_doc_full_path: str = doc_file.full_path
            map_docs_by_paths[input_doc_full_path] = document
            document_list.append(document)

            doc_relative_path_folder: SDocRelativePath = SDocRelativePath(
                os.path.dirname(doc_file.rel_path.relative_path)
            )
            # A document located directly in the root of its file tree has
            # an empty relative folder; the mount folder is used as is then.
            output_document_dir_rel_path: SDocRelativePath = SDocRelativePath(
                os.path.join(
                    file_tree_mount_folder,
                    doc_relative_path_folder.relative_path,
                )
                if len(doc_relative_path_folder.relative_path) > 0
                else file_tree_mount_folder
            )

            document_filename = doc_file.file_name
            document_filename_base = os.path.splitext(document_filename)[0]

            output_document_dir_full_path: str = os.path.join(
                output_root_html, output_document_dir_rel_path.relative_path
            )

            input_doc_assets_dir_rel_path: SDocRelativePath = SDocRelativePath(
                os.path.join(
                    file_tree_mount_folder,
                    doc_relative_path_folder.relative_path,
                    "_assets",
                )
                if doc_relative_path_folder.length() > 0
                else "/".join((file_tree_mount_folder, "_assets"))
            )

            document_meta = DocumentMeta(
                doc_file.level,
                file_tree_mount_folder,
                document_filename,
                document_filename_base,
                input_doc_full_path,
                doc_file.rel_path,
                doc_relative_path_folder,
                input_doc_assets_dir_rel_path,
                output_document_dir_full_path,
                output_document_dir_rel_path,
            )
            document.assign_meta(document_meta)

            output_document_rel_path: SDocRelativePath = SDocRelativePath(
                os.path.join(
                    output_document_dir_rel_path.relative_path,
                    f"{document_filename_base}.html",
                )
            )

            map_docs_by_rel_paths[output_document_rel_path.relative_path] = (
                document
            )

        return DocumentTree(
            file_trees,
            document_list,
            map_docs_by_paths,
            map_docs_by_rel_paths,
            map_grammars_by_filenames=map_grammars_by_filenames,
        )
248
 
249
    @staticmethod
    def _build_file_tree(
        project_config: ProjectConfig,
    ) -> Tuple[List[FileTree], AssetManager]:
        """
        Build one file tree per input path and collect all asset directories.

        An input path that is a file results in a single-file tree; an
        "_assets" directory next to that file is registered if it exists.
        Any other input path is treated as a root folder: its "_assets"
        directories and its files with supported extensions are searched,
        honoring the include/exclude paths of the project configuration.

        :param project_config: Project configuration; ``input_paths`` must be
            a non-empty list and ``output_dir`` must be a string when a
            folder is given as input.
        :return: The file trees in the order of the input paths, and the
            asset manager holding all found asset directories.
        """
        assert isinstance(project_config.input_paths, list)
        assert len(project_config.input_paths) > 0

        root_trees: List[FileTree] = []
        asset_manager = AssetManager()

        for path_to_doc_root_raw in project_config.input_paths:
            if os.path.isfile(path_to_doc_root_raw):
                path_to_doc_root = path_to_doc_root_raw
                if not os.path.isabs(path_to_doc_root):
                    path_to_doc_root = os.path.abspath(path_to_doc_root)

                parent_dir = os.path.dirname(path_to_doc_root)
                # Asset paths are made relative to the parent of the folder
                # that contains the file.
                path_to_doc_root_base = os.path.dirname(parent_dir)

                assets_dir: str = os.path.join(parent_dir, "_assets")
                if os.path.isdir(assets_dir):
                    asset_manager.add_asset_dir(
                        full_path=assets_dir,
                        relative_path=SDocRelativePath(
                            os.path.relpath(assets_dir, path_to_doc_root_base)
                        ),
                    )
                root_trees.append(
                    FileTree.create_single_file_tree(path_to_doc_root)
                )
                continue

            # os.path.abspath() normalizes the path, which also removes
            # trailing separators, so the later os.path.relpath calculations
            # work correctly. Stripping "/" by hand must be avoided: it turns
            # a root path such as "/" or "C:/" into "" or "C:", which
            # abspath() then resolves to the current working directory.
            path_to_doc_root = os.path.abspath(path_to_doc_root_raw)
            path_to_doc_root_base = os.path.dirname(path_to_doc_root)

            # Finding assets.
            with measure_performance("Find asset directories"):
                tree_asset_dirs: List[str] = PathFinder.find_directories(
                    path_to_doc_root,
                    "_assets",
                    include_paths=project_config.include_doc_paths,
                    exclude_paths=project_config.exclude_doc_paths,
                )

            for asset_dir_ in tree_asset_dirs:
                asset_manager.add_asset_dir(
                    full_path=asset_dir_,
                    relative_path=SDocRelativePath(
                        os.path.relpath(asset_dir_, path_to_doc_root_base)
                    ),
                )

            # Finding SDoc files. The output directory is ignored.
            # NOTE(review): presumably to avoid picking up previously
            # exported content - confirm.
            assert isinstance(project_config.output_dir, str)
            with measure_performance("Find SDoc files"):
                file_tree_structure = FileFinder.find_files_with_extensions(
                    root_path=path_to_doc_root,
                    ignored_dirs=[project_config.output_dir],
                    extensions=[
                        ".sdoc",
                        ".md",
                        ".markdown",
                        ".sgra",
                        ".reqif",
                        ".junit.xml",
                        ".gcov.json",
                        ".robot.xml",
                    ],
                    include_paths=project_config.include_doc_paths,
                    exclude_paths=project_config.exclude_doc_paths,
                )
            root_trees.append(file_tree_structure)

        return root_trees, asset_manager