diff --git a/docs/source/components/analyse.rst b/docs/source/components/analyse.rst index a6271b7..631eafd 100644 --- a/docs/source/components/analyse.rst +++ b/docs/source/components/analyse.rst @@ -47,7 +47,7 @@ Limitations **Current Limitations:** -- **Language Support**: C/C++ (``//``, ``/* */``), C# (``//``, ``/* */``, ``///``), Python (``#``), YAML (``#``), Rust (``//``, ``/* */``, ``///``) and Go (``//``, ``/* */``) comment styles are supported +- **Language Support**: C/C++ (``//``, ``/* */``), C# (``//``, ``/* */``, ``///``), Python (``#``), YAML (``#``), Rust (``//``, ``/* */``, ``///``), Go (``//``, ``/* */``) and JSONC (``//``, ``/* */``) comment styles are supported - **Single Comment Style**: Each analysis run processes only one comment style at a time Extraction Examples diff --git a/docs/source/components/configuration.rst b/docs/source/components/configuration.rst index d92650e..8da2f2a 100644 --- a/docs/source/components/configuration.rst +++ b/docs/source/components/configuration.rst @@ -271,7 +271,7 @@ Specifies the comment syntax style used in the source code files. This determine **Type:** ``str`` **Default:** ``"cpp"`` -**Supported values:** ``"cpp"``, ``"python"``, ``"cs"``, ``"yaml"``, ``"rust"``, ``"go"`` +**Supported values:** ``"cpp"``, ``"python"``, ``"cs"``, ``"yaml"``, ``"rust"``, ``"go"``, ``"jsonc"`` .. code-block:: toml @@ -320,6 +320,12 @@ Specifies the comment syntax style used in the source code files. This determine - ``//`` (single-line), ``/* */`` (multi-line) - ``.go`` + * - JSON with Comments (JSONC) + - ``"jsonc"`` + - ``//`` (single-line), + ``/* */`` (multi-line) + - ``.jsonc`` (always); ``.json`` only when the file opens with a comment + (e.g. the mode line ``// -*- mode: jsonc -*-``) .. note:: Future versions may support additional programming languages. 
diff --git a/docs/source/components/features.rst b/docs/source/components/features.rst index 0fd9811..c513ba1 100644 --- a/docs/source/components/features.rst +++ b/docs/source/components/features.rst @@ -207,6 +207,33 @@ Features .. fault:: Sphinx-codelinks halucinates traceability objects in Go :id: FAULT_GO_2 +.. feature:: JSONC Language Support + :id: FE_JSONC + + Support for defining traceability objects in JSON with Comments (JSONC) files. + + The JSONC parser leverages tree-sitter to identify and extract single-line (``//``) + and multi-line (``/* */``) comments from JSON data, associating each marker with the + surrounding data structure such as the key/value pair, array item, or object it + annotates. + + ``.jsonc`` files are always parsed as JSONC. A ``.json`` file is only treated as JSONC + when it opens with a comment (e.g. the mode line ``// -*- mode: jsonc -*-``), following + the `JSONC filename convention <https://jsonc.org/#filename-extension>`_. + + Key capabilities: + + * Detection of inline and leading comments + * Association of comments with key/value pairs and array items + * Support for both ``//`` and ``/* */`` comment styles + * Opt-in handling of ``.json`` files via a leading comment + + .. fault:: Traceability objects are not detected in JSONC + :id: FAULT_JSONC_1 + + .. fault:: Sphinx-codelinks hallucinates traceability objects in JSONC + :id: FAULT_JSONC_2 + .. 
feature:: Customized comment styles :id: FE_CMT diff --git a/pyproject.toml b/pyproject.toml index b4c1244..2c11dd5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ dependencies = [ "tree-sitter-yaml>=0.7.1", "tree-sitter-rust>=0.23.0", "tree-sitter-go>=0.23.0", + "tree-sitter-json>=0.24.8", ] [build-system] diff --git a/src/sphinx_codelinks/analyse/utils.py b/src/sphinx_codelinks/analyse/utils.py index 7343926..b136be3 100644 --- a/src/sphinx_codelinks/analyse/utils.py +++ b/src/sphinx_codelinks/analyse/utils.py @@ -12,14 +12,22 @@ from sphinx_codelinks.logger import get_logger from sphinx_codelinks.source_discover.config import CommentType -# Language-specific node types for scope detection +# Language-specific node types for scope detection. +# +# YAML and JSONC are intentionally absent. They are data formats, not code, so a +# comment associates with the surrounding data structure (key/value pair, list +# item, or scalar) rather than with an enclosing or following declaration. That +# needs a different algorithm (inline same-row association first, scalar targets, +# grammar-specific traversal), implemented in find_yaml_associated_structure and +# find_jsonc_associated_structure and dispatched from find_associated_scope. +# Those bespoke finders never read this table (only find_next_scope and +# find_enclosing_scope do), so an entry here would be dead. 
SCOPE_NODE_TYPES = { # @Python Scope Node Types, IMPL_PY_2, impl, [FE_PY] CommentType.python: {"function_definition", "class_definition"}, # @C and C++ Scope Node Types, IMPL_C_2, impl, [FE_C_SUPPORT, FE_CPP] CommentType.cpp: {"function_definition", "class_definition"}, CommentType.cs: {"method_declaration", "class_declaration", "property_declaration"}, - CommentType.yaml: {"block_mapping_pair", "block_sequence_item", "document"}, # @Rust Scope Node Types, IMPL_RUST_2, impl, [FE_RUST]; CommentType.rust: { "function_item", @@ -65,6 +73,19 @@ GO_QUERY = """ (comment) @comment """ +JSONC_QUERY = """(comment) @comment""" + +# JSON value node types that can be associated with a comment. +JSON_STRUCTURE_TYPES = { + "pair", + "object", + "array", + "string", + "number", + "true", + "false", + "null", +} def is_text_file(filepath: Path, sample_size: int = 2048) -> bool: @@ -82,7 +103,7 @@ def is_text_file(filepath: Path, sample_size: int = 2048) -> bool: return False -# @Tree-sitter parser initialization for multiple languages, IMPL_LANG_1, impl, [FE_C_SUPPORT, FE_CPP, FE_PY, FE_YAML, FE_RUST, FE_GO] +# @Tree-sitter parser initialization for multiple languages, IMPL_LANG_1, impl, [FE_C_SUPPORT, FE_CPP, FE_PY, FE_YAML, FE_RUST, FE_GO, FE_JSONC] def init_tree_sitter(comment_type: CommentType) -> tuple[Parser, Query]: if comment_type == CommentType.cpp: import tree_sitter_cpp # noqa: PLC0415 @@ -114,6 +135,11 @@ def init_tree_sitter(comment_type: CommentType) -> tuple[Parser, Query]: parsed_language = Language(tree_sitter_go.language()) query = Query(parsed_language, GO_QUERY) + elif comment_type == CommentType.jsonc: + import tree_sitter_json # noqa: PLC0415 + + parsed_language = Language(tree_sitter_json.language()) + query = Query(parsed_language, JSONC_QUERY) else: raise ValueError(f"Unsupported comment style: {comment_type}") parser = Parser(parsed_language) @@ -213,8 +239,11 @@ def find_yaml_next_structure(node: TreeSitterNode) -> TreeSitterNode | None: return None -def 
find_yaml_prev_sibling_on_same_row(node: TreeSitterNode) -> TreeSitterNode | None: - """Find a previous named sibling that is on the same row as the comment.""" +def find_prev_sibling_on_same_row(node: TreeSitterNode) -> TreeSitterNode | None: + """Find a previous named sibling that is on the same row as the comment. + + Grammar-agnostic: used to detect inline comments in both YAML and JSONC. + """ comment_row = node.start_point.row current = node.prev_named_sibling @@ -235,7 +264,7 @@ def find_yaml_prev_sibling_on_same_row(node: TreeSitterNode) -> TreeSitterNode | def find_yaml_associated_structure(node: TreeSitterNode) -> TreeSitterNode | None: """Find the YAML structure (key-value pair, list item, etc.) associated with a comment.""" # First, check if this is an inline comment by looking for a previous sibling on the same row - prev_sibling_same_row = find_yaml_prev_sibling_on_same_row(node) + prev_sibling_same_row = find_prev_sibling_on_same_row(node) if prev_sibling_same_row: return prev_sibling_same_row @@ -254,6 +283,36 @@ def find_yaml_associated_structure(node: TreeSitterNode) -> TreeSitterNode | Non return None +# @JSONC comment-to-structure association, IMPL_JSONC_2, impl, [FE_JSONC] +def find_jsonc_associated_structure(node: TreeSitterNode) -> TreeSitterNode | None: + """Find the JSON structure (key/value pair, value, list item) for a comment. + + JSON is data rather than code, so association follows the same intent as YAML: + an inline comment belongs to the value on its row, a leading comment belongs to + the following structure, otherwise it belongs to the enclosing structure. 
+ """ + # Inline comment: a value/pair on the same row, before the comment + prev_sibling_same_row = find_prev_sibling_on_same_row(node) + if prev_sibling_same_row: + return prev_sibling_same_row + + # Leading comment: the next structure following the comment + current = node.next_named_sibling + while current: + if current.type in JSON_STRUCTURE_TYPES: + return current + current = current.next_named_sibling + + # Otherwise: the enclosing structure + parent = node.parent + while parent: + if parent.type in {"pair", "object", "array"}: + return parent + parent = parent.parent + + return None + + def find_associated_scope( node: TreeSitterNode, comment_type: CommentType = CommentType.cpp ) -> TreeSitterNode | None: @@ -262,6 +321,10 @@ def find_associated_scope( # YAML uses different structure association logic return find_yaml_associated_structure(node) + if comment_type == CommentType.jsonc: + # JSONC uses data-aware structure association logic + return find_jsonc_associated_structure(node) + if node.type == CommentCategory.docstring: # Only for python's docstring return find_enclosing_scope(node, comment_type) diff --git a/src/sphinx_codelinks/source_discover/config.py b/src/sphinx_codelinks/source_discover/config.py index aad64e4..a0ef107 100644 --- a/src/sphinx_codelinks/source_discover/config.py +++ b/src/sphinx_codelinks/source_discover/config.py @@ -12,6 +12,7 @@ "yaml": ["yml", "yaml"], "rust": ["rs"], "go": ["go"], + "jsonc": ["jsonc", "json"], } @@ -24,6 +25,8 @@ class CommentType(str, Enum): rust = "rust" # @Support Go style comments, IMPL_GO_1, impl, [FE_GO]; go = "go" + # @Support JSONC style comments, IMPL_JSONC_1, impl, [FE_JSONC]; + jsonc = "jsonc" class SourceDiscoverSectionConfigType(TypedDict, total=False): diff --git a/src/sphinx_codelinks/source_discover/source_discover.py b/src/sphinx_codelinks/source_discover/source_discover.py index 7faf154..f229464 100644 --- a/src/sphinx_codelinks/source_discover/source_discover.py +++ 
b/src/sphinx_codelinks/source_discover/source_discover.py @@ -6,10 +6,28 @@ from sphinx_codelinks.source_discover.config import ( COMMENT_FILETYPE, + CommentType, SourceDiscoverConfig, ) +def _json_starts_with_comment(filepath: Path, sample_size: int = 256) -> bool: + """Return True if a ``.json`` file's first non-whitespace content is a comment. + + Used to decide whether a ``.json`` file should be treated as JSONC. Per + https://jsonc.org/#filename-extension a ``.json`` file should only be treated as + JSONC when it opens with a comment (e.g. the mode line ``// -*- mode: jsonc -*-``). + """ + try: + with filepath.open("rb") as f: + chunk = f.read(sample_size) + except OSError: + return False + # strip a leading UTF-8 BOM, then leading whitespace + text = chunk.removeprefix(b"\xef\xbb\xbf").lstrip() + return text.startswith((b"//", b"/*")) + + # @Source code file discovery with gitignore support, IMPL_DISC_1, impl, [FE_DISCOVERY, FE_CLI_DISCOVER] class SourceDiscover: def __init__(self, src_discover_config: SourceDiscoverConfig): @@ -75,6 +93,15 @@ def _discover(self) -> list[Path]: continue if self.file_types and filepath.suffix.lower() not in self.file_types: continue + # @JSONC .json files require a leading comment, IMPL_JSONC_3, impl, [FE_JSONC] + # A plain ``.json`` file is only treated as JSONC when it opens with a + # comment; otherwise it is skipped under the ``jsonc`` comment type. 
+ if ( + self.src_discover_config.comment_type == CommentType.jsonc + and filepath.suffix.lower() == ".json" + and not _json_starts_with_comment(filepath) + ): + continue # resolve() produces canonical absolute paths; follow_links only # controls whether the walker descends into symlinked directories discovered_files.append(filepath.resolve()) diff --git a/tests/data/jsonc/demo.jsonc b/tests/data/jsonc/demo.jsonc new file mode 100644 index 0000000..5fcab37 --- /dev/null +++ b/tests/data/jsonc/demo.jsonc @@ -0,0 +1,15 @@ +// -*- mode: jsonc -*- +{ + // @JSONC alpha implementation, IMPL_JSONC_A, impl, [REQ_JSONC_1] + "alpha": 1, + "items": [ + "first", // @JSONC inline item, IMPL_JSONC_B, impl, [REQ_JSONC_2] + "second" + ], + /* Block comment with marker + @JSONC beta implementation, IMPL_JSONC_C, impl, [REQ_JSONC_3] + */ + "beta": { + "nested": true + } +} diff --git a/tests/data/jsonc/plain.json b/tests/data/jsonc/plain.json new file mode 100644 index 0000000..d086709 --- /dev/null +++ b/tests/data/jsonc/plain.json @@ -0,0 +1,3 @@ +{ + "value": 42 +} diff --git a/tests/data/jsonc/with_modeline.json b/tests/data/jsonc/with_modeline.json new file mode 100644 index 0000000..e0dc106 --- /dev/null +++ b/tests/data/jsonc/with_modeline.json @@ -0,0 +1,5 @@ +// -*- mode: jsonc -*- +{ + // @JSONC modeline file, IMPL_JSONC_D, impl, [REQ_JSONC_4] + "value": 42 +} diff --git a/tests/test_analyse.py b/tests/test_analyse.py index 9617b7b..e465a10 100644 --- a/tests/test_analyse.py +++ b/tests/test_analyse.py @@ -6,6 +6,7 @@ from sphinx_codelinks.analyse.analyse import SourceAnalyse, _count from sphinx_codelinks.config import SourceAnalyseConfig +from sphinx_codelinks.source_discover.config import CommentType from tests.conftest import ( ONELINE_COMMENT_STYLE, ONELINE_COMMENT_STYLE_DEFAULT, @@ -118,6 +119,21 @@ def test_analyse(src_dir, src_paths, tmp_path, snapshot_marks): "num_oneline_warnings": 0, }, ), + ( + TEST_DIR / "data" / "jsonc", + [ + TEST_DIR / "data" / "jsonc" / 
"demo.jsonc", + ], + ONELINE_COMMENT_STYLE_DEFAULT, + { + "num_src_files": 1, + "num_uncached_files": 1, + "num_cached_files": 0, + "num_comments": 4, + "num_oneline_warnings": 0, + "comment_type": CommentType.jsonc, + }, + ), ], ) def test_analyse_oneline_needs( @@ -130,6 +146,7 @@ def test_analyse_oneline_needs( get_oneline_needs=True, get_rst=False, oneline_comment_style=oneline_comment_style, + comment_type=result.get("comment_type", CommentType.cpp), ) src_analyse = SourceAnalyse(src_analyse_config) src_analyse.run() diff --git a/tests/test_analyse_utils.py b/tests/test_analyse_utils.py index 40de292..bf896a9 100644 --- a/tests/test_analyse_utils.py +++ b/tests/test_analyse_utils.py @@ -9,6 +9,7 @@ import tree_sitter_c_sharp import tree_sitter_cpp import tree_sitter_go +import tree_sitter_json import tree_sitter_python import tree_sitter_rust import tree_sitter_yaml @@ -66,6 +67,14 @@ def init_go_tree_sitter() -> tuple[Parser, Query]: return parser, query +@pytest.fixture(scope="session") +def init_jsonc_tree_sitter() -> tuple[Parser, Query]: + parsed_language = Language(tree_sitter_json.language()) + query = Query(parsed_language, utils.JSONC_QUERY) + parser = Parser(parsed_language) + return parser, query + + @pytest.mark.parametrize( ("code", "result"), [ @@ -374,6 +383,48 @@ def test_find_associated_scope_rust(code, result, init_rust_tree_sitter): assert result in rust_def +@pytest.mark.parametrize( + ("code", "result"), + [ + # leading comment is associated with the following key/value pair + ( + b'{\n // @req-id: need_001\n "alpha": 1\n}\n', + '"alpha": 1', + ), + # inline comment is associated with the array item on the same row + ( + b'{\n "items": [\n "first", // @req-id: need_001\n "second"\n ]\n}\n', + '"first"', + ), + # inline comment is associated with the pair on the same row + ( + b'{\n "alpha": 1, // @req-id: need_001\n "beta": 2\n}\n', + '"alpha": 1', + ), + # block comment is associated with the following pair + ( + b'{\n /* @req-id: 
need_001 */\n "beta": 2\n}\n', + '"beta": 2', + ), + # trailing comment falls back to the enclosing object + ( + b'{\n "alpha": 1\n // @req-id: need_001\n}\n', + '"alpha"', + ), + ], +) +def test_find_associated_scope_jsonc(code, result, init_jsonc_tree_sitter): + parser, query = init_jsonc_tree_sitter + comments = utils.extract_comments(code, parser, query) + node: TreeSitterNode | None = utils.find_associated_scope( + comments[0], CommentType.jsonc + ) + assert node + assert node.text + jsonc_structure = node.text.decode("utf-8") + assert result in jsonc_structure + + @pytest.mark.parametrize( ("code", "result"), [ diff --git a/tests/test_source_discover.py b/tests/test_source_discover.py index d533829..063b764 100644 --- a/tests/test_source_discover.py +++ b/tests/test_source_discover.py @@ -49,7 +49,7 @@ "comment_type": "java", }, [ - "Schema validation error in field 'comment_type': 'java' is not one of ['cpp', 'cs', 'go', 'python', 'rust', 'yaml']" + "Schema validation error in field 'comment_type': 'java' is not one of ['cpp', 'cs', 'go', 'jsonc', 'python', 'rust', 'yaml']" ], ), ( @@ -196,6 +196,18 @@ def test_comment_filetype( assert len(source_discover.source_paths) == nums_files +def test_jsonc_discover_gate() -> None: + """`.jsonc` is always discovered; `.json` only when it opens with a comment.""" + jsonc_dir = Path(__file__).parent / "data" / "jsonc" + config = SourceDiscoverConfig( + src_dir=jsonc_dir, comment_type="jsonc", gitignore=False + ) + discovered = {p.name for p in SourceDiscover(config).source_paths} + assert "demo.jsonc" in discovered + assert "with_modeline.json" in discovered + assert "plain.json" not in discovered + + def test_follow_links(tmp_path: Path) -> None: """Test that follow_links controls whether symbolic links are followed.""" # Create a real directory with a source file diff --git a/tests/test_src_trace.py b/tests/test_src_trace.py index abaa0c1..b339055 100644 --- a/tests/test_src_trace.py +++ b/tests/test_src_trace.py @@ 
-59,7 +59,7 @@ [ "Project 'dcdc' has the following errors:", "Schema validation error in field 'exclude': 123 is not of type 'string'", - "Schema validation error in field 'comment_type': 'java' is not one of ['cpp', 'cs', 'go', 'python', 'rust', 'yaml']", + "Schema validation error in field 'comment_type': 'java' is not one of ['cpp', 'cs', 'go', 'jsonc', 'python', 'rust', 'yaml']", "Schema validation error in field 'gitignore': '_true' is not of type 'boolean'", "Schema validation error in field 'include': 345 is not of type 'string'", "Schema validation error in field 'src_dir': ['../dcdc'] is not of type 'string'",