Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/components/analyse.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ Limitations

**Current Limitations:**

- **Language Support**: C/C++ (``//``, ``/* */``), C# (``//``, ``/* */``, ``///``), Python (``#``), YAML (``#``), Rust (``//``, ``/* */``, ``///``) and Go (``//``, ``/* */``) comment styles are supported
- **Language Support**: C/C++ (``//``, ``/* */``), C# (``//``, ``/* */``, ``///``), Python (``#``), YAML (``#``), Rust (``//``, ``/* */``, ``///``), Go (``//``, ``/* */``) and JSONC (``//``, ``/* */``) comment styles are supported
- **Single Comment Style**: Each analysis run processes only one comment style at a time

Extraction Examples
Expand Down
8 changes: 7 additions & 1 deletion docs/source/components/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ Specifies the comment syntax style used in the source code files. This determine

**Type:** ``str``
**Default:** ``"cpp"``
**Supported values:** ``"cpp"``, ``"python"``, ``"cs"``, ``"yaml"``, ``"rust"``, ``"go"``
**Supported values:** ``"cpp"``, ``"python"``, ``"cs"``, ``"yaml"``, ``"rust"``, ``"go"``, ``"jsonc"``

.. code-block:: toml

Expand Down Expand Up @@ -320,6 +320,12 @@ Specifies the comment syntax style used in the source code files. This determine
- ``//`` (single-line),
``/* */`` (multi-line)
- ``.go``
* - JSON with Comments (JSONC)
- ``"jsonc"``
- ``//`` (single-line),
``/* */`` (multi-line)
- ``.jsonc`` (always); ``.json`` only when the file opens with a comment
(e.g. the mode line ``// -*- mode: jsonc -*-``)

.. note:: Future versions may support additional programming languages.

Expand Down
27 changes: 27 additions & 0 deletions docs/source/components/features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,33 @@ Features
.. fault:: Sphinx-codelinks hallucinates traceability objects in Go
:id: FAULT_GO_2

.. feature:: JSONC Language Support
:id: FE_JSONC

Support for defining traceability objects in JSON with Comments (JSONC) files.

The JSONC parser leverages tree-sitter to identify and extract single-line (``//``)
and multi-line (``/* */``) comments from JSON data, associating each marker with the
surrounding data structure such as the key/value pair, array item, or object it
annotates.

``.jsonc`` files are always parsed as JSONC. A ``.json`` file is only treated as JSONC
when it opens with a comment (e.g. the mode line ``// -*- mode: jsonc -*-``), following
the `JSONC filename convention <https://jsonc.org/#filename-extension>`_.

Key capabilities:

* Detection of inline and leading comments
* Association of comments with key/value pairs and array items
* Support for both ``//`` and ``/* */`` comment styles
* Opt-in handling of ``.json`` files via a leading comment

.. fault:: Traceability objects are not detected in JSONC
:id: FAULT_JSONC_1

.. fault:: Sphinx-codelinks hallucinates traceability objects in JSONC
:id: FAULT_JSONC_2

.. feature:: Customized comment styles
:id: FE_CMT

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ dependencies = [
"tree-sitter-yaml>=0.7.1",
"tree-sitter-rust>=0.23.0",
"tree-sitter-go>=0.23.0",
"tree-sitter-json>=0.24.8",
]

[build-system]
Expand Down
75 changes: 69 additions & 6 deletions src/sphinx_codelinks/analyse/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,22 @@
from sphinx_codelinks.logger import get_logger
from sphinx_codelinks.source_discover.config import CommentType

# Language-specific node types for scope detection
# Language-specific node types for scope detection.
#
# YAML and JSONC are intentionally absent. They are data formats, not code, so a
# comment associates with the surrounding data structure (key/value pair, list
# item, or scalar) rather than with an enclosing or following declaration. That
# needs a different algorithm (inline same-row association first, scalar targets,
# grammar-specific traversal), implemented in find_yaml_associated_structure and
# find_jsonc_associated_structure and dispatched from find_associated_scope.
# Those bespoke finders never read this table (only find_next_scope and
# find_enclosing_scope do), so an entry here would be dead.
SCOPE_NODE_TYPES = {
# @Python Scope Node Types, IMPL_PY_2, impl, [FE_PY]
CommentType.python: {"function_definition", "class_definition"},
# @C and C++ Scope Node Types, IMPL_C_2, impl, [FE_C_SUPPORT, FE_CPP]
CommentType.cpp: {"function_definition", "class_definition"},
CommentType.cs: {"method_declaration", "class_declaration", "property_declaration"},
CommentType.yaml: {"block_mapping_pair", "block_sequence_item", "document"},
# @Rust Scope Node Types, IMPL_RUST_2, impl, [FE_RUST];
CommentType.rust: {
"function_item",
Expand Down Expand Up @@ -65,6 +73,19 @@
GO_QUERY = """
(comment) @comment
"""
# Tree-sitter query capturing each ``comment`` node under the ``@comment`` name.
# init_tree_sitter compiles it against the tree-sitter-json grammar when the
# comment type is ``jsonc``.
JSONC_QUERY = """(comment) @comment"""

# JSON value node types that can be associated with a comment.
# find_jsonc_associated_structure tests the named siblings that follow a
# comment against this set to pick the structure a leading comment annotates.
JSON_STRUCTURE_TYPES = {
"pair",
"object",
"array",
"string",
"number",
"true",
"false",
"null",
}


def is_text_file(filepath: Path, sample_size: int = 2048) -> bool:
Expand All @@ -82,7 +103,7 @@ def is_text_file(filepath: Path, sample_size: int = 2048) -> bool:
return False


# @Tree-sitter parser initialization for multiple languages, IMPL_LANG_1, impl, [FE_C_SUPPORT, FE_CPP, FE_PY, FE_YAML, FE_RUST, FE_GO]
# @Tree-sitter parser initialization for multiple languages, IMPL_LANG_1, impl, [FE_C_SUPPORT, FE_CPP, FE_PY, FE_YAML, FE_RUST, FE_GO, FE_JSONC]
def init_tree_sitter(comment_type: CommentType) -> tuple[Parser, Query]:
if comment_type == CommentType.cpp:
import tree_sitter_cpp # noqa: PLC0415
Expand Down Expand Up @@ -114,6 +135,11 @@ def init_tree_sitter(comment_type: CommentType) -> tuple[Parser, Query]:

parsed_language = Language(tree_sitter_go.language())
query = Query(parsed_language, GO_QUERY)
elif comment_type == CommentType.jsonc:
import tree_sitter_json # noqa: PLC0415

parsed_language = Language(tree_sitter_json.language())
query = Query(parsed_language, JSONC_QUERY)
else:
raise ValueError(f"Unsupported comment style: {comment_type}")
parser = Parser(parsed_language)
Expand Down Expand Up @@ -213,8 +239,11 @@ def find_yaml_next_structure(node: TreeSitterNode) -> TreeSitterNode | None:
return None


def find_yaml_prev_sibling_on_same_row(node: TreeSitterNode) -> TreeSitterNode | None:
"""Find a previous named sibling that is on the same row as the comment."""
def find_prev_sibling_on_same_row(node: TreeSitterNode) -> TreeSitterNode | None:
"""Find a previous named sibling that is on the same row as the comment.

Grammar-agnostic: used to detect inline comments in both YAML and JSONC.
"""
comment_row = node.start_point.row
current = node.prev_named_sibling

Expand All @@ -235,7 +264,7 @@ def find_yaml_prev_sibling_on_same_row(node: TreeSitterNode) -> TreeSitterNode |
def find_yaml_associated_structure(node: TreeSitterNode) -> TreeSitterNode | None:
"""Find the YAML structure (key-value pair, list item, etc.) associated with a comment."""
# First, check if this is an inline comment by looking for a previous sibling on the same row
prev_sibling_same_row = find_yaml_prev_sibling_on_same_row(node)
prev_sibling_same_row = find_prev_sibling_on_same_row(node)
if prev_sibling_same_row:
return prev_sibling_same_row

Expand All @@ -254,6 +283,36 @@ def find_yaml_associated_structure(node: TreeSitterNode) -> TreeSitterNode | Non
return None


# @JSONC comment-to-structure association, IMPL_JSONC_2, impl, [FE_JSONC]
def find_jsonc_associated_structure(node: TreeSitterNode) -> TreeSitterNode | None:
    """Resolve the JSON structure that a JSONC comment annotates.

    JSON is data, not code, so the lookup mirrors the YAML intent. Candidates
    are tried in order and the first hit wins:

    1. Inline: a previous named sibling sharing the comment's row.
    2. Leading: the nearest following named sibling whose type is listed in
       ``JSON_STRUCTURE_TYPES``.
    3. Enclosing: the closest ancestor that is a ``pair``, ``object`` or
       ``array``.

    Returns ``None`` when none of the three lookups yields a node.
    """
    container_types = ("pair", "object", "array")

    # 1. Inline comment: something sits before the comment on the same row.
    inline_target = find_prev_sibling_on_same_row(node)
    if inline_target:
        return inline_target

    # 2. Leading comment: skip forward over siblings that are not JSON
    #    structures (e.g. further comments) until one matches.
    follower = node.next_named_sibling
    while follower and follower.type not in JSON_STRUCTURE_TYPES:
        follower = follower.next_named_sibling
    if follower:
        return follower

    # 3. Fallback: climb to the nearest enclosing container.
    ancestor = node.parent
    while ancestor and ancestor.type not in container_types:
        ancestor = ancestor.parent
    return ancestor if ancestor else None


def find_associated_scope(
node: TreeSitterNode, comment_type: CommentType = CommentType.cpp
) -> TreeSitterNode | None:
Expand All @@ -262,6 +321,10 @@ def find_associated_scope(
# YAML uses different structure association logic
return find_yaml_associated_structure(node)

if comment_type == CommentType.jsonc:
# JSONC uses data-aware structure association logic
return find_jsonc_associated_structure(node)

if node.type == CommentCategory.docstring:
# Only for python's docstring
return find_enclosing_scope(node, comment_type)
Expand Down
3 changes: 3 additions & 0 deletions src/sphinx_codelinks/source_discover/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"yaml": ["yml", "yaml"],
"rust": ["rs"],
"go": ["go"],
"jsonc": ["jsonc", "json"],
}


Expand All @@ -24,6 +25,8 @@ class CommentType(str, Enum):
rust = "rust"
# @Support Go style comments, IMPL_GO_1, impl, [FE_GO];
go = "go"
# @Support JSONC style comments, IMPL_JSONC_1, impl, [FE_JSONC];
jsonc = "jsonc"


class SourceDiscoverSectionConfigType(TypedDict, total=False):
Expand Down
27 changes: 27 additions & 0 deletions src/sphinx_codelinks/source_discover/source_discover.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,28 @@

from sphinx_codelinks.source_discover.config import (
COMMENT_FILETYPE,
CommentType,
SourceDiscoverConfig,
)


def _json_starts_with_comment(filepath: Path, sample_size: int = 256) -> bool:
    """Tell whether a ``.json`` file begins with a comment.

    This decides if a ``.json`` file should be handled as JSONC. Per
    https://jsonc.org/#filename-extension a ``.json`` file should only be treated as
    JSONC when it opens with a comment (e.g. the mode line ``// -*- mode: jsonc -*-``).

    Only the first ``sample_size`` bytes are inspected. A file that cannot be
    opened or read is reported as not starting with a comment.
    """
    utf8_bom = b"\xef\xbb\xbf"
    comment_openers = (b"//", b"/*")

    try:
        with filepath.open("rb") as handle:
            head = handle.read(sample_size)
    except OSError:
        return False

    # Drop a leading UTF-8 BOM (if any) before skipping leading whitespace.
    if head.startswith(utf8_bom):
        head = head[len(utf8_bom):]
    return head.lstrip().startswith(comment_openers)


# @Source code file discovery with gitignore support, IMPL_DISC_1, impl, [FE_DISCOVERY, FE_CLI_DISCOVER]
class SourceDiscover:
def __init__(self, src_discover_config: SourceDiscoverConfig):
Expand Down Expand Up @@ -75,6 +93,15 @@ def _discover(self) -> list[Path]:
continue
if self.file_types and filepath.suffix.lower() not in self.file_types:
continue
# @JSONC .json files require a leading comment, IMPL_JSONC_3, impl, [FE_JSONC]
# A plain ``.json`` file is only treated as JSONC when it opens with a
# comment; otherwise it is skipped under the ``jsonc`` comment type.
if (
self.src_discover_config.comment_type == CommentType.jsonc
and filepath.suffix.lower() == ".json"
and not _json_starts_with_comment(filepath)
):
continue
# resolve() produces canonical absolute paths; follow_links only
# controls whether the walker descends into symlinked directories
discovered_files.append(filepath.resolve())
Expand Down
15 changes: 15 additions & 0 deletions tests/data/jsonc/demo.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// -*- mode: jsonc -*-
{
// @JSONC alpha implementation, IMPL_JSONC_A, impl, [REQ_JSONC_1]
"alpha": 1,
"items": [
"first", // @JSONC inline item, IMPL_JSONC_B, impl, [REQ_JSONC_2]
"second"
],
/* Block comment with marker
@JSONC beta implementation, IMPL_JSONC_C, impl, [REQ_JSONC_3]
*/
"beta": {
"nested": true
}
}
3 changes: 3 additions & 0 deletions tests/data/jsonc/plain.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"value": 42
}
5 changes: 5 additions & 0 deletions tests/data/jsonc/with_modeline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// -*- mode: jsonc -*-
{
// @JSONC modeline file, IMPL_JSONC_D, impl, [REQ_JSONC_4]
"value": 42
}
17 changes: 17 additions & 0 deletions tests/test_analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from sphinx_codelinks.analyse.analyse import SourceAnalyse, _count
from sphinx_codelinks.config import SourceAnalyseConfig
from sphinx_codelinks.source_discover.config import CommentType
from tests.conftest import (
ONELINE_COMMENT_STYLE,
ONELINE_COMMENT_STYLE_DEFAULT,
Expand Down Expand Up @@ -118,6 +119,21 @@ def test_analyse(src_dir, src_paths, tmp_path, snapshot_marks):
"num_oneline_warnings": 0,
},
),
(
TEST_DIR / "data" / "jsonc",
[
TEST_DIR / "data" / "jsonc" / "demo.jsonc",
],
ONELINE_COMMENT_STYLE_DEFAULT,
{
"num_src_files": 1,
"num_uncached_files": 1,
"num_cached_files": 0,
"num_comments": 4,
"num_oneline_warnings": 0,
"comment_type": CommentType.jsonc,
},
),
],
)
def test_analyse_oneline_needs(
Expand All @@ -130,6 +146,7 @@ def test_analyse_oneline_needs(
get_oneline_needs=True,
get_rst=False,
oneline_comment_style=oneline_comment_style,
comment_type=result.get("comment_type", CommentType.cpp),
)
src_analyse = SourceAnalyse(src_analyse_config)
src_analyse.run()
Expand Down
Loading
Loading