Skip to content

Commit 8dbb402

Browse files
authored
feat(python): add knowledge graph helpers and CLI skeleton (#10)
1 parent 554a1ed commit 8dbb402

5 files changed

Lines changed: 255 additions & 0 deletions

File tree

AGENTS.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Repository Guidelines
2+
3+
## Project Structure & Module Organization
4+
- `rust/lance-graph/` hosts the Rust Cypher engine; keep new modules under `src/` and co-locate helpers inside `query/` or feature-specific submodules.
5+
- `python/src/` contains the PyO3 bridge; `python/python/lance_graph/` holds the pure-Python facade and packaging metadata.
6+
- `python/python/tests/` stores functional tests; mirror new features with targeted cases here and in the corresponding Rust module.
7+
- `examples/` demonstrates Cypher usage; update or add examples when introducing new public APIs.
8+
9+
## Build, Test, and Development Commands
10+
- `cargo check` / `cargo test --all` (run inside `rust/lance-graph`) validate Rust code paths.
11+
- `cargo bench --bench graph_execution` measures performance-critical changes; for a shortened run, pass the benchmark harness options after `--`, e.g. `cargo bench --bench graph_execution -- --warm-up-time 1`.
12+
- `uv venv --python 3.11 .venv` and `uv pip install -e '.[tests]'` bootstrap the Python workspace.
13+
- `maturin develop` rebuilds the extension after Rust edits; `pytest python/python/tests/ -v` exercises Python bindings.
14+
- `make lint` (in `python/`) runs `ruff`, formatting checks, and `pyright`.
15+
16+
## Coding Style & Naming Conventions
17+
- Format Rust with `cargo fmt --all`; keep modules and functions snake_case, types PascalCase, and reuse `snafu` error patterns.
18+
- Run `cargo clippy --all-targets --all-features` to catch lint regressions.
19+
- Use 4-space indentation in Python; maintain snake_case modules, CamelCase classes, and type-annotated public APIs.
20+
- Apply `ruff format python/` before committing; `ruff check` and `pyright` enforce import hygiene and typing.
21+
22+
## Testing Guidelines
23+
- Add Rust unit tests alongside implementations via `#[cfg(test)]`; prefer focused scenarios over broad integration.
24+
- Python tests belong in `python/python/tests/`; name files `test_*.py` and use markers (`gpu`, `cuda`, `integration`, `slow`) consistently.
25+
- When touching performance-sensitive code, capture representative `cargo bench` or large-table pytest timing notes in the PR.
26+
27+
## Commit & Pull Request Guidelines
28+
- Follow the existing history style (`feat(graph):`, `docs:`, `refactor(query):`), using imperative, ≤72-character subjects.
29+
- Reference issues or discussions when relevant and include brief context in the body.
30+
- PRs should describe scope, list test commands run, mention benchmark deltas when applicable, and highlight impacts on bindings or examples.

python/pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ classifiers = [
2828

2929
[tool.maturin]
3030
python-source = "python"
31+
python-packages = ["lance_graph", "knowledge_graph"]
3132

3233
[build-system]
3334
requires = ["maturin>=1.4"]
@@ -37,6 +38,9 @@ build-backend = "maturin"
3738
tests = ["pytest", "pyarrow>=14", "pandas"]
3839
dev = ["ruff", "pyright"]
3940

41+
[project.scripts]
42+
knowledge_graph = "knowledge_graph.main:main"
43+
4044
[tool.ruff]
4145
lint.select = ["F", "E", "W", "I", "G", "TCH", "PERF", "B019"]
4246

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
"""High-level helpers for working with Lance-backed knowledge graphs."""
2+
3+
from __future__ import annotations
4+
5+
from dataclasses import dataclass
6+
from typing import Dict, Mapping, Optional
7+
8+
import pyarrow as pa
9+
from lance_graph import CypherQuery, GraphConfig
10+
11+
try: # Prefer to import for typing without raising at runtime.
12+
from lance_graph import GraphConfigBuilder
13+
except ImportError: # pragma: no cover - builder is available in normal installs.
14+
GraphConfigBuilder = object # type: ignore[assignment]
15+
16+
TableMapping = Mapping[str, pa.Table]
17+
18+
19+
def _ensure_table(name: str, table: pa.Table) -> pa.Table:
    """Return ``table`` unchanged once it is confirmed to be a ``pyarrow.Table``.

    Args:
        name: Dataset name, used only to build the error message.
        table: Candidate object to validate.

    Raises:
        TypeError: If ``table`` is not a ``pyarrow.Table`` instance.
    """
    if isinstance(table, pa.Table):
        return table
    message = f"Dataset '{name}' must be a pyarrow.Table (got {type(table)!r})"
    raise TypeError(message)
25+
26+
27+
@dataclass(frozen=True)
class KnowledgeGraph:
    """Wraps a ``GraphConfig`` alongside the Arrow tables backing it.

    Instances are frozen: neither ``config`` nor the internal table registry
    can be rebound after construction. Instances are hashable by the names of
    their registered datasets.
    """

    config: GraphConfig
    _tables: Dict[str, pa.Table]

    def __init__(self, config: GraphConfig, datasets: TableMapping) -> None:
        """Validate ``datasets`` and store them next to ``config``.

        Args:
            config: Graph configuration applied to every query run through
                this instance.
            datasets: Mapping of dataset name to ``pyarrow.Table``. The mapping
                is copied, so later changes to it do not affect this instance.

        Raises:
            TypeError: If any value in ``datasets`` is not a ``pyarrow.Table``.
        """
        # A frozen dataclass rejects normal attribute assignment, so the fields
        # are installed through ``object.__setattr__``.
        object.__setattr__(self, "config", config)
        normalized = {
            name: _ensure_table(name, table) for name, table in datasets.items()
        }
        object.__setattr__(self, "_tables", normalized)

    def __hash__(self) -> int:
        """Hash by dataset names.

        The hash that ``@dataclass(frozen=True)`` would generate hashes every
        field, including the ``_tables`` dict, and therefore raises
        ``TypeError``. Hashing only the dataset names stays consistent with the
        generated ``__eq__``: equal instances have equal ``_tables`` and hence
        the same set of names.
        """
        return hash(frozenset(self._tables))

    def run(
        self,
        statement: str,
        *,
        datasets: Optional[TableMapping] = None,
    ):
        """Execute a Cypher statement, overriding tables when provided.

        Args:
            statement: Cypher text handed to ``CypherQuery``.
            datasets: Optional per-call tables. Entries replace registered
                tables of the same name for this call only; the stored
                registry is not modified.

        Returns:
            Whatever ``CypherQuery.execute`` returns for the merged tables.

        Raises:
            TypeError: If any override is not a ``pyarrow.Table``.
        """
        query = CypherQuery(statement).with_config(self.config)
        # Work on a copy so per-call overrides never leak into ``_tables``.
        sources: Dict[str, pa.Table] = dict(self._tables)
        if datasets:
            sources.update(
                {name: _ensure_table(name, table) for name, table in datasets.items()}
            )
        return query.execute(sources)

    def tables(self) -> Dict[str, pa.Table]:
        """Return a shallow copy of the registered datasets."""
        return dict(self._tables)
59+
60+
61+
class KnowledgeGraphBuilder:
    """Collects nodes, relationships, and datasets before building a graph.

    Every ``with_*`` method returns ``self`` so calls can be chained. Tables
    are validated before any state changes, so a rejected table leaves the
    builder exactly as it was.
    """

    def __init__(self) -> None:
        """Start from an empty ``GraphConfig`` builder and no datasets."""
        builder = GraphConfig.builder()
        self._builder: GraphConfigBuilder = builder  # type: ignore[annotation-unchecked]
        self._datasets: Dict[str, pa.Table] = {}

    def with_node(
        self,
        label: str,
        primary_key: str,
        table: pa.Table,
    ) -> KnowledgeGraphBuilder:
        """Register a node label and Arrow table.

        Args:
            label: Node label; also the name the table is stored under.
            primary_key: Passed to the config builder's ``with_node_label``.
            table: Table backing the node label.

        Raises:
            TypeError: If ``table`` is not a ``pyarrow.Table``.
        """
        # Validate first so a bad table cannot leave a label registered in the
        # config builder without a matching dataset.
        validated = _ensure_table(label, table)
        self._builder = self._builder.with_node_label(label, primary_key)
        self._datasets[label] = validated
        return self

    def with_relationship(
        self,
        name: str,
        source_key: str,
        target_key: str,
        table: pa.Table,
    ) -> KnowledgeGraphBuilder:
        """Register a relationship and its underlying table.

        Args:
            name: Relationship name; also the name the table is stored under.
            source_key: Passed to the config builder's ``with_relationship``.
            target_key: Passed to the config builder's ``with_relationship``.
            table: Table backing the relationship.

        Raises:
            TypeError: If ``table`` is not a ``pyarrow.Table``.
        """
        # Same ordering as ``with_node``: validate, then mutate.
        validated = _ensure_table(name, table)
        self._builder = self._builder.with_relationship(name, source_key, target_key)
        self._datasets[name] = validated
        return self

    def with_dataset(self, name: str, table: pa.Table) -> KnowledgeGraphBuilder:
        """Attach arbitrary supporting datasets (e.g., reference tables).

        Raises:
            TypeError: If ``table`` is not a ``pyarrow.Table``.
        """
        self._datasets[name] = _ensure_table(name, table)
        return self

    def build(self) -> KnowledgeGraph:
        """Materialize the ``KnowledgeGraph`` instance."""
        config = self._builder.build()
        return KnowledgeGraph(config, self._datasets)
101+
102+
103+
__all__ = ["KnowledgeGraph", "KnowledgeGraphBuilder"]
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
"""Executable module wrapper for `python -m knowledge_graph`."""
2+
3+
from __future__ import annotations
4+
5+
from .main import main
6+
7+
if __name__ == "__main__":
8+
raise SystemExit(main())
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
"""Command line interface for the knowledge_graph helpers."""
2+
3+
from __future__ import annotations
4+
5+
import argparse
6+
from pathlib import Path
7+
from typing import Optional, Sequence
8+
9+
10+
def init_graph() -> None:
    """Set up storage for the knowledge graph.

    Placeholder: no work is performed yet.
    """
    return None
13+
14+
15+
def run_interactive() -> None:
    """Start an interactive shell for issuing commands.

    Placeholder: returns immediately without opening a shell.
    """
    return None
18+
19+
20+
def execute_query(text: str) -> None:
    """Run one query against the knowledge graph.

    Placeholder: ``text`` is accepted and discarded; nothing is executed yet.
    """
    _ = text  # intentionally unused until the implementation lands
23+
24+
25+
def preview_extraction(path: Path) -> None:
    """Show what would be extracted from the text source at ``path``.

    Placeholder: ``path`` is accepted and discarded; nothing is read yet.
    """
    _ = path  # intentionally unused until the implementation lands
28+
29+
30+
def extract_and_add(path: Path) -> None:
    """Extract knowledge from ``path`` and append it to the backing graph.

    Placeholder: ``path`` is accepted and discarded; nothing is written yet.
    """
    _ = path  # intentionally unused until the implementation lands
33+
34+
35+
def ask_question(question: str) -> None:
    """Answer a natural-language question using the graph.

    Placeholder: ``question`` is accepted and discarded; no answer is produced.
    """
    _ = question  # intentionally unused until the implementation lands
38+
39+
40+
def _build_parser() -> argparse.ArgumentParser:
    """Build the ``knowledge_graph`` argument parser.

    ``--init``, ``--extract-preview``, ``--extract-and-add`` and ``--ask`` are
    mutually exclusive; an optional positional ``query`` is accepted as well.
    """
    cli = argparse.ArgumentParser(
        prog="knowledge_graph",
        description="Operate the Lance-backed knowledge graph.",
    )
    modes = cli.add_mutually_exclusive_group()
    modes.add_argument(
        "--init",
        action="store_true",
        help="Initialize the knowledge graph storage.",
    )
    # The remaining modes each take one value: (flag, metavar, help text).
    valued_modes = (
        (
            "--extract-preview",
            "PATH",
            "Preview extracted entities and relations from a text file.",
        ),
        (
            "--extract-and-add",
            "PATH",
            "Extract and insert knowledge from a text file.",
        ),
        (
            "--ask",
            "QUESTION",
            "Ask a natural-language question over the knowledge graph.",
        ),
    )
    for flag, placeholder, description in valued_modes:
        modes.add_argument(flag, metavar=placeholder, help=description)
    cli.add_argument(
        "query",
        nargs="?",
        help="Execute a single Cypher or semantic query.",
    )
    return cli
72+
73+
74+
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Parse command-line arguments and dispatch to the matching handler.

    Args:
        argv: Argument list to parse; ``None`` lets ``argparse`` read
            ``sys.argv``.

    Returns:
        ``0`` after the selected handler returns. Usage errors do not return:
        ``parser.error`` exits the process.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # Presence is tested with ``is not None`` everywhere. Truthiness would
    # treat an explicitly supplied empty string (e.g. ``--ask ""``) as absent
    # and silently fall through to the interactive shell.
    flag_selected = args.init or any(
        value is not None
        for value in (args.extract_preview, args.extract_and_add, args.ask)
    )
    if args.query is not None and flag_selected:
        parser.error("Query argument cannot be combined with flags.")

    if args.init:
        init_graph()
    elif args.extract_preview is not None:
        preview_extraction(Path(args.extract_preview))
    elif args.extract_and_add is not None:
        extract_and_add(Path(args.extract_and_add))
    elif args.ask is not None:
        ask_question(args.ask)
    elif args.query is not None:
        execute_query(args.query)
    else:
        # No flag and no query: default to the interactive shell.
        run_interactive()
    return 0
107+
108+
109+
if __name__ == "__main__":
110+
raise SystemExit(main())

0 commit comments

Comments
 (0)