Skip to content

Commit 921dccd

Browse files
DvirDukhan and Copilot committed
feat(graph): per-branch graph identity (T17 #651)
Refactor FalkorDB graph naming so each (project, branch) pair gets its own graph: 'code:{project}:{branch}'. This lets concurrent agents working on different branches of the same repo index in parallel without overwriting each other.

Changes:
- api/graph.py: add DEFAULT_BRANCH, compose_graph_name(), parse_graph_name(); Graph and AsyncGraphQuery constructors now accept (name, branch=None); Graph.from_raw_name() classmethod for internal callers that need to bypass composition (e.g. clone()); get_repos()/async_get_repos() now return {project, branch, graph} dicts.
- api/info.py: branch-aware Redis hash keys ('{repo}:{branch}_info'); reads fall back to legacy '{repo}_info' for un-migrated graphs.
- api/git_utils: GitRepoName() and switch_commit() thread branch through; LegacyGitRepoName() retained for the migration helper.
- api/project.py: detect_branch() via 'git rev-parse --abbrev-ref HEAD'; Project.__init__ / from_git_repository / from_local_repository accept branch.
- api/index.py: all Pydantic request models gain 'branch: Optional[str]'; endpoints thread it into AsyncGraphQuery + info functions; responses include 'branch'.
- api/cli.py: --branch flag on index / index-repo / search / neighbors / paths / info; new 'cgraph migrate' command.
- api/migrations/per_branch.py (NEW): idempotent migration that renames legacy '<project>' graphs to 'code:<project>:_default', '{<project>}_info' Redis keys to '{<project>}:_default_info', and '{<project>}_git' graphs to '{<project>}:_default_git'. Supports --dry-run.

Tests:
- tests/test_per_branch_graphs.py (NEW): 24 unit tests covering compose/parse helpers, Graph constructor branch awareness, AsyncGraphQuery, info-key shape, GitRepoName shape, and migration idempotency (with mocked FalkorDB).
- tests/test_async_graph.py, tests/test_cli.py, tests/endpoints/test_list_repos.py: updated assertions for the new dict return shape from get_repos / async_get_repos.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent db71080 commit 921dccd

17 files changed

Lines changed: 872 additions & 118 deletions

File tree

api/analyzers/source_analyzer.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -208,21 +208,26 @@ def analyze_local_folder(self, path: str, g: Graph, ignore: Optional[list[str]]
208208

209209
logging.info("Done analyzing path")
210210

211-
def analyze_local_repository(self, path: str, ignore: Optional[list[str]] = None) -> Graph:
211+
def analyze_local_repository(self, path: str, ignore: Optional[list[str]] = None, branch: Optional[str] = None) -> Graph:
212212
"""
213213
Analyze a local Git repository.
214214
215215
Args:
216216
path (str): Path to a local git repository
217217
ignore (List(str)): List of paths to skip
218+
branch (Optional[str]): Branch name. Auto-detected from the
219+
checkout when ``None``.
218220
"""
219221
if ignore is None:
220222
ignore = []
221223

222224
from pygit2.repository import Repository
225+
from ..project import detect_branch
223226

224227
proj_name = Path(path).name
225-
graph = Graph(proj_name)
228+
if branch is None:
229+
branch = detect_branch(Path(path))
230+
graph = Graph(proj_name, branch=branch)
226231
self.analyze_local_folder(path, graph, ignore)
227232

228233
# Save processed commit hash to the DB

api/auto_complete.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
1+
from typing import Optional
2+
13
from .graph import Graph, AsyncGraphQuery
24

35

4-
def prefix_search(repo: str, prefix: str) -> str:
6+
def prefix_search(repo: str, prefix: str, branch: Optional[str] = None) -> str:
57
""" Returns a list of all entities in the repository that start with the given prefix. """
6-
g = Graph(repo)
8+
g = Graph(repo, branch=branch)
79
return g.prefix_search(prefix)
810

911

10-
async def async_prefix_search(repo: str, prefix: str) -> list:
12+
async def async_prefix_search(repo: str, prefix: str, branch: Optional[str] = None) -> list:
1113
"""Async version of prefix_search using AsyncGraphQuery."""
12-
g = AsyncGraphQuery(repo)
14+
g = AsyncGraphQuery(repo, branch=branch)
1315
try:
1416
return await g.prefix_search(prefix)
1517
finally:

api/cli.py

Lines changed: 70 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,9 @@ def index(
172172
repo: Optional[str] = typer.Option(
173173
None, "--repo", help="Graph name (defaults to folder name)"
174174
),
175+
branch: Optional[str] = typer.Option(
176+
None, "--branch", help="Branch to associate with this index (auto-detected from git checkout when omitted; '_default' for non-git paths)"
177+
),
175178
) -> None:
176179
"""Index a local folder into the knowledge graph."""
177180
from .project import Project
@@ -204,14 +207,14 @@ def index(
204207

205208
_stderr(f"Indexing {folder} as '{name}'…")
206209
try:
207-
project = Project(name, folder, url)
210+
project = Project(name, folder, url, branch=branch)
208211
graph = project.analyze_sources(ignore=list(ignore) if ignore else [])
209212
stats = graph.stats()
210213
except Exception as e:
211214
_json_error(str(e))
212215

213-
_stderr(f"Done — {stats['node_count']} nodes, {stats['edge_count']} edges")
214-
_json_out({"status": "ok", "repo": name, **stats})
216+
_stderr(f"Done — {stats['node_count']} nodes, {stats['edge_count']} edges (branch={project.branch})")
217+
_json_out({"status": "ok", "repo": name, "branch": project.branch, **stats})
215218

216219

217220
# ── index-repo ─────────────────────────────────────────────────────────
@@ -223,6 +226,9 @@ def index_repo(
223226
ignore: Optional[List[str]] = typer.Option(
224227
None, "--ignore", help="Directories to ignore (repeatable)"
225228
),
229+
branch: Optional[str] = typer.Option(
230+
None, "--branch", help="Branch to associate with this index (auto-detected from the cloned checkout when omitted)"
231+
),
226232
) -> None:
227233
"""Clone a git repository and index it into the knowledge graph."""
228234
from .project import Project
@@ -233,22 +239,22 @@ def index_repo(
233239
import io
234240
import contextlib
235241
with contextlib.redirect_stdout(io.StringIO()):
236-
project = Project.from_git_repository(url)
242+
project = Project.from_git_repository(url, branch=branch)
237243
graph = project.analyze_sources(ignore=list(ignore) if ignore else [])
238244
stats = graph.stats()
239245
except Exception as e:
240246
_json_error(str(e))
241247

242-
_stderr(f"Done — {stats['node_count']} nodes, {stats['edge_count']} edges")
243-
_json_out({"status": "ok", "repo": project.name, **stats})
248+
_stderr(f"Done — {stats['node_count']} nodes, {stats['edge_count']} edges (branch={project.branch})")
249+
_json_out({"status": "ok", "repo": project.name, "branch": project.branch, **stats})
244250

245251

246252
# ── list ───────────────────────────────────────────────────────────────
247253

248254

249255
@app.command("list")
250256
def list_repos() -> None:
251-
"""List all indexed repositories."""
257+
"""List all indexed (project, branch) pairs."""
252258
from .graph import get_repos
253259

254260
try:
@@ -259,6 +265,30 @@ def list_repos() -> None:
259265
_json_out({"repos": repos})
260266

261267

268+
# ── migrate ────────────────────────────────────────────────────────────
269+
270+
271+
@app.command("migrate")
272+
def migrate(
273+
dry_run: bool = typer.Option(False, "--dry-run", help="Print actions without performing them"),
274+
) -> None:
275+
"""Promote legacy (pre-T17) graphs and Redis keys into the per-branch namespace.
276+
277+
Renames each legacy ``<project>`` graph to ``code:<project>:_default``,
278+
each ``{project}_info`` Redis key to ``{project}:_default_info``, and
279+
each ``{project}_git`` graph to ``{project}:_default_git``. Idempotent.
280+
"""
281+
282+
from .migrations.per_branch import run_migration
283+
284+
try:
285+
result = run_migration(dry_run=dry_run)
286+
except Exception as e:
287+
_json_error(str(e))
288+
289+
_json_out(result)
290+
291+
262292
# ── search ─────────────────────────────────────────────────────────────
263293

264294

@@ -268,18 +298,24 @@ def search(
268298
repo: Optional[str] = typer.Option(
269299
None, "--repo", help="Repository name (defaults to CWD name)"
270300
),
301+
branch: Optional[str] = typer.Option(
302+
None, "--branch", help="Branch (auto-detected from CWD; '_default' for non-git paths)"
303+
),
271304
) -> None:
272305
"""Search for entities by prefix (full-text search)."""
273306
from .graph import Graph
307+
from .project import detect_branch
274308

275309
name = _default_repo(repo)
310+
if branch is None:
311+
branch = detect_branch(Path.cwd())
276312
try:
277-
g = Graph(name)
313+
g = Graph(name, branch=branch)
278314
results = g.prefix_search(query)
279315
except Exception as e:
280316
_json_error(str(e))
281317

282-
_json_out({"repo": name, "results": results})
318+
_json_out({"repo": name, "branch": branch, "results": results})
283319

284320

285321
# ── neighbors ──────────────────────────────────────────────────────────
@@ -297,18 +333,24 @@ def neighbors(
297333
label: Optional[str] = typer.Option(
298334
None, "--label", help="Filter by destination label (e.g. Function, Class)"
299335
),
336+
branch: Optional[str] = typer.Option(
337+
None, "--branch", help="Branch (auto-detected from CWD; '_default' for non-git paths)"
338+
),
300339
) -> None:
301340
"""Get neighboring entities of the given node(s)."""
302341
from .graph import Graph
342+
from .project import detect_branch
303343

304344
name = _default_repo(repo)
345+
if branch is None:
346+
branch = detect_branch(Path.cwd())
305347
try:
306-
g = Graph(name)
348+
g = Graph(name, branch=branch)
307349
result = g.get_neighbors(node_ids, rel=rel, lbl=label)
308350
except Exception as e:
309351
_json_error(str(e))
310352

311-
_json_out({"repo": name, **result})
353+
_json_out({"repo": name, "branch": branch, **result})
312354

313355

314356
# ── paths ──────────────────────────────────────────────────────────────
@@ -321,18 +363,24 @@ def paths(
321363
repo: Optional[str] = typer.Option(
322364
None, "--repo", help="Repository name (defaults to CWD name)"
323365
),
366+
branch: Optional[str] = typer.Option(
367+
None, "--branch", help="Branch (auto-detected from CWD; '_default' for non-git paths)"
368+
),
324369
) -> None:
325370
"""Find call-chain paths between two nodes."""
326371
from .graph import Graph
372+
from .project import detect_branch
327373

328374
name = _default_repo(repo)
375+
if branch is None:
376+
branch = detect_branch(Path.cwd())
329377
try:
330-
g = Graph(name)
378+
g = Graph(name, branch=branch)
331379
result = g.find_paths(src, dest)
332380
except Exception as e:
333381
_json_error(str(e))
334382

335-
_json_out({"repo": name, "paths": result})
383+
_json_out({"repo": name, "branch": branch, "paths": result})
336384

337385

338386
# ── info ───────────────────────────────────────────────────────────────
@@ -343,20 +391,26 @@ def info(
343391
repo: Optional[str] = typer.Option(
344392
None, "--repo", help="Repository name (defaults to CWD name)"
345393
),
394+
branch: Optional[str] = typer.Option(
395+
None, "--branch", help="Branch (auto-detected from CWD; '_default' for non-git paths)"
396+
),
346397
) -> None:
347398
"""Show repository statistics and metadata."""
348399
from .graph import Graph
349400
from .info import get_repo_info
401+
from .project import detect_branch
350402

351403
name = _default_repo(repo)
404+
if branch is None:
405+
branch = detect_branch(Path.cwd())
352406
try:
353-
g = Graph(name)
407+
g = Graph(name, branch=branch)
354408
stats = g.stats()
355-
metadata = get_repo_info(name) or {}
409+
metadata = get_repo_info(name, branch) or {}
356410
except Exception as e:
357411
_json_error(str(e))
358412

359-
_json_out({"repo": name, **stats, "metadata": metadata})
413+
_json_out({"repo": name, "branch": branch, **stats, "metadata": metadata})
360414

361415

362416
if __name__ == "__main__":

api/code_coverage/lcov/lcov.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import os
22
import sys
3+
from typing import Optional
4+
35
from ...graph import Graph
46

57
def lcovparse(content):
@@ -124,7 +126,7 @@ def _line(l, report):
124126
else:
125127
sys.stdout.write("Unknown method name %s" % method)
126128

127-
def process_lcov(repo: str, lcov_file: str) -> None:
129+
def process_lcov(repo: str, lcov_file: str, branch: Optional[str] = None) -> None:
128130
# create report from coverage lcov file
129131
with open(lcov_file, "r") as file:
130132
content = file.read() # Reads the entire file as a single string
@@ -134,7 +136,7 @@ def process_lcov(repo: str, lcov_file: str) -> None:
134136
# SF:/__w/FalkorDB/FalkorDB/src/algorithms/detect_cycle.c
135137
prefix = "/__w/FalkorDB/FalkorDB/" # prefix to remove
136138

137-
g = Graph(repo)
139+
g = Graph(repo, branch=branch)
138140

139141
#---------------------------------------------------------------------------
140142
# Process report

api/git_utils/git_utils.py

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,21 @@
1414
# Configure logging
1515
logging.basicConfig(level=logging.DEBUG, format='%(filename)s - %(asctime)s - %(levelname)s - %(message)s')
1616

17-
def GitRepoName(repo_name):
18-
""" Returns the git repository name """
17+
def GitRepoName(repo_name, branch=None):
18+
""" Returns the git transitions graph key for ``(repo_name, branch)``.
19+
20+
Format: ``{repo_name}:{branch}_git``. Hash-tag stays on ``repo_name``
21+
so the git-graph key lives on the same FalkorDB cluster slot as its
22+
sibling code graph and ``*_info`` Redis hash.
23+
"""
24+
from ..graph import DEFAULT_BRANCH
25+
if branch is None or branch == "":
26+
branch = DEFAULT_BRANCH
27+
return "{" + repo_name + "}" + ":" + branch + "_git"
28+
29+
30+
def LegacyGitRepoName(repo_name):
31+
"""Pre-T17 git graph key shape — kept for the migration helper."""
1932
return "{" + repo_name + "}_git"
2033

2134
def is_ignored(file_path: str, ignore_list: List[str]) -> bool:
@@ -70,14 +83,15 @@ def classify_changes(
7083
return added, deleted, modified
7184

7285
# build a graph capturing the git commit history
73-
def build_commit_graph(path: str, analyzer: SourceAnalyzer, repo_name: str, ignore_list: Optional[List[str]] = None) -> GitGraph:
86+
def build_commit_graph(path: str, analyzer: SourceAnalyzer, repo_name: str, ignore_list: Optional[List[str]] = None, branch: Optional[str] = None) -> GitGraph:
7487
"""
7588
Builds a graph representation of the git commit history.
7689
7790
Args:
7891
path (str): Path to the git repository.
7992
repo_name (str): Name of the repository.
8093
ignore_list (List[str], optional): List of file patterns to ignore.
94+
branch (Optional[str]): Branch name. Defaults to ``_default``.
8195
8296
Returns:
8397
GitGraph: Graph object representing the commit history.
@@ -86,13 +100,15 @@ def build_commit_graph(path: str, analyzer: SourceAnalyzer, repo_name: str, igno
86100
if ignore_list is None:
87101
ignore_list = []
88102

89-
# Copy the graph into a temporary graph
90-
logging.info("Cloning source graph %s -> %s_tmp", repo_name, repo_name)
91-
# Will be deleted at the end of this function
92-
g = Graph(repo_name).clone(repo_name + "_tmp")
103+
# Copy the graph into a temporary graph (sibling key with `_tmp` suffix on
104+
# the branch component so the clone lands on the same cluster slot).
105+
source = Graph(repo_name, branch=branch)
106+
tmp_name = source.name + "_tmp"
107+
logging.info("Cloning source graph %s -> %s", source.name, tmp_name)
108+
g = source.clone(tmp_name)
93109
g.enable_backlog()
94110

95-
git_graph = GitGraph(GitRepoName(repo_name))
111+
git_graph = GitGraph(GitRepoName(repo_name, branch))
96112
supported_types = analyzer.supported_types()
97113

98114
# Initialize with the current commit
@@ -252,12 +268,12 @@ def build_commit_graph(path: str, analyzer: SourceAnalyzer, repo_name: str, igno
252268
# Delete temporary graph
253269
g.disable_backlog()
254270

255-
logging.debug(f"Deleting temporary graph {repo_name + '_tmp'}")
271+
logging.debug(f"Deleting temporary graph {g.name}")
256272
g.delete()
257273

258274
return git_graph
259275

260-
def switch_commit(repo: str, to: str):
276+
def switch_commit(repo: str, to: str, branch: Optional[str] = None):
261277
"""
262278
Switches the state of a graph repository from its current commit to the given commit.
263279
@@ -268,6 +284,7 @@ def switch_commit(repo: str, to: str):
268284
Args:
269285
repo (str): The name of the graph repository to switch commits.
270286
to (str): The target commit hash to switch the graph to.
287+
branch (Optional[str]): The branch. Defaults to ``_default``.
271288
"""
272289

273290
# Validate input arguments
@@ -280,11 +297,11 @@ def switch_commit(repo: str, to: str):
280297
logging.info(f"Switching to commit: {to}")
281298

282299
# Initialize the graph and GitGraph objects
283-
g = Graph(repo)
284-
git_graph = GitGraph(GitRepoName(repo))
300+
g = Graph(repo, branch=branch)
301+
git_graph = GitGraph(GitRepoName(repo, branch))
285302

286303
# Get the current commit hash of the graph
287-
current_hash = get_repo_commit(repo)
304+
current_hash = get_repo_commit(repo, branch)
288305
logging.info(f"Current graph commit: {current_hash}")
289306

290307
if current_hash == to:
@@ -329,5 +346,5 @@ def switch_commit(repo: str, to: str):
329346
g.rerun_query(_q, _p)
330347

331348
# Update the graph's commit to the new target commit
332-
set_repo_commit(repo, to)
349+
set_repo_commit(repo, to, branch)
333350
logging.info(f"Graph commit updated to {to}")

0 commit comments

Comments
 (0)