-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdatasources.py
More file actions
executable file
·155 lines (126 loc) · 5.26 KB
/
Copy pathdatasources.py
File metadata and controls
executable file
·155 lines (126 loc) · 5.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/env python3
"""
CodeAlive Data Sources - List available repositories and workspaces
Shows all indexed codebases available for search and consultation.
Includes current project repos, dependencies, libraries, and organizational codebases.
Usage:
python datasources.py # Show ready-to-use data sources
python datasources.py --query "TASK" # Show only sources relevant to a task (recommended)
python datasources.py --all # Show all data sources (including processing)
python datasources.py --json # Output as JSON
Examples:
# RECOMMENDED when you know the task: only sources relevant to it, each with a
# relevanceReason explaining the match
python datasources.py --query "add OAuth to the checkout flow"
# List ready data sources
python datasources.py
# List all data sources (including those being processed)
python datasources.py --all
# Get JSON output for parsing
python datasources.py --json
Note:
--query runs an AI relevance filter on the backend. It fails open: if filtering is
unavailable, the FULL list is returned and the output says so.
"""
import sys
import json
from pathlib import Path
# Add lib directory to path
sys.path.insert(0, str(Path(__file__).parent / "lib"))
from api_client import CodeAliveClient
def _format_entry(ds: dict, icon: str, show_url: bool = False) -> list:
    """Render one data source as a list of display lines.

    Args:
        ds: Data source dict. Keys read: "name", "description", "state",
            "relevanceReason" and, when `show_url` is set, "url".
        icon: Emoji printed in front of the name.
        show_url: Append the "url" line when the entry has one.

    Returns:
        The lines for this entry, in display order.
    """
    # `or` rather than a .get() default: a key that is present but null/empty
    # must fall back to the placeholder too, otherwise "None" gets printed.
    name = ds.get("name") or "Unknown"
    desc = ds.get("description") or "No description"
    state = ds.get("state") or ""
    # Only states other than "Alive" are worth calling out next to the name.
    status = f" [{state}]" if state and state != "Alive" else ""

    lines = [f"\n {icon} {name}{status}", f" {desc}"]
    reason = ds.get("relevanceReason")
    if reason:
        lines.append(f" 🎯 {reason}")
    url = ds.get("url") if show_url else None
    if url:
        lines.append(f" 🔗 {url}")
    return lines


def format_datasources(datasources: list, as_json: bool = False, message: str = "") -> str:
    """Format data sources for display.

    `message` is the relevance hint accompanying a --query'd listing: how many sources
    were omitted as non-relevant, or that filtering was unavailable and the list is full.

    Args:
        datasources: Data source dicts. Entries are grouped by their "type" value
            ("Workspace" first, then "Repository"). Entries with any other type are
            counted in the header total but not listed.
        as_json: Return JSON instead of the human-readable listing. Without a
            message this is the bare list; with one it is an object holding
            "dataSources" and "message".
        message: Optional hint shown under the header (or embedded in the JSON).

    Returns:
        The formatted text, ready to print.
    """
    if as_json:
        if message:
            return json.dumps({"dataSources": datasources, "message": message}, indent=2)
        return json.dumps(datasources, indent=2)

    if not datasources:
        if message:
            return f"No data sources matched.\nℹ️ {message}"
        return "No data sources found.\nAdd repositories at https://app.codealive.ai"

    output = [
        f"\n📚 Available Data Sources ({len(datasources)} total)\n",
        "=" * 80,
    ]
    if message:
        output.append(f"\nℹ️ {message}")

    # Group by type
    workspaces = [ds for ds in datasources if ds.get("type") == "Workspace"]
    repos = [ds for ds in datasources if ds.get("type") == "Repository"]

    if workspaces:
        output.append("\n🗂️ WORKSPACES (search across multiple repos)")
        output.append("-" * 80)
        for ws in workspaces:
            output.extend(_format_entry(ws, "📁"))

    if repos:
        output.append("\n\n📦 REPOSITORIES")
        output.append("-" * 80)
        for repo in repos:
            # Only repositories print their URL; workspaces never did.
            output.extend(_format_entry(repo, "📄", show_url=True))

    output.extend([
        "\n" + "=" * 80,
        "\n💡 Usage:",
        " • Use names with search.py, grep.py, and fetch.py",
        " • Workspaces search ALL repos in the workspace",
        " • Combine multiple data sources for broader search",
        " • Pass --query 'your task' to list only the relevant sources",
        "\n📖 Examples:",
        " python search.py 'auth logic' my-backend",
        " python grep.py 'AuthService' my-backend",
    ])
    return "\n".join(output)
def main():
    """CLI interface for listing data sources.

    Flags, read from sys.argv:
        --all         pass alive_only=False to the client (default is True)
        --json        print JSON instead of the formatted listing
        --query TEXT  forward TEXT to the client as `query`
        --help, -h    print the module docstring and exit with status 0

    Unrecognized arguments do not abort the run, but they are reported on
    stderr so that a mistyped flag is not silently dropped.

    Exits with status 1 when --query has no value, or when creating the client,
    fetching, or formatting raises; the error text goes to stderr.
    """
    alive_only = True
    as_json = False
    query = None
    unknown = []

    args = iter(sys.argv[1:])
    for arg in args:
        if arg == "--all":
            alive_only = False
        elif arg == "--json":
            as_json = True
        elif arg == "--query":
            # The value is the next argument; pulling it from the same
            # iterator also skips it in the flag loop.
            query = next(args, None)
            if query is None:
                print("❌ Error: --query requires a value", file=sys.stderr)
                sys.exit(1)
        elif arg in ("--help", "-h"):
            print(__doc__)
            sys.exit(0)
        else:
            unknown.append(arg)

    if unknown:
        print(
            f"⚠️ Warning: ignoring unrecognized argument(s): {' '.join(unknown)}",
            file=sys.stderr,
        )

    try:
        client = CodeAliveClient()
        result = client.get_datasources(alive_only=alive_only, query=query)

        # The client may return either a bare list or a dict wrapping the list
        # together with a message; normalise both shapes here.
        if isinstance(result, dict):
            # `or` so that an explicit null falls back like a missing key does.
            datasources = result.get("dataSources") or []
            message = result.get("message") or ""
        else:
            datasources = result
            message = ""

        print(format_datasources(datasources, as_json, message))
    except Exception as e:
        # Top-level boundary: report any failure and signal it via exit status.
        print(f"❌ Error: {e}", file=sys.stderr)
        sys.exit(1)
# Run the CLI only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()