@@ -169,6 +169,40 @@ def __init__(self, url: str = "", token: str = ""):
169169 )
170170 if not self .token :
171171 log .warning ("No Sourcegraph token — deepsearch/hybrid backends unavailable" )
172+ else :
173+ # Validate token at init
174+ self ._validate_token ()
175+
def _validate_token(self) -> None:
    """Verify the configured token by issuing a minimal GraphQL query.

    Posts ``query { currentUser { username } }`` to ``<url>/.api/graphql``
    with the token in the ``Authorization`` header.  When the response
    names a current user, the username is logged.  When it does not, or
    when the request cannot be built, sent, or parsed, a warning is logged
    and ``self.token`` is reset to ``""`` so the Sourcegraph-backed tools
    are treated as unavailable.

    The method never raises: it is invoked from ``__init__``, and a
    malformed URL or an unreachable server must not prevent the client
    object from being constructed.
    """
    import urllib.request

    gql = "query { currentUser { username } }"
    try:
        # Request() itself raises ValueError for a URL without a scheme
        # (e.g. an empty ``self.url``), so it has to live inside the try.
        req = urllib.request.Request(
            f"{self.url.rstrip('/')}/.api/graphql",
            data=json.dumps({"query": gql}).encode(),
            headers={
                "Authorization": f"token {self.token}",
                "Content-Type": "application/json",
            },
        )
        with urllib.request.urlopen(req, timeout=10) as resp:
            payload = json.loads(resp.read())
        # A GraphQL error response may carry ``"data": null``; ``or {}``
        # routes that case to the "currentUser returned null" branch
        # instead of failing on ``None.get``.
        user = (payload.get("data") or {}).get("currentUser")
    except Exception as e:  # best-effort boundary: log and disable
        log.warning("SG token validation failed: %s. SG search tools disabled.", e)
        self.token = ""  # Disable SG tools
        return

    if isinstance(user, dict) and user:
        log.info("SG auth OK (user: %s)", user.get("username", "?"))
    else:
        log.warning(
            "SG token may be invalid — currentUser returned null. "
            "SG search tools will fail with 403."
        )
        self.token = ""  # Disable SG tools
172206
173207 def keyword_search (self , query : str , max_results : int = 50 ) -> str :
174208 """Run a keyword search and return formatted results."""
@@ -924,13 +958,20 @@ def _extract_clone_urls(dockerfile_content: str) -> List[Dict[str, str]]:
924958
925959## Strategy
9269601. Read the task carefully. Identify key entities and concepts.
927- 2. Use deep_search for broad semantic exploration and discovery.
961+ 2. ALWAYS start with sourcegraph_search or deep_search to discover relevant \
962+ files and repos — even if local repos are available. The local repo set \
963+ may be INCOMPLETE (not all repos mentioned in the task are cloned locally).
9289643. Use local grep/rg to verify and refine findings against actual files.
9299654. Use search_imports and find_symbols for precise dependency tracing.
9309665. Cross-check: anything found by search should be verified locally, and \
931967 local findings should be checked for completeness via search.
932- 6. Be THOROUGH — recall matters more than precision for oracle generation.
933- 7. For multi-repo tasks, search ALL repos.
968+ 6. If local search finds nothing, ALWAYS try Sourcegraph search before \
969+ concluding a file doesn't exist. The file may be in a repo not cloned locally.
970+ 7. Be THOROUGH — recall matters more than precision for oracle generation.
971+ 8. For multi-repo tasks, search ALL repos — including repos you might not \
972+ have locally. Use sourcegraph_search to find files across ALL indexed repos.
973+ 9. Stay FOCUSED on the specific task question. Include only files directly \
974+ relevant to answering the task. Do not include every tangentially related file.
934975""" ,
935976}
936977
@@ -979,8 +1020,17 @@ def build_user_message(
9791020
9801021 # Available repos
9811022 parts .append ("\n ## Available Local Repositories" )
982- for name , path in sorted (repo_paths .items ()):
983- parts .append (f"- **{ name } **: `{ path } `" )
1023+ if repo_paths :
1024+ for name , path in sorted (repo_paths .items ()):
1025+ parts .append (f"- **{ name } **: `{ path } `" )
1026+ else :
1027+ parts .append ("- *(none cloned locally)*" )
1028+ parts .append (
1029+ "\n **IMPORTANT**: The repos listed above may NOT be complete. "
1030+ "The task may involve additional repositories not cloned locally. "
1031+ "You MUST use sourcegraph_search or deep_search to discover files "
1032+ "in repositories beyond the local set. Do not assume local repos are exhaustive."
1033+ )
9841034
9851035 # Suite context
9861036 suite = ctx .get ("suite_name" , "" )
0 commit comments