Skip to content

Commit bce4351

Browse files
committed
Fix Daytona curator timeout override and robust clone parsing
1 parent 9eeacf4 commit bce4351

File tree

2 files changed

+97
-16
lines changed

2 files changed

+97
-16
lines changed

scripts/context_retrieval_agent.py

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -844,18 +844,74 @@ def load_task_context(task_dir: Path) -> Dict[str, Any]:
844844

845845

846846
def _extract_clone_urls(dockerfile_content: str) -> List[Dict[str, str]]:
847-
"""Extract git clone URLs from a Dockerfile."""
847+
"""Extract git clone URLs from a Dockerfile.
848+
849+
Handles common clone flag variants (`--branch`, `--no-tags`, etc.) and
850+
clones without an explicit target directory.
851+
"""
852+
def _default_clone_target(url: str) -> str:
853+
clean = url.rstrip("/")
854+
if clean.endswith(".git"):
855+
clean = clean[:-4]
856+
# Support scp-like syntax, e.g. git@github.com:org/repo.git
857+
clean = clean.split(":")[-1]
858+
return clean.split("/")[-1] or "repo"
859+
860+
option_with_value = {
861+
"--branch",
862+
"--depth",
863+
"--config",
864+
"--origin",
865+
"--reference",
866+
"--reference-if-able",
867+
"--separate-git-dir",
868+
"--upload-pack",
869+
"--shallow-since",
870+
"--jobs",
871+
"-b",
872+
"-c",
873+
"-o",
874+
"-u",
875+
}
876+
877+
normalized = dockerfile_content.replace("\\\n", " ")
848878
results = []
849-
for match in re.finditer(
850-
r"git\s+clone\s+(?:--depth\s+\d+\s+)?(\S+?)(?:\.git)?\s+(\S+)",
851-
dockerfile_content,
852-
):
853-
url = match.group(1)
854-
target = match.group(2)
855-
# Extract mirror slug from URL
879+
for match in re.finditer(r"git\s+clone\b([^\n]*)", normalized):
880+
suffix = match.group(1).strip()
881+
suffix = re.split(r"\s*(?:&&|;|\|\|)\s*", suffix, maxsplit=1)[0]
882+
try:
883+
tokens = shlex.split(f"git clone {suffix}")
884+
except ValueError:
885+
continue
886+
887+
# Parse: git clone [options] <repo> [<directory>]
888+
positional = []
889+
i = 2
890+
while i < len(tokens):
891+
tok = tokens[i]
892+
if tok == "--":
893+
i += 1
894+
continue
895+
if tok in option_with_value:
896+
i += 2
897+
continue
898+
if tok.startswith("-"):
899+
i += 1
900+
continue
901+
positional.append(tok)
902+
i += 1
903+
904+
if not positional:
905+
continue
906+
907+
url = positional[0]
908+
target = positional[1] if len(positional) > 1 else _default_clone_target(url)
909+
910+
# Extract mirror slug from URL when available.
856911
m = re.search(r"github\.com/(.+?)(?:\.git)?$", url)
857912
slug = m.group(1) if m else url
858913
results.append({"url": url, "slug": slug, "target": target})
914+
859915
return results
860916

861917

scripts/daytona_curator_runner.py

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -192,10 +192,22 @@ def _extract_repo_info_for_sandbox(ctx: Dict[str, Any]) -> List[Dict[str, str]]:
192192
slug = entry.get("slug", "")
193193
target = entry.get("target", "repo")
194194
if url:
195-
# Ensure URL ends with .git for clone
196-
clone_url = url if url.endswith(".git") else url + ".git"
197-
# Extract dir name from target path
198-
name = target.rstrip("/").split("/")[-1] if target else "repo"
195+
# Only normalize GitHub URLs to ".git"; other hosts (e.g.
196+
# go.googlesource.com) often work best with the original URL.
197+
if url.endswith(".git"):
198+
clone_url = url
199+
elif "github.com/" in url:
200+
clone_url = url + ".git"
201+
else:
202+
clone_url = url
203+
204+
# Extract dir name from target path. If Dockerfile uses "." as the
205+
# target, derive a stable repo name from slug/URL.
206+
target_name = target.rstrip("/").split("/")[-1] if target else ""
207+
if target_name in {"", "."}:
208+
fallback = slug or url
209+
target_name = fallback.rstrip("/").split("/")[-1].replace(".git", "")
210+
name = target_name or "repo"
199211
repos.append({
200212
"url": clone_url,
201213
"commit": "HEAD", # mirrors are at the right commit
@@ -1134,9 +1146,11 @@ def _run_sdlc_mode(args, creds: Dict[str, Any]) -> int:
11341146
]
11351147

11361148
future_timeout = SANDBOX_TIMEOUT_SEC + 300 # clone (300s) + curator (900s)
1137-
# Global timeout: generous to handle slow clones + curator runs in parallel
1138-
# Each task runs in its own sandbox, so total wall time ≈ max(individual times)
1139-
global_timeout = future_timeout + 600 # extra 10 min buffer
1149+
# Scale global timeout by queued "waves" so large batches don't get cut off
1150+
# by a fixed wall-clock limit. Allow explicit override for long/retry runs.
1151+
waves = max(1, (len(tasks) + max(1, args.parallel) - 1) // max(1, args.parallel))
1152+
computed_global_timeout = (future_timeout * waves) + 600 # extra 10 min buffer
1153+
global_timeout = args.global_timeout_sec if args.global_timeout_sec > 0 else computed_global_timeout
11401154

11411155
executor = ThreadPoolExecutor(max_workers=args.parallel)
11421156
futures = {
@@ -1291,7 +1305,9 @@ def _run_contextbench_mode(args, creds: Dict[str, Any]) -> int:
12911305
]
12921306

12931307
future_timeout = SANDBOX_TIMEOUT_SEC + 300
1294-
global_timeout = future_timeout + 600
1308+
waves = max(1, (len(tasks) + max(1, args.parallel) - 1) // max(1, args.parallel))
1309+
computed_global_timeout = (future_timeout * waves) + 600
1310+
global_timeout = args.global_timeout_sec if args.global_timeout_sec > 0 else computed_global_timeout
12951311

12961312
executor = ThreadPoolExecutor(max_workers=args.parallel)
12971313
futures = {
@@ -1437,6 +1453,15 @@ def main() -> int:
14371453
choices=("local", "deepsearch", "hybrid"))
14381454
parser.add_argument("--parallel", type=int, default=DEFAULT_PARALLEL,
14391455
help=f"Concurrent sandboxes (default: {DEFAULT_PARALLEL})")
1456+
parser.add_argument(
1457+
"--global-timeout-sec",
1458+
type=int,
1459+
default=0,
1460+
help=(
1461+
"Override wall-clock timeout for the full batch in seconds. "
1462+
"Default (0) uses a computed timeout based on task count and parallelism."
1463+
),
1464+
)
14401465
parser.add_argument("--max-cost", type=float, default=0, help="Cost limit in USD")
14411466
parser.add_argument("--max-tasks", type=int, default=0, help="Max tasks to process")
14421467
parser.add_argument("--dry-run", action="store_true")

0 commit comments

Comments
 (0)