BugTraceAI-Launcher/test_installer_core.py at main · BugTraceAI/BugTraceAI-Launcher · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
#!/usr/bin/env python3
"""
Unit + property tests for installer_core (the pure functional core).

Run:  python3 -m unittest test_installer_core -v
      python3 test_installer_core.py

No external dependencies (stdlib unittest). Property tests use random with a
fixed seed *inside the test* — randomness at the test boundary is fine; the
functions under test remain pure and deterministic.
"""
import json
import random
import unittest

import installer_core as core
from installer_core import (
    Ok, Err, DomainError, TerminalCaps, PromptSpec, Check,
)


def caps(is_tty=True, term="xterm-256color", no_color=False, encoding="UTF-8"):
    # Test helper: assemble a TerminalCaps, letting each test override only
    # the capability it cares about.
    fields = dict(is_tty=is_tty, term=term, no_color=no_color, encoding=encoding)
    return TerminalCaps(**fields)


class TestResult(unittest.TestCase):
    """Checks for the Ok/Err helper predicates and DomainError immutability."""

    def test_ok_err_helpers(self):
        # is_ok / is_err must classify an Ok and an Err value correctly.
        self.assertTrue(core.is_ok(Ok(1)))
        self.assertFalse(core.is_err(Ok(1)))
        self.assertTrue(core.is_err(Err(DomainError("k", "m"))))

    def test_frozen_immutability(self):
        e = DomainError("k", "m")
        # dataclasses.FrozenInstanceError is a subclass of AttributeError.
        # Asserting on bare Exception would also "pass" on unrelated errors
        # (e.g. a NameError from a typo), hiding a real regression.
        with self.assertRaises(AttributeError):
            e.kind = "other"  # frozen dataclass


class TestPalette(unittest.TestCase):
    """Checks make_palette's colour codes and glyph choices per terminal caps."""

    def test_no_ansi_when_color_disabled(self):
        # Non-TTY, TERM=dumb and no_color each must yield empty colour codes.
        colourless = [caps(is_tty=False), caps(term="dumb"), caps(no_color=True)]
        for terminal in colourless:
            palette = core.make_palette(terminal)
            for field in ("red", "reset", "bold"):
                self.assertEqual(getattr(palette, field), "")

    def test_ansi_present_when_color_enabled(self):
        palette = core.make_palette(caps())
        self.assertEqual(palette.red, "\033[31m")
        reset_code = palette.reset
        self.assertTrue(reset_code.startswith("\033["))

    def test_ascii_fallback_without_utf8(self):
        # A non-UTF-8 encoding must fall back to plain ASCII glyphs.
        palette = core.make_palette(caps(encoding="ANSI_X3.4-1968"))
        ascii_glyphs = (
            ("check", "OK"),
            ("cross", "NO"),
            ("arrow", ">"),
            ("spinner_frames", "|/-\\"),
        )
        for field, glyph in ascii_glyphs:
            self.assertEqual(getattr(palette, field), glyph)

    def test_utf8_glyphs_when_supported(self):
        palette = core.make_palette(caps(encoding="UTF-8"))
        for field, glyph in (("check", "✔"), ("arrow", "›")):
            self.assertEqual(getattr(palette, field), glyph)

    def test_color_independent_of_utf8(self):
        # No-color but UTF-8 → glyphs stay unicode while colour codes are empty.
        terminal = caps(no_color=True, encoding="UTF-8")
        palette = core.make_palette(terminal)
        self.assertEqual(palette.green, "")
        self.assertEqual(palette.check, "✔")


class TestTimeouts(unittest.TestCase):
    """Checks long-running command detection and the timeout it selects."""

    def test_long_running_detection(self):
        slow_commands = (
            "docker compose up -d --build",
            "sudo apt-get install -y nmap",
            "docker pull alpine",
        )
        for command in slow_commands:
            self.assertTrue(core.is_long_running(command))
        for command in ("ls -la", "docker ps"):
            self.assertFalse(core.is_long_running(command))

    def test_select_timeout_consistency(self):
        # The build command gets the larger value, a trivial command the smaller.
        low, high = 60, 600
        chosen_for_build = core.select_timeout("docker compose build", low, high)
        self.assertEqual(chosen_for_build, high)
        chosen_for_echo = core.select_timeout("echo hi", low, high)
        self.assertEqual(chosen_for_echo, low)


class TestStreamingLogs(unittest.TestCase):
    """Checks follow-log detection and the harden_command rewrite."""

    def test_detect_follow(self):
        following = (
            "docker compose logs -f --tail=20",
            "docker logs --follow web",
        )
        for command in following:
            self.assertTrue(core.is_streaming_log(command))
        not_following = (
            "docker compose logs --tail=30",
            "docker ps -f status=running",  # -f not for logs
        )
        for command in not_following:
            self.assertFalse(core.is_streaming_log(command))

    def test_harden_strips_follow(self):
        hardened, changed = core.harden_command("docker compose logs -f --tail=20")
        self.assertTrue(changed)
        self.assertNotIn("-f", hardened.split())
        self.assertIn("--tail=20", hardened)

    def test_harden_noop_on_normal(self):
        # A command without follow flags comes back untouched and unflagged.
        original = "docker compose up -d --build"
        outcome = core.harden_command(original)
        self.assertEqual(outcome, (original, False))

    def test_harden_idempotent_and_safe_property(self):
        # Seeded RNG keeps the generated commands reproducible across runs.
        rng = random.Random(1234)
        vocabulary = ["-f", "--follow", "--tail=20", "web", "cli", "-t", "--timestamps"]
        for _ in range(500):
            count = rng.randint(0, 5)
            picked = [rng.choice(vocabulary) for _ in range(count)]
            cmd = ("docker compose logs " + " ".join(picked)).strip()
            once, _first_changed = core.harden_command(cmd)
            _twice, changed_again = core.harden_command(once)
            # After hardening once, the result is already safe (idempotent) ...
            self.assertFalse(changed_again, f"not idempotent for {cmd!r}")
            # ... and never contains a follow flag.
            tokens = once.split()
            self.assertNotIn("-f", tokens)
            self.assertNotIn("--follow", tokens)


class TestDestructive(unittest.TestCase):
    """Checks which commands is_destructive flags and which it lets through."""

    def test_positive(self):
        dangerous = [
            "rm -rf /",
            "sudo rm -rf /  ",
            "mkfs.ext4 /dev/sda1",
            "dd if=/dev/zero of=/dev/sda",
            "DROP DATABASE bugtraceai_web;",
            "docker volume rm pgdata",
            "docker compose down -v",
            "docker compose down --volumes",
            "docker-compose down -v",
            "docker compose down -fv",
        ]
        for command in dangerous:
            # The command doubles as the failure message to pinpoint the miss.
            self.assertTrue(core.is_destructive(command), command)

    def test_negative(self):
        harmless = [
            "rm -rf ./build",
            "docker compose up -d --build",
            "rm -f /tmp/x",
            "git clone repo",
            "apt-get install -y curl",
            "docker compose down",
            "docker compose down --remove-orphans",
        ]
        for command in harmless:
            self.assertFalse(core.is_destructive(command), command)


class TestRetryPolicy(unittest.TestCase):
    """Checks the should_retry decision table and backoff_delay growth."""

    def test_should_retry_table(self):
        table = (
            (429, 1, 4, True),
            (503, 2, 4, True),
            (None, 1, 4, True),    # network error
            (400, 1, 4, False),    # client error: no retry
            (200, 1, 4, False),
            (429, 4, 4, False),    # attempts exhausted
        )
        for status, attempt, limit, expected in table:
            decision = core.should_retry(status, attempt, limit)
            check = self.assertTrue if expected else self.assertFalse
            check(decision)

    def test_never_exceeds_max_property(self):
        # Seeded RNG: whatever the status, an attempt at/over the max never retries.
        rng = random.Random(7)
        statuses = [None, 200, 400, 401, 429, 500, 502, 503]
        for _ in range(1000):
            status = rng.choice(statuses)
            limit = rng.randint(1, 6)
            attempt = rng.randint(limit, limit + 3)  # at or beyond max
            self.assertFalse(core.should_retry(status, attempt, limit))

    def test_backoff_monotonic_and_capped(self):
        previous = -1.0
        for attempt in range(1, 10):
            delay = core.backoff_delay(attempt, base=1.0, cap=30.0)
            self.assertGreaterEqual(delay, previous)
            self.assertLessEqual(delay, 30.0)
            previous = delay
        self.assertEqual(core.backoff_delay(0), 0.0)
        for attempt, expected in ((1, 1.0), (2, 2.0)):
            self.assertEqual(core.backoff_delay(attempt, base=1.0), expected)


class TestMaskSecret(unittest.TestCase):
    """Checks that mask_secret hides all but a short tail of a secret."""

    def test_empty(self):
        masked = core.mask_secret("")
        self.assertEqual(masked, "")

    def test_reveals_last_five(self):
        secret = "sk-or-v1-EXAMPLE-FAKE-TEST-KEY-0-abcde"
        tail = secret[-5:]
        masked = core.mask_secret(secret)
        self.assertTrue(masked.endswith(tail))              # last 5 chars visible
        self.assertEqual(masked[-5:], tail)
        self.assertTrue(masked.startswith("*"))
        self.assertEqual(len(masked), len(secret))          # length preserved by stars

    def test_short_secret_fully_masked(self):
        # Nothing should leak when the secret is <= visible.
        for secret, expected in (("abc", "***"), ("12345", "*****")):
            self.assertEqual(core.mask_secret(secret), expected)

    def test_never_exposes_more_than_visible_property(self):
        # Seeded RNG keeps the generated secrets reproducible.
        rng = random.Random(5)
        alphabet = "abcdefABCDEF0123456789-_"
        for _ in range(1000):
            length = rng.randint(0, 40)
            secret = "".join(rng.choice(alphabet) for _ in range(length))
            masked = core.mask_secret(secret, visible=5)
            # The number of non-masked (revealed) chars never exceeds 5.
            revealed = 0
            for shown, real in zip(masked, secret):
                if shown == real and shown != "*":
                    revealed += 1
            self.assertLessEqual(revealed, 5)


class TestParsing(unittest.TestCase):
    """Checks parse_tool_arguments / parse_api_response on valid and broken input."""

    def test_parse_tool_arguments_valid(self):
        r = core.parse_tool_arguments('{"command": "ls"}')
        self.assertIsInstance(r, Ok)
        self.assertEqual(r.value["command"], "ls")

    def test_parse_tool_arguments_empty(self):
        # An empty argument string is accepted and yields an empty dict.
        r = core.parse_tool_arguments("")
        self.assertIsInstance(r, Ok)
        self.assertEqual(r.value, {})

    def test_parse_tool_arguments_broken(self):
        # Truncated JSON must come back as an Err, not an exception.
        r = core.parse_tool_arguments('{"command": ')
        self.assertIsInstance(r, Err)
        self.assertEqual(r.error.kind, "invalid_tool_args")

    def test_parse_tool_arguments_non_dict(self):
        # Valid JSON that is not an object is rejected.
        r = core.parse_tool_arguments('[1,2,3]')
        self.assertIsInstance(r, Err)

    def test_parse_api_response_with_tool_calls(self):
        body = json.dumps({"choices": [{"message": {
            "content": None,
            "tool_calls": [{"id": "c1", "function": {"name": "run_command",
                                                     "arguments": '{"command":"ls"}'}}],
        }}]})
        r = core.parse_api_response(body)
        self.assertIsInstance(r, Ok)
        msg = r.value
        self.assertEqual(len(msg.tool_calls), 1)
        self.assertEqual(msg.tool_calls[0].name, "run_command")
        self.assertIsNotNone(msg.raw)

    def test_parse_api_response_with_content(self):
        body = json.dumps({"choices": [{"message": {"content": "hola"}}]})
        r = core.parse_api_response(body)
        self.assertIsInstance(r, Ok)
        self.assertEqual(r.value.content, "hola")
        self.assertEqual(r.value.tool_calls, ())

    def test_parse_api_response_error_shaped(self):
        body = json.dumps({"error": {"message": "rate limited", "code": 429}})
        r = core.parse_api_response(body)
        self.assertIsInstance(r, Err)
        self.assertEqual(r.error.kind, "api_error")

    def test_parse_api_response_empty_choices(self):
        r = core.parse_api_response(json.dumps({"choices": []}))
        self.assertIsInstance(r, Err)
        self.assertEqual(r.error.kind, "bad_response_shape")

    def test_parse_api_response_invalid_json(self):
        r = core.parse_api_response("<html>502 Bad Gateway</html>")
        self.assertIsInstance(r, Err)
        self.assertEqual(r.error.kind, "invalid_json")

    def test_parse_api_response_missing_message(self):
        r = core.parse_api_response(json.dumps({"choices": [{"finish_reason": "stop"}]}))
        self.assertIsInstance(r, Err)

    def test_parse_never_raises_property(self):
        # Seeded RNG feeds both parsers short strings built from JSON
        # punctuation so the "never raise" contract is exercised reproducibly.
        rng = random.Random(99)
        alphabet = '{}[]":,abc 012\\n'
        for _ in range(2000):
            s = "".join(rng.choice(alphabet) for _ in range(rng.randint(0, 30)))
            # Must return a Result, never raise. assertIsInstance matches the
            # rest of this file and reports the offending value on failure.
            self.assertIsInstance(core.parse_api_response(s), (Ok, Err))
            self.assertIsInstance(core.parse_tool_arguments(s), (Ok, Err))


class TestVerification(unittest.TestCase):
    """Checks evaluate_check predicates and the check lists built per mode."""

    def test_evaluate_predicates(self):
        # Each row: the three arguments passed to evaluate_check, then the
        # expected truthiness of its result.
        rows = (
            ("rc0_nonempty", 0, "1.2.3", True),
            ("rc0_nonempty", 1, "1.2.3", False),
            ("rc0_nonempty", 0, "  ", False),
            ("nonempty", 1, "x", True),
            ("health", 0, '{"status":"healthy"}', True),
            ("health", 0, '{"status":"ok"}', True),
            ("health", 0, '{"status":"degraded but up"}', True),
            ("health", 0, "", True),
            ("health", 7, '{"status":"healthy"}', False),
            ("health", 0, "down", False),
            ("http200", 0, "200\n", True),
            ("http200", 0, "500", False),
            ("exists", 0, "exists", True),
            ("unknown_predicate", 0, "x", False),
        )
        for predicate, code, text, expected in rows:
            verdict = core.evaluate_check(predicate, code, text)
            check = self.assertTrue if expected else self.assertFalse
            check(verdict)

    def test_checks_per_mode(self):
        install_dir = "/home/u/bugtraceai"
        full = core.verification_checks("full", install_dir)
        cli = core.verification_checks("cli", install_dir)
        web = core.verification_checks("web", install_dir)
        # full = docker + cli(3) + web(5)
        self.assertEqual(len(full), 1 + 3 + 5)
        self.assertEqual(len(cli), 1 + 3)
        self.assertEqual(len(web), 1 + 5)
        # all checks are immutable Check instances with a known predicate
        known_predicates = {"rc0_nonempty", "nonempty", "health", "http200", "exists"}
        for item in full:
            self.assertIsInstance(item, Check)
            self.assertIn(item.predicate, known_predicates)

    def test_checks_reference_install_dir(self):
        # The first "exists" check must point at the .env under the install dir.
        checks = core.verification_checks("cli", "/opt/bt")
        exists_checks = [chk for chk in checks if chk.predicate == "exists"]
        env_check = exists_checks[0]
        self.assertIn("/opt/bt/BugTraceAI-CLI/.env", env_check.command)


class TestSystemPrompt(unittest.TestCase):
    """Checks the text produced by build_system_prompt for a PromptSpec."""

    def spec(self, mode="full", action="install"):
        # Shared fixture: only mode and action vary between tests.
        fields = dict(
            mode=mode,
            action=action,
            install_dir="/home/u/bugtraceai",
            api_key="sk-or-TESTKEY",
            cli_repo="git://cli",
            web_repo="git://web",
            max_turns=40,
        )
        return PromptSpec(**fields)

    def test_contains_key_and_dir(self):
        prompt = core.build_system_prompt(self.spec())
        for fragment in ("sk-or-TESTKEY", "/home/u/bugtraceai"):
            self.assertIn(fragment, prompt)

    def test_hardening_chmod_present(self):
        prompt = core.build_system_prompt(self.spec())
        self.assertIn("chmod 600 .env", prompt)

    def test_no_follow_in_logs(self):
        # No actual `docker ... logs -f/--follow` *command* should remain (it
        # would hang). The RULE line that forbids follow is allowed — it has no
        # "docker" token, so we only inspect lines that look like a real command.
        prompt = core.build_system_prompt(self.spec())
        for line in prompt.splitlines():
            tokens = line.split()
            is_docker_logs = "docker" in tokens and "logs" in tokens
            has_follow = "-f" in tokens or "--follow" in tokens
            if is_docker_logs and has_follow:
                self.fail(f"system prompt still instructs follow-logs: {line!r}")

    def test_repair_vs_install_instructions(self):
        headings = (("repair", "REPAIR MODE RULES"), ("install", "INSTALL MODE RULES"))
        for action, heading in headings:
            prompt = core.build_system_prompt(self.spec(action=action))
            self.assertIn(heading, prompt)

    def test_mode_label(self):
        for mode, label in (("cli", "CLI Only"), ("web", "WEB Only")):
            prompt = core.build_system_prompt(self.spec(mode=mode))
            self.assertIn(label, prompt)


class TestParsingHardened(unittest.TestCase):
    """Regression tests for the boundary-parser never-crash contract (the HIGH
    finding) and content normalization.

    Every test asserts the result is an ``Ok`` before touching ``r.value``, so
    a regression fails with a clear assertion instead of an AttributeError.
    """
    def test_function_null_does_not_crash(self):
        # "function": null must be tolerated and produce an empty name.
        body = json.dumps({"choices": [{"message": {"tool_calls": [
            {"id": "x", "function": None}]}}]})
        r = core.parse_api_response(body)
        self.assertIsInstance(r, Ok)
        self.assertEqual(r.value.tool_calls[0].name, "")

    def test_function_string_does_not_crash(self):
        # A string where the function object belongs yields empty arguments.
        body = json.dumps({"choices": [{"message": {"tool_calls": [
            {"id": "x", "function": "oops"}]}}]})
        r = core.parse_api_response(body)
        self.assertIsInstance(r, Ok)
        self.assertEqual(r.value.tool_calls[0].arguments, "")

    def test_idless_tool_call_gets_synthetic_id(self):
        body = json.dumps({"choices": [{"message": {"tool_calls": [
            {"function": {"name": "run_command", "arguments": "{}"}}]}}]})
        r = core.parse_api_response(body)
        self.assertIsInstance(r, Ok)
        self.assertTrue(r.value.tool_calls[0].id)  # non-empty synthesized id

    def test_content_list_normalized_to_str(self):
        # A list of text parts is flattened into one string.
        body = json.dumps({"choices": [{"message": {"content": [
            {"type": "text", "text": "ho"}, {"type": "text", "text": "la"}]}}]})
        r = core.parse_api_response(body)
        self.assertIsInstance(r, Ok)
        self.assertEqual(r.value.content, "hola")

    def test_content_weird_type_coerced(self):
        body = json.dumps({"choices": [{"message": {"content": 123}}]})
        r = core.parse_api_response(body)
        self.assertIsInstance(r, Ok)
        self.assertIsInstance(r.value.content, str)

    def test_non_dict_tool_call_filtered(self):
        # Entries that are not objects are dropped rather than crashing.
        body = json.dumps({"choices": [{"message": {"tool_calls": ["nope", 5]}}]})
        r = core.parse_api_response(body)
        self.assertIsInstance(r, Ok)
        self.assertEqual(r.value.tool_calls, ())


class TestAssistantMessageDict(unittest.TestCase):
    """Checks the dict that assistant_message_dict builds from a parsed message."""

    def test_sent_ids_match_parsed(self):
        tool_calls = [
            {"id": "a", "function": {"name": "run_command", "arguments": "{}"}},
            {"function": {"name": "ask_user", "arguments": "{}"}},  # id-less
        ]
        payload = {"choices": [{"message": {"content": None, "tool_calls": tool_calls}}]}
        parsed = core.parse_api_response(json.dumps(payload)).value
        outgoing = core.assistant_message_dict(parsed)
        sent_ids = [call["id"] for call in outgoing["tool_calls"]]
        answered_ids = [call.id for call in parsed.tool_calls]
        self.assertEqual(sent_ids, answered_ids)          # every sent id is answerable
        self.assertTrue(all(sent_ids))                    # none empty
        self.assertEqual(outgoing["role"], "assistant")

    def test_no_tool_calls_key_when_none(self):
        # A content-only message must not carry a "tool_calls" key.
        payload = {"choices": [{"message": {"content": "hi"}}]}
        parsed = core.parse_api_response(json.dumps(payload)).value
        outgoing = core.assistant_message_dict(parsed)
        self.assertNotIn("tool_calls", outgoing)


class TestClassifyHttpError(unittest.TestCase):
    """Checks how classify_http_error labels different response bodies."""

    def test_error_shaped_body(self):
        payload = json.dumps({"error": {"message": "rate"}})
        classified = core.classify_http_error(429, payload)
        self.assertEqual(classified.kind, "api_error")

    def test_html_body(self):
        # A non-JSON body still reports the status code in the message.
        classified = core.classify_http_error(502, "<html>bad gateway</html>")
        self.assertIn("502", classified.message)

    def test_empty_body(self):
        # No parseable error body → generic HTTP error that keeps the status.
        classified = core.classify_http_error(500, "")
        self.assertEqual(classified.kind, "api_error")
        self.assertIn("500", classified.message)


class TestRetryBoundaries(unittest.TestCase):
    """Checks the edges of the 5xx retry range and the backoff cap."""

    def test_5xx_boundaries(self):
        # Only statuses 500..599 are retried; the neighbours on each side are not.
        edges = ((499, False), (500, True), (599, True), (600, False))
        for status, expected in edges:
            decision = core.should_retry(status, 1, 4)
            check = self.assertTrue if expected else self.assertFalse
            check(decision)

    def test_backoff_cap_reached_and_custom_base(self):
        capped = core.backoff_delay(10, base=1.0, cap=30.0)
        self.assertEqual(capped, 30.0)
        first_delay = core.backoff_delay(1, base=2.5)
        self.assertEqual(first_delay, 2.5)


class TestDestructiveHardened(unittest.TestCase):
    """Further is_destructive cases: relative targets, split flags, other tools."""

    def test_relative_and_split_and_tools(self):
        dangerous = [
            "rm -rf .",
            "rm -rf *",
            "rm -r -f /",
            "rm  -rf  /",
            "git clean -fdx",
            "docker rm -f web",
            "docker rmi img",
            "rm -rf ~",
            "sudo rm -rf /",
        ]
        for command in dangerous:
            self.assertTrue(core.is_destructive(command), command)

    def test_legit_not_flagged(self):
        # Routine clean-up of build artefacts must not be flagged.
        harmless = [
            "rm -rf ./build",
            "rm -rf node_modules",
            "rm -rf dist",
            "docker compose up -d --build",
            "git clone x",
            "rm -f /tmp/x.log",
        ]
        for command in harmless:
            self.assertFalse(core.is_destructive(command), command)


class TestEvaluateExistsExact(unittest.TestCase):
    """Checks that the "exists" predicate matches the word exactly, not as a substring."""

    def test_exact_match_only(self):
        self.assertTrue(core.evaluate_check("exists", 0, "exists\n"))
        # Output that merely contains the word must be rejected.
        for output in ("directory already exists", "preexists"):
            self.assertFalse(core.evaluate_check("exists", 0, output))


class TestMaskWidth(unittest.TestCase):
    """Checks mask_secret's mask_width, mask_char and visible options."""

    def test_fixed_width_hides_length(self):
        short = core.mask_secret("sk-or-abcdefg", mask_width=8)
        long = core.mask_secret("sk-or-" + "x" * 60 + "abcde", mask_width=8)
        self.assertEqual(short, "********" + "cdefg")
        self.assertTrue(long.startswith("********"))
        self.assertEqual(long[-5:], "abcde")
        # Slice off the 5 visible characters instead of rstrip("abcde"):
        # rstrip treats its argument as a character set, so it would also
        # swallow any stray trailing a-e characters and let a too-long mask
        # pass this "fixed prefix" check.
        self.assertEqual(len(long[:-5]), 8)  # fixed prefix

    def test_custom_mask_char_and_visible_zero(self):
        # A custom mask character must not affect the visible tail, and
        # visible=0 must leave nothing but mask characters.
        self.assertTrue(core.mask_secret("abcdefgh", mask_char="#").endswith("defgh"))
        self.assertEqual(set(core.mask_secret("abcdef", visible=0)), {"*"})


class TestSpinnerLabels(unittest.TestCase):
    """Checks the spinner label chosen for a command and for docker output lines."""

    def test_command_spinner_label_precedence(self):
        expectations = (
            ("docker compose build", "Building Docker images"),
            ("docker ps", "Running Docker"),
            ("ls -la", "Running command"),
        )
        for command, label in expectations:
            self.assertEqual(core.command_spinner_label(command), label)

    def test_docker_progress_label_branches(self):
        prefixed = (
            ("#5 building", "Building:"),
            ("Step 3/16 : RUN", "Building:"),
            ("Pulling fs layer", "Downloading:"),
            ("Creating bugtrace_api", "Docker:"),
        )
        for line, prefix in prefixed:
            label = core.docker_progress_label(line)
            self.assertTrue(label.startswith(prefix))
        # Blank or unrecognised lines produce no label at all.
        for line in ("   ", "random noise line"):
            self.assertIsNone(core.docker_progress_label(line))


class TestHardenSurvival(unittest.TestCase):
    """Checks that harden_command removes only the follow flag."""

    def test_non_follow_tokens_survive(self):
        # Over-stripping would be a bug: only -f/--follow must be removed.
        hardened, changed = core.harden_command("docker compose logs -f --tail=20 web")
        self.assertTrue(changed)
        surviving = hardened.split()
        expected_tokens = ("docker", "compose", "logs", "--tail=20", "web")
        for token in expected_tokens:
            self.assertIn(token, surviving)


class TestResultGuards(unittest.TestCase):
    """Checks that is_ok and is_err reject the opposite variant."""

    def test_is_ok_is_err_exclusive(self):
        failure = Err(DomainError("k", "m"))
        self.assertFalse(core.is_ok(failure))
        success = Ok(1)
        self.assertFalse(core.is_err(success))


# Allow running this file directly (python3 test_installer_core.py);
# verbosity=2 makes unittest list every test as it runs.
if __name__ == "__main__":
    unittest.main(verbosity=2)