tok/integration_advanced_test.go at main · GrayCodeAI/tok · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
package tok_test

import (
	"strings"
	"testing"
	"time"

	"github.com/GrayCodeAI/tok"
)

// ---------------------------------------------------------------------------
// 6. Performance - Test that compression completes within reasonable time
// ---------------------------------------------------------------------------

// TestIntegration_Performance_SmallInput checks that compressing one short
// sentence with default options finishes within 500ms of wall-clock time.
// Only the tok.Compress call itself is inside the timed region.
func TestIntegration_Performance_SmallInput(t *testing.T) {
	const limit = 500 * time.Millisecond
	const text = "Short text for performance test."

	began := time.Now()
	tok.Compress(text)
	if took := time.Since(began); took > limit {
		t.Errorf("small input compression took %v, expected under %v", took, limit)
	}
}

// TestIntegration_Performance_MediumInput checks that compressing 100
// repetitions of a sentence with the tok.Aggressive option finishes within
// two seconds of wall-clock time.
func TestIntegration_Performance_MediumInput(t *testing.T) {
	const limit = 2 * time.Second
	text := strings.Repeat("This is a medium-length sentence for performance testing. ", 100)

	began := time.Now()
	tok.Compress(text, tok.Aggressive)
	if took := time.Since(began); took > limit {
		t.Errorf("medium input compression took %v, expected under %v", took, limit)
	}
}

// TestIntegration_Performance_LargeInput checks that compressing a large
// input with the tok.Aggressive option stays within a wall-clock limit.
//
// The input is 2000 repetitions of a 120-byte sentence, i.e. about 240 KB.
// The limit is 10s normally and 120s when raceEnabled() reports true, since
// the race detector adds significant overhead.
func TestIntegration_Performance_LargeInput(t *testing.T) {
	const sentence = "This is a line of text that will be compressed by the full pipeline. It has enough content to exercise multiple layers. "
	text := strings.Repeat(sentence, 2000)

	limit := 10 * time.Second
	if raceEnabled() {
		// Allow extra headroom for race-detector instrumentation.
		limit = 120 * time.Second
	}

	began := time.Now()
	tok.Compress(text, tok.Aggressive)
	if took := time.Since(began); took > limit {
		t.Errorf("large input compression took %v, expected under %v", took, limit)
	}
}

// TestIntegration_Performance_CodeInput checks that compressing Go-like
// source text with the tok.Code option finishes within five seconds.
//
// The input is the same small function definition repeated 100 times.
func TestIntegration_Performance_CodeInput(t *testing.T) {
	const limit = 5 * time.Second

	// One function definition; every repetition is byte-identical.
	fn := "func process" +
		strings.Repeat("x", 10) +
		"(input string) string {\n" +
		"\tresult := strings.TrimSpace(input)\n" +
		"\tresult = strings.ToLower(result)\n" +
		"\treturn result\n" +
		"}\n\n"
	source := strings.Repeat(fn, 100)

	began := time.Now()
	tok.Compress(source, tok.Code)
	if took := time.Since(began); took > limit {
		t.Errorf("code input compression took %v, expected under %v", took, limit)
	}
}

// TestIntegration_Performance_RepeatedCalls checks that 100 back-to-back
// default-option compressions of the same sentence finish within five
// seconds in total.
func TestIntegration_Performance_RepeatedCalls(t *testing.T) {
	const (
		limit = 5 * time.Second
		calls = 100
		text  = "This is a sentence for testing repeated compression calls with the same input."
	)

	began := time.Now()
	for remaining := calls; remaining > 0; remaining-- {
		tok.Compress(text)
	}
	if took := time.Since(began); took > limit {
		t.Errorf("100 repeated compressions took %v, expected under %v", took, limit)
	}
}

// TestIntegration_Performance_CompressorReuse feeds four inputs of varying
// size through a single Compressor built with tok.Adaptive. Each call must
// return non-empty output and a non-zero OriginalTokens count, and all four
// calls together must finish within five seconds.
func TestIntegration_Performance_CompressorReuse(t *testing.T) {
	const limit = 5 * time.Second

	compressor := tok.NewCompressor(tok.Adaptive)
	samples := []string{
		"First input to the reusable compressor.",
		"Second input with different content for comparison.",
		"Third input that is a bit longer to test how the pipeline handles varied sizes across calls.",
		strings.Repeat("Fourth input is quite large. ", 50),
	}

	began := time.Now()
	for i := range samples {
		out, st := compressor.Compress(samples[i])
		switch {
		case out == "":
			t.Fatal("reusable compressor returned empty output")
		case st.OriginalTokens == 0:
			t.Fatal("reusable compressor returned zero OriginalTokens")
		}
	}
	if took := time.Since(began); took > limit {
		t.Errorf("4 compressions with reused compressor took %v, expected under %v", took, limit)
	}
}

// ---------------------------------------------------------------------------
// 7. Configuration - Test that pipeline configuration affects output correctly
// ---------------------------------------------------------------------------

// TestIntegration_Configuration_BudgetEnforcement compresses the same
// 500-word input with a budget of 50 and a budget of 500.
//
// The tight run must end at no more than 70 FinalTokens (some slack over the
// requested 50), and the loose run must not end with fewer tokens than the
// tight one.
func TestIntegration_Configuration_BudgetEnforcement(t *testing.T) {
	const (
		tightBudget = 50
		looseBudget = 500
	)
	text := strings.Repeat("word ", 500)

	_, tight := tok.Compress(text, tok.WithBudget(tightBudget))
	_, loose := tok.Compress(text, tok.WithBudget(looseBudget))

	if tight.FinalTokens > 70 {
		t.Errorf("tight budget (50) produced %d tokens, expected near 50", tight.FinalTokens)
	}
	// A larger budget should never keep less than a smaller one.
	if tight.FinalTokens > loose.FinalTokens {
		t.Errorf("loose budget (%d) produced fewer tokens (%d) than tight budget (%d)",
			looseBudget, loose.FinalTokens, tight.FinalTokens)
	}
}

// TestIntegration_Configuration_MinimalVsAggressive compresses the same text
// with tok.Minimal and tok.Aggressive and compares the resulting token counts.
//
// Both runs must report a non-zero OriginalTokens, and the aggressive run may
// exceed the minimal run's FinalTokens by at most 10 tokens.
func TestIntegration_Configuration_MinimalVsAggressive(t *testing.T) {
	text := strings.Repeat("This is test content for comparing compression modes. ", 100)

	_, gentle := tok.Compress(text, tok.Minimal)
	_, harsh := tok.Compress(text, tok.Aggressive)

	if gentle.OriginalTokens == 0 || harsh.OriginalTokens == 0 {
		t.Fatal("OriginalTokens should be non-zero for both modes")
	}

	// Small tolerance so a near-tie between the two modes does not fail.
	ceiling := gentle.FinalTokens + 10
	if harsh.FinalTokens > ceiling {
		t.Errorf("aggressive mode (%d tokens) should not produce more than minimal mode (%d tokens)",
			harsh.FinalTokens, gentle.FinalTokens)
	}
}

// TestIntegration_Configuration_CodeVsLogTier runs one Go snippet through
// both the tok.Code and tok.Log options.
//
// It does not compare the two results with each other; it only requires that
// each run reports non-zero OriginalTokens and non-zero FinalTokens.
func TestIntegration_Configuration_CodeVsLogTier(t *testing.T) {
	const snippet = `func main() {
	fmt.Println("hello")
	x := 42
	return x
}

func helper(s string) string {
	return strings.TrimSpace(s)
}`

	_, asCode := tok.Compress(snippet, tok.Code)
	_, asLog := tok.Compress(snippet, tok.Log)

	if asCode.OriginalTokens == 0 || asLog.OriginalTokens == 0 {
		t.Fatal("both tiers should report non-zero OriginalTokens")
	}

	// Tier-specific layers may yield different counts; neither may be zero.
	checks := []struct {
		failed bool
		msg    string
	}{
		{asCode.FinalTokens == 0, "code tier should produce non-zero FinalTokens"},
		{asLog.FinalTokens == 0, "log tier should produce non-zero FinalTokens"},
	}
	for _, c := range checks {
		if c.failed {
			t.Error(c.msg)
		}
	}
}

// TestIntegration_Configuration_WithAndWithoutQuery compresses a short log
// excerpt once with default options and once with
// tok.WithQuery("database errors").
//
// The two results are not compared; each run only has to report non-zero
// OriginalTokens and non-zero FinalTokens.
func TestIntegration_Configuration_WithAndWithoutQuery(t *testing.T) {
	const logText = `[INFO] Application started
[ERROR] Failed to connect to database
[WARN] Retrying connection
[INFO] Connected successfully
[ERROR] Timeout on request to /api/data
[INFO] Retrying request
[INFO] Request succeeded`

	_, plain := tok.Compress(logText)
	_, queried := tok.Compress(logText, tok.WithQuery("database errors"))

	if plain.OriginalTokens == 0 || queried.OriginalTokens == 0 {
		t.Fatal("OriginalTokens should be non-zero")
	}

	// Relevance scoring may move the count either way, so only emptiness is
	// treated as a failure.
	checks := []struct {
		failed bool
		msg    string
	}{
		{plain.FinalTokens == 0, "no-query compression should produce non-zero FinalTokens"},
		{queried.FinalTokens == 0, "query-aware compression should produce non-zero FinalTokens"},
	}
	for _, c := range checks {
		if c.failed {
			t.Error(c.msg)
		}
	}
}

// TestIntegration_Configuration_AdaptiveTier_AutoDetectsContentType runs the
// tok.Adaptive option over log-shaped and code-shaped input in turn.
//
// It only asserts that OriginalTokens is non-zero for each input; it does not
// inspect which content type, if any, was detected.
func TestIntegration_Configuration_AdaptiveTier_AutoDetectsContentType(t *testing.T) {
	// requireCounted stops the test with msg if the run reports no
	// original tokens.
	requireCounted := func(input, msg string) {
		t.Helper()
		if _, st := tok.Compress(input, tok.Adaptive); st.OriginalTokens == 0 {
			t.Fatal(msg)
		}
	}

	requireCounted(
		strings.Repeat("[INFO] 2026-05-28T10:00:00Z level=info msg=\"request processed\" status=200\n", 100),
		"adaptive tier should report non-zero OriginalTokens for log content",
	)
	requireCounted(
		strings.Repeat("func processItem(id int) error {\n\treturn nil\n}\n\n", 50),
		"adaptive tier should report non-zero OriginalTokens for code content",
	)
}

// TestIntegration_Configuration_SurfaceTier_FastPath compresses 100 short
// lines with the tok.Surface option.
//
// The output must be non-empty, OriginalTokens must be non-zero, and the
// tok.Compress call must take no longer than two seconds.
func TestIntegration_Configuration_SurfaceTier_FastPath(t *testing.T) {
	const limit = 2 * time.Second
	text := strings.Repeat("content line for surface tier testing\n", 100)

	began := time.Now()
	out, st := tok.Compress(text, tok.Surface)
	took := time.Since(began)

	switch {
	case out == "":
		t.Fatal("surface tier returned empty output")
	case st.OriginalTokens == 0:
		t.Fatal("surface tier should report non-zero OriginalTokens")
	}
	// The original test describes the surface tier as a 4-layer fast path.
	if took > limit {
		t.Errorf("surface tier took %v, expected under 2s for medium input", took)
	}
}

// TestIntegration_Configuration_StatsLayerBreakdown checks the per-layer
// statistics returned by a tok.Minimal compression.
//
// stats.Layers must be non-nil and non-empty, and no entry may report a
// negative TokensSaved or DurationMs.
func TestIntegration_Configuration_StatsLayerBreakdown(t *testing.T) {
	text := strings.Repeat("test content for layer stats verification\n", 100)
	_, stats := tok.Compress(text, tok.Minimal)

	switch {
	case stats.Layers == nil:
		t.Fatal("stats.Layers should not be nil")
	case len(stats.Layers) == 0:
		t.Error("stats.Layers should contain at least one layer stat")
	}

	// Sanity-check every reported layer.
	for layer, entry := range stats.Layers {
		if entry.TokensSaved < 0 {
			t.Errorf("layer %q has negative TokensSaved: %d", layer, entry.TokensSaved)
		}
		if entry.DurationMs < 0 {
			t.Errorf("layer %q has negative DurationMs: %d", layer, entry.DurationMs)
		}
	}
}

// ---------------------------------------------------------------------------
// Additional integration tests: Compressor reuse, concurrent safety
// ---------------------------------------------------------------------------

// TestIntegration_Compressor_ReuseAcrossInputs sends several inputs,
// including an empty string, through one Compressor built with tok.Minimal.
//
// An empty input must produce empty output. Every non-empty input must
// produce non-empty output and a non-zero OriginalTokens count, including the
// input that follows the empty one.
func TestIntegration_Compressor_ReuseAcrossInputs(t *testing.T) {
	compressor := tok.NewCompressor(tok.Minimal)

	samples := []string{
		"First unique input for reuse testing.",
		"Second completely different content here.",
		"",
		"Fourth input after empty.",
	}

	for idx := range samples {
		text := samples[idx]
		out, st := compressor.Compress(text)

		if text != "" {
			if out == "" {
				t.Errorf("call %d: non-empty input produced empty output", idx)
			}
			if st.OriginalTokens == 0 {
				t.Errorf("call %d: OriginalTokens is zero for non-empty input", idx)
			}
			continue
		}

		// Empty in, empty out; stats are not checked for this case.
		if out != "" {
			t.Errorf("call %d: empty input should produce empty output", idx)
		}
	}
}

// TestIntegration_ConcurrentCompression runs 20 package-level tok.Compress
// calls on the same input from separate goroutines. Each call must return
// non-empty output and a non-zero OriginalTokens count, and all of them must
// finish within 10 seconds overall.
//
// Workers never touch t. Each one sends its findings over a buffered channel
// and the test goroutine does all the reporting, so a worker that finishes
// after a timeout cannot call t.Error on a test that has already completed
// (the testing package panics in that case).
func TestIntegration_ConcurrentCompression(t *testing.T) {
	const workers = 20
	input := strings.Repeat("concurrent safety test content with enough words\n", 50)

	type outcome struct {
		emptyOutput bool
		zeroTokens  bool
	}

	// Buffered to the worker count so a send never blocks, even when the
	// test goroutine has stopped receiving.
	results := make(chan outcome, workers)
	for i := 0; i < workers; i++ {
		go func() {
			output, stats := tok.Compress(input)
			results <- outcome{
				emptyOutput: output == "",
				zeroTokens:  stats.OriginalTokens == 0,
			}
		}()
	}

	// One timer for the whole wait. Creating a fresh time.After on every
	// receive would give each worker its own 10-second window.
	timeout := time.After(10 * time.Second)
	for i := 0; i < workers; i++ {
		select {
		case r := <-results:
			if r.emptyOutput {
				t.Error("concurrent compression returned empty output")
			}
			if r.zeroTokens {
				t.Error("concurrent compression returned zero OriginalTokens")
			}
		case <-timeout:
			t.Fatal("concurrent compression timed out")
		}
	}
}

// TestIntegration_ConcurrentCompressor shares one Compressor built with
// tok.Adaptive across 10 goroutines that all compress the same input. Each
// call must return non-empty output and a non-zero OriginalTokens count, and
// all of them must finish within 10 seconds overall.
//
// Workers never touch t. Each one sends its findings over a buffered channel
// and the test goroutine does all the reporting, so a worker that finishes
// after a timeout cannot call t.Error on a test that has already completed
// (the testing package panics in that case).
func TestIntegration_ConcurrentCompressor(t *testing.T) {
	const workers = 10
	c := tok.NewCompressor(tok.Adaptive)
	input := strings.Repeat("compressor concurrent test input with enough content\n", 50)

	type outcome struct {
		emptyOutput bool
		zeroTokens  bool
	}

	// Buffered to the worker count so a send never blocks, even when the
	// test goroutine has stopped receiving.
	results := make(chan outcome, workers)
	for i := 0; i < workers; i++ {
		go func() {
			output, stats := c.Compress(input)
			results <- outcome{
				emptyOutput: output == "",
				zeroTokens:  stats.OriginalTokens == 0,
			}
		}()
	}

	// One timer for the whole wait. Creating a fresh time.After on every
	// receive would give each worker its own 10-second window.
	timeout := time.After(10 * time.Second)
	for i := 0; i < workers; i++ {
		select {
		case r := <-results:
			if r.emptyOutput {
				t.Error("concurrent compressor returned empty output")
			}
			if r.zeroTokens {
				t.Error("concurrent compressor returned zero OriginalTokens")
			}
		case <-timeout:
			t.Fatal("concurrent compressor timed out")
		}
	}
}

// ---------------------------------------------------------------------------
// Additional integration tests: CompactionSchema and BuildCompactionPrompt
// ---------------------------------------------------------------------------

// TestIntegration_CompactionSchema_ToPrompt renders a fully populated
// CompactionSchema with ToPrompt.
//
// The result must be non-empty and must contain the "Task Overview" and
// "Important Discoveries" headings plus part of the task overview text.
func TestIntegration_CompactionSchema_ToPrompt(t *testing.T) {
	schema := &tok.CompactionSchema{
		TaskOverview:         "Implement compression pipeline tests",
		CurrentState:         "Writing integration tests for the tok package",
		ImportantDiscoveries: []string{"Pipeline has 20 layers", "Supports multiple tiers"},
		NextSteps:            []string{"Run tests", "Fix failures"},
		ContextToPreserve:    []string{"File: integration_test.go", "Module: github.com/GrayCodeAI/tok"},
	}

	rendered := schema.ToPrompt()
	if rendered == "" {
		t.Fatal("ToPrompt returned empty string")
	}

	required := []struct {
		needle string
		msg    string
	}{
		{"Task Overview", "prompt should contain 'Task Overview' section"},
		{"Important Discoveries", "prompt should contain 'Important Discoveries' section"},
		{"compression pipeline", "prompt should contain the task overview text"},
	}
	for _, want := range required {
		if !strings.Contains(rendered, want.needle) {
			t.Error(want.msg)
		}
	}
}

// TestIntegration_CompactionSchema_ParseResponse parses a plain JSON object
// with tok.ParseCompactionResponse.
//
// Parsing must succeed, TaskOverview must match the JSON value, and both
// entries of important_discoveries must be present.
func TestIntegration_CompactionSchema_ParseResponse(t *testing.T) {
	const payload = `{
		"task_overview": "Building test suite",
		"current_state": "Almost done",
		"important_discoveries": ["Finding 1", "Finding 2"],
		"next_steps": ["Run tests"],
		"context_to_preserve": ["key detail"]
	}`

	parsed, err := tok.ParseCompactionResponse(payload)
	if err != nil {
		t.Fatalf("ParseCompactionResponse failed: %v", err)
	}

	if got, want := parsed.TaskOverview, "Building test suite"; got != want {
		t.Errorf("TaskOverview = %q, want %q", got, want)
	}
	if n := len(parsed.ImportantDiscoveries); n != 2 {
		t.Errorf("ImportantDiscoveries count = %d, want 2", n)
	}
}

// TestIntegration_CompactionSchema_ParseMarkdownFencedResponse checks that
// tok.ParseCompactionResponse accepts JSON wrapped in a ```json markdown
// fence and still extracts TaskOverview.
func TestIntegration_CompactionSchema_ParseMarkdownFencedResponse(t *testing.T) {
	const fenced = "```json\n{\n\t\"task_overview\": \"Test\",\n\t\"current_state\": \"Done\"\n}\n```"

	parsed, err := tok.ParseCompactionResponse(fenced)
	if err != nil {
		t.Fatalf("ParseCompactionResponse with markdown fences failed: %v", err)
	}
	if got, want := parsed.TaskOverview, "Test"; got != want {
		t.Errorf("TaskOverview = %q, want %q", got, want)
	}
}

// TestIntegration_BuildCompactionPrompt builds a compaction prompt with a
// second argument of 0.
//
// The result must be non-empty and must contain both the supplied context
// and the "task_overview" schema field name.
func TestIntegration_BuildCompactionPrompt(t *testing.T) {
	const context = "some context to compress"

	built := tok.BuildCompactionPrompt(context, 0)
	if built == "" {
		t.Fatal("BuildCompactionPrompt returned empty string")
	}

	required := []struct {
		needle string
		msg    string
	}{
		{context, "prompt should contain the input context"},
		{"task_overview", "prompt should contain schema field names"},
	}
	for _, want := range required {
		if !strings.Contains(built, want.needle) {
			t.Error(want.msg)
		}
	}
}

// TestIntegration_BuildCompactionPrompt_Truncation passes a 10000-byte
// context with a limit of 1000 and requires the whole prompt to be at most
// 2000 bytes, which leaves room for the fixed instructions around the
// truncated context.
func TestIntegration_BuildCompactionPrompt_Truncation(t *testing.T) {
	oversized := strings.Repeat("x", 10000)

	built := tok.BuildCompactionPrompt(oversized, 1000)
	if size := len(built); size > 2000 {
		t.Errorf("prompt length %d exceeds expected maximum", size)
	}
}

// ---------------------------------------------------------------------------
// Additional integration tests: Token estimation precision
// ---------------------------------------------------------------------------

// TestIntegration_TokenEstimation_PreciseVsFast checks that
// tok.EstimateTokens returns a positive count for several kinds of content:
// prose, code, a long single-character run, and JSON.
//
// Despite the name, no second estimator is compared here. The original test
// describes EstimateTokens as BPE-based, which cannot be confirmed from this
// file.
func TestIntegration_TokenEstimation_PreciseVsFast(t *testing.T) {
	samples := []string{
		"Hello, world!",
		"func main() { fmt.Println(\"test\") }",
		"The quick brown fox jumps over the lazy dog.",
		strings.Repeat("a", 1000),
		"JSON: {\"key\": \"value\", \"array\": [1, 2, 3]}",
	}

	for _, sample := range samples {
		count := tok.EstimateTokens(sample)
		if count > 0 {
			continue
		}
		// Keep the failure message short by showing at most 30 bytes.
		preview := sample
		if len(preview) > 30 {
			preview = preview[:30]
		}
		t.Errorf("EstimateTokens(%q) = %d, want > 0", preview, count)
	}
}

// TestIntegration_TokenEstimation_Monotonicity checks that
// tok.EstimateTokens does not decrease across three texts of increasing
// length (short, medium, long). Equal counts are accepted.
func TestIntegration_TokenEstimation_Monotonicity(t *testing.T) {
	const (
		shortText  = "Hello"
		mediumText = "Hello, this is a medium-length sentence for testing."
		longText   = "Hello, this is a much longer sentence that goes on and on with many more words and details about testing token estimation monotonicity properties."
	)

	nShort := tok.EstimateTokens(shortText)
	nMedium := tok.EstimateTokens(mediumText)
	nLong := tok.EstimateTokens(longText)

	if nShort > nMedium {
		t.Errorf("medium tokens (%d) should be >= short tokens (%d)", nMedium, nShort)
	}
	if nMedium > nLong {
		t.Errorf("long tokens (%d) should be >= medium tokens (%d)", nLong, nMedium)
	}
}