// Source: ik_llama.cpp/tests/test-function-calls.cpp at main · ProgenyAlpha/ik_llama.cpp · GitHub

#include <cassert>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <string>

// Include the function calling parser and streaming support
#include "../examples/server/function_calls.hpp"
#include "../examples/server/streaming_chat.hpp"
#include "../common/chat-parser.h"

// Stub definitions for server variables (needed for json-partial.cpp).
// This test defines the two flags itself and leaves both at false.
// NOTE(review): presumably they are otherwise defined by the server's own
// translation unit, which this test does not link — confirm.
bool server_verbose = false;
bool server_log_json = false;

// Test data for native Kimi-K2 token format
const std::string token_response = R"(I'll help you check the weather.

<|tool_calls_section_begin|>
<|tool_call_begin|>
functions.get_weather:0<|tool_call_argument_begin|>
{"location": "Tokyo"}
<|tool_call_end|>
<|tool_calls_section_end|>

Let me get that information for you.)";

const std::string multiple_token_calls = R"(I'll help you with both tasks.

<|tool_calls_section_begin|>
<|tool_call_begin|>
functions.get_weather:0<|tool_call_argument_begin|>
{"location": "Tokyo"}
<|tool_call_end|>
<|tool_call_begin|>
functions.calculate:1<|tool_call_argument_begin|>
{"expression": "15 * 23"}
<|tool_call_end|>
<|tool_calls_section_end|>

Here are the results.)";

const std::string malformed_token_response = R"(I'll check the weather.

<|tool_calls_section_begin|>
<|tool_call_begin|>
functions.get_weather:0<|tool_call_argument_begin|>
{"location": "Tokyo"}
<!-- Missing closing tag -->

Let me help you.)";

const std::string no_function_calls = R"(I can help you with that. The weather in Tokyo is usually quite pleasant this time of year.)";

// Test data for simple function call format
const std::string simple_function_call = R"(functions.ping:0{"domain": "google.de"})";

const std::string simple_multiple_calls = R"(functions.calculate:0{"expression": "15 * 23"}functions.ping:1{"domain": "google.com"})";

const std::string partial_function_call = R"(functions.get_weather:0{"location": "Tok)";

const std::string malformed_simple_call = R"(functions.invalid:0{invalid json})";

const std::string empty_function_name = R"(functions.:0{"param": "value"})";

// Test data for streaming scenarios
const std::string streaming_incremental_1 = R"(I'll help you with that.)";
const std::string streaming_incremental_2 = R"(I'll help you with that. functions.ping:0{"domain": ")";
const std::string streaming_incremental_3 = R"(I'll help you with that. functions.ping:0{"domain": "google.de"})";

const std::string streaming_with_content = R"(I'll ping the domain for you. functions.ping:0{"domain": "google.de"} The request has been sent.)";

const std::string streaming_unicode = R"(Testing unicode: 测试 functions.test:0{"message": "こんにちは world 🌍"})";

const std::string streaming_large_args = R"(functions.process:0{"data": ")" + std::string(10000, 'x') + R"("})";

const std::string streaming_nested_json = R"(functions.complex:0{"config": {"nested": {"deep": {"value": 42}}, "array": [1, 2, 3]}})";

const std::string streaming_special_chars = R"(functions.special:0{"text": "Line 1\nLine 2\tTabbed \"Quoted\" 'Single' \\Backslash"})";

const std::string streaming_empty_args = R"(functions.empty:0{})";

const std::string streaming_null_args = R"(functions.nulltest:0{"value": null, "array": [null, 1, null]})";

const std::string streaming_boolean_args = R"(functions.booltest:0{"enabled": true, "disabled": false, "count": 0})";

const std::string streaming_content_only = R"(This is just regular content without any tool calls.)";

const std::string streaming_mixed_format = R"(<|tool_calls_section_begin|>
<|tool_call_begin|>
functions.get_weather:0<|tool_call_argument_begin|>
{"location": "Tokyo"}
<|tool_call_end|>
<|tool_calls_section_end|>
Also: functions.ping:1{"host": "example.com"})";

const std::string streaming_no_args = R"(functions.noargs:0)";

const std::string streaming_incomplete_json = R"(functions.incomplete:0{"started": "but not finished")";

const std::string streaming_very_long_name = R"(functions.)" + std::string(1000, 'a') + R"(:0{"test": true})";

const std::string streaming_empty_function_content = R"(functions.:0{"empty": "name"})";

const std::string streaming_invalid_index = R"(functions.test:abc{"invalid": "index"})";

const std::string streaming_negative_index = R"(functions.test:-1{"negative": "index"})";

const std::string streaming_missing_colon = R"(functions.test0{"missing": "colon"})";

const std::string streaming_missing_brace = R"(functions.test:0"missing": "brace")";

const std::string streaming_extra_brace = R"(functions.test:0{"extra": "brace"}})";

const std::string streaming_control_chars = R"(functions.control:0{"data": "\u0000\u0001\u0002\u0003"})";

const std::string streaming_emoji_args = R"(functions.emoji:0{"message": "Hello 👋 World 🌍 Test 🚀"})";

const std::string streaming_multiple_incremental_steps = R"(Let me help you.
functions.step1:0{"action": "initialize"}
Then I'll do this:
functions.step2:1{"action": "process", "data": [1, 2, 3]}
Finally:
functions.step3:2{"action": "finalize", "result": "complete"})";

// Malformed test cases for edge cases
const std::string malformed_no_closing_brace = R"(functions.test:0{"key": "value")";
const std::string malformed_invalid_json_chars = R"(functions.test:0{key: value})";
const std::string malformed_unescaped_quotes = R"(functions.test:0{"message": "Hello "world""})";
const std::string malformed_trailing_comma = R"(functions.test:0{"key": "value",})";
const std::string malformed_duplicate_keys = R"(functions.test:0{"key": "value1", "key": "value2"})";

// Error recovery test cases
const std::string error_recovery_partial = R"(Good content here functions.broken:0{invalid then more good content.)";
const std::string error_recovery_mixed = R"(functions.good:0{"valid": true} some text functions.bad:1{broken} functions.good2:2{"also": "valid"})";
const std::string error_recovery_empty_then_good = R"(functions.:0{} functions.good:1{"valid": true})";

// Performance test cases
const std::string performance_many_small_calls = R"(functions.a:0{"x":1}functions.b:1{"x":2}functions.c:2{"x":3}functions.d:3{"x":4}functions.e:4{"x":5})";
const std::string performance_deeply_nested = R"(functions.deep:0{"a":{"b":{"c":{"d":{"e":{"f":{"g":{"h":{"i":{"j":"deep"}}}}}}}}})";

// Content cleaning test cases
const std::string content_cleaning_simple = R"(I'll ping the domain. functions.ping:0{"domain": "google.de"} Request sent.)";
const std::string content_cleaning_multiple = R"(Processing: functions.step1:0{"action": "start"} functions.step2:1{"action": "end"} Done.)";
const std::string content_cleaning_mixed_formats = R"(First: <|tool_calls_section_begin|><|tool_call_begin|>functions.weather:0<|tool_call_argument_begin|>{"location": "NYC"}<|tool_call_end|><|tool_calls_section_end|> Then: functions.ping:1{"host": "test.com"} Finished.)";

// TDD: Reproduction of exact contamination issue from server logs
// From manual_logs/kimi-k2/ls/test_case_ls_logs_claude-code-ui.log:5
const std::string contamination_ls_issue = R"(I'll help you examine the workspace. Let me list the current directory contents.functions.LS:1{"path": "/tmp/example_workspace"})";
const std::string expected_clean_ls = R"(I'll help you examine the workspace. Let me list the current directory contents.)";

// DeepSeek R1 test data
const std::string deepseek_r1_simple = R"(<think>Need weather.</think>I'll check weather.

<｜tool▁calls▁begin｜>
<｜tool▁call▁begin｜>
function<｜tool▁sep｜>get_weather
```json
{"location": "Tokyo"}
```
<｜tool▁call▁end｜>
<｜tool▁calls▁end｜>

Getting weather info.)";

const std::string deepseek_r1_multiple = R"(<think>Weather and math.</think>Doing both tasks.

<｜tool▁calls▁begin｜>
<｜tool▁call▁begin｜>
function<｜tool▁sep｜>get_weather
```json
{"location": "Tokyo"}
```
<｜tool▁call▁end｜>
<｜tool▁call▁begin｜>
function<｜tool▁sep｜>calculate
```json
{"expression": "15 * 23"}
```
<｜tool▁call▁end｜>
<｜tool▁calls▁end｜>

Results complete.)";

const std::string deepseek_r1_no_reasoning = R"(Checking weather.

<｜tool▁calls▁begin｜>
<｜tool▁call▁begin｜>
function<｜tool▁sep｜>get_weather
```json
{"location": "Tokyo"}
```
<｜tool▁call▁end｜>
<｜tool▁calls▁end｜>

Done.)";

const std::string deepseek_r1_reasoning_only = R"(<think>Just thinking, no tools needed.</think>Here's my direct response.)";

// DeepSeek R1 format without separator (actual format sometimes generated by models)
const std::string deepseek_r1_no_separator = R"(I'll help you add the new cleaning step for resetting device orientation. Let me break this down into tasks:

<｜tool▁calls▁begin｜>
<｜tool▁call▁begin｜>
function<TodoWrite>
```json
{
  "items": [
    {
      "description": "Create ResetOrientation cleaning step class",
      "status": "pending"
    },
    {
      "description": "Implement Android orientation reset using provided ADB command",
      "status": "pending"
    }
  ]
}
```
<｜tool▁call▁end｜>
<｜tool▁calls▁end｜>)";

// Advanced partial detection test cases based on original llama.cpp patterns
// TDD: Advanced partial detection - streaming edge cases
const std::string partial_incomplete_function_name = R"(Let me help you with that. func)";
const std::string partial_incomplete_function_prefix = R"(Let me help you with that. functions)";
const std::string partial_incomplete_function_call = R"(Let me help you with that. functions.)";
const std::string partial_incomplete_function_with_name = R"(Let me help you with that. functions.ls)";
const std::string partial_incomplete_function_with_colon = R"(Let me help you with that. functions.ls:)";
const std::string partial_incomplete_function_with_id = R"(Let me help you with that. functions.ls:1)";
const std::string partial_incomplete_json_opening = R"(Let me help you with that. functions.ls:1{)";
const std::string partial_incomplete_json_partial = R"(Let me help you with that. functions.ls:1{"path)";
const std::string partial_incomplete_json_value = R"(Let me help you with that. functions.ls:1{"path":)";
const std::string partial_incomplete_json_quote = R"(Let me help you with that. functions.ls:1{"path": ")";
const std::string partial_incomplete_json_string = R"(Let me help you with that. functions.ls:1{"path": "/us)";
const std::string partial_multiple_incomplete = R"(First functions.step1:0{"data": "test"} then functions.step2:1{)";

// TDD: Token format partial detection
const std::string partial_token_opening = R"(I'll search for files. <|tool_calls_section_begin|>)";
const std::string partial_token_call_start = R"(I'll search for files. <|tool_calls_section_begin|><|tool_call_begin|>)";
const std::string partial_token_incomplete = R"(I'll search for files. <|tool_calls_section_begin|><|tool_call_begin|>functions.find:0<|tool_call_argument_begin|>{"query)";

// TDD: Mixed format edge cases
const std::string partial_mixed_formats = R"(Processing: <|tool_calls_section_begin|><|tool_call_begin|>functions.step1:0<|tool_call_argument_begin|>{"action": "start"}<|tool_call_end|><|tool_calls_section_end|> then functions.step2:1{)";
const std::string partial_unicode_edge_case = R"(Analysis: functions.analyze:0{"text": "héllo wørld unicode test 中文)";
const std::string partial_nested_braces = R"(Complex: functions.process:0{"config": {"nested": {"value": )";
const std::string partial_escaped_json = R"(Escape test: functions.escape:0{"text": "quote \" and backslash \\)"; // INCOMPLETE - missing closing quote and brace

// Additional contamination test cases for different scenarios
const std::string contamination_partial_streaming = R"(I'll help you examine the workspace. Let me list the current directory contents.functions.LS:)";
const std::string contamination_incomplete_json = R"(I'll help you examine the workspace. Let me list the current directory contents.functions.LS:1{"path": "/Users)";
const std::string contamination_mixed_content = R"(Starting task. functions.TASK:1{"id": "test123"} Processing files. functions.LIST:2{"dir": "/workspace"} Task completed.)";
const std::string contamination_mixed_expected_clean = R"(Starting task.  Processing files.  Task completed.)";

// Unicode and international test cases
const std::string unicode_function_args = R"(functions.translate:0{"text": "Hello", "from": "en", "to": "ja", "result": "こんにちは"})";
const std::string unicode_mixed_languages = R"(functions.process:0{"chinese": "你好", "japanese": "こんにちは", "korean": "안녕하세요", "arabic": "مرحبا", "hebrew": "שלום"})";
const std::string unicode_emojis_complex = R"(functions.social:0{"post": "🎉 New release! 🚀 Check it out: https://example.com 📱💻🌐", "tags": ["🎉", "🚀", "📱"]})";

// Boundary value test cases
const std::string boundary_zero_length_args = R"(functions.test:0{})";
const std::string boundary_single_char_args = R"(functions.test:0{"a":"b"})";
const std::string boundary_max_index = R"(functions.test:4294967295{"max": "index"})";

// Whitespace and formatting test cases
const std::string whitespace_extra_spaces = R"(   functions.test:0   {   "key"   :   "value"   }   )";
const std::string whitespace_tabs_newlines = R"(functions.test:0{
    "key": "value",
    "nested": {
        "inner": "data"
    }
})";
const std::string whitespace_no_spaces = R"(functions.test:0{"key":"value","number":123,"boolean":true})";

// Multiple function calls with mixed success/failure
const std::string mixed_success_failure = R"(functions.good1:0{"valid": true}functions.bad:1{invalidjson}functions.good2:2{"also": "valid"}functions.:3{"empty": "name"}functions.good3:4{"final": "valid"})";

// Edge case: function name with numbers and underscores
const std::string function_name_variations = R"(functions.test_function_123:0{"test": true}functions.another_test:1{"value": 42}functions.func123:2{"mixed": "chars"})";

// Edge case: very long argument values
const std::string long_argument_values = R"(functions.longtest:0{"short": "value", "medium": ")" + std::string(1000, 'x') + R"(", "long": ")" + std::string(10000, 'y') + R"("})";

// Edge case: deeply nested arrays and objects
const std::string deeply_nested_structures = R"(functions.nested:0{"level1": {"level2": {"level3": {"level4": {"level5": {"data": [[[[[1]]]]], "deep": true}}}}, "arrays": [1, [2, [3, [4, [5, [6, [7, [8, [9, [10]]]]]]]]]})";

// Edge case: all JSON data types
const std::string all_json_types = R"(functions.types:0{"string": "text", "number": 42, "float": 3.14, "boolean_true": true, "boolean_false": false, "null_value": null, "array": [1, "two", true, null], "object": {"nested": "value"}})";

// Edge case: escape sequences in strings
// This is a raw string literal, so every doubled backslash below reaches the
// parser as two backslash characters. In JSON terms that is an escaped literal
// backslash followed by an ordinary character (n, t, u0048, ...), not a
// newline, tab or unicode escape.
// NOTE(review): streaming_special_chars uses single backslashes for the same
// kind of payload; confirm the doubling here is intentional.
const std::string escape_sequences = R"(functions.escape:0{"escaped": "Line 1\\nLine 2\\tTabbed \\\"Quoted\\\" \\'Single\\' \\\\Backslash \\/ Slash", "unicode": "\\u0048\\u0065\\u006c\\u006c\\u006f"})";

// Edge case: empty content with tool calls
const std::string empty_content_with_tools = R"(functions.tool:0{"action": "execute"})";

// Edge case: content before and after tool calls
const std::string content_before_after = R"(Starting the process. functions.middle:0{"step": "processing"} Process completed successfully.)";

// Edge case: multiple tool calls of same function
const std::string same_function_multiple = R"(functions.ping:0{"host": "server1.com"}functions.ping:1{"host": "server2.com"}functions.ping:2{"host": "server3.com"})";

// Edge case: tool calls with no content
const std::string tools_no_content = R"(functions.silent:0{"quiet": true}functions.background:1{"hidden": true})";

// Edge case: interleaved content and tools
const std::string interleaved_content_tools = R"(First I'll functions.step1:0{"action": "start"} then some explanation functions.step2:1{"action": "continue"} and finally functions.step3:2{"action": "finish"} all done.)";

// Edge case: function calls at boundaries
const std::string function_at_start = R"(functions.first:0{"position": "start"} This comes after.)";
const std::string function_at_end = R"(This comes before functions.last:0{"position": "end"})";

// Edge case: repeated function names with different indices
const std::string repeated_names = R"(functions.repeat:0{"call": 1}functions.repeat:1{"call": 2}functions.repeat:2{"call": 3})";

// Edge case: zero and negative numbers in arguments
const std::string numeric_edge_cases = R"(functions.numbers:0{"zero": 0, "negative": -42, "float": -3.14159, "scientific": 1.23e-10, "large": 9223372036854775807})";

// Edge case: boolean and null combinations
const std::string boolean_null_combinations = R"(functions.combo:0{"true_value": true, "false_value": false, "null_value": null, "mixed_array": [true, false, null, 1, "string"]})";

// Edge case: empty arrays and objects
const std::string empty_structures = R"(functions.empty:0{"empty_object": {}, "empty_array": [], "nested_empty": {"obj": {}, "arr": []}})";

// Edge case: single character values
const std::string single_char_values = R"(functions.chars:0{"a": "b", "c": "d", "e": "f", "space": " ", "tab": "\t", "newline": "\n"})";

// Edge case: JSON with comments (should be invalid but test robustness)
const std::string json_with_comments = R"(functions.test:0{/* comment */ "key": "value" // line comment
})";

// Edge case: mixed quote types (should be invalid)
const std::string mixed_quotes = R"(functions.test:0{'single': "double", "mixed': 'quotes'})";

// Edge case: function calls in different contexts
const std::string different_contexts = R"(
Context 1: Here's a tool call functions.context1:0{"location": "start"}
Context 2: Another one functions.context2:1{"location": "middle"} with text
Context 3: functions.context3:2{"location": "end"}
)";

// Edge case: streaming simulation (incremental building)
const std::string streaming_step1 = R"(I'll help you. functions.ping:0{"domain": ")";
const std::string streaming_step2 = R"(I'll help you. functions.ping:0{"domain": "google)"; // INCOMPLETE
const std::string streaming_step3 = R"(I'll help you. functions.ping:0{"domain": "google.de"})";
const std::string streaming_step4 = R"(I'll help you. functions.ping:0{"domain": "google.de"} Done.)";

// Edge case: recovery after partial function calls
const std::string recovery_after_partial = R"(functions.partial:0{"incomplete": then normal text continues here.)";

// Edge case: very long function names
const std::string very_long_function_name = R"(functions.)" + std::string(500, 'a') + R"(:0{"test": "long name"})";

// Edge case: function call with only closing brace
const std::string only_closing_brace = R"(functions.test:0})";

// Edge case: function call with only opening brace
const std::string only_opening_brace = R"(functions.test:0{)";

// Edge case: multiple consecutive function calls
const std::string consecutive_calls = R"(functions.a:0{"x":1}functions.b:1{"x":2}functions.c:2{"x":3}functions.d:3{"x":4}functions.e:4{"x":5}functions.f:5{"x":6}functions.g:6{"x":7}functions.h:7{"x":8}functions.i:8{"x":9}functions.j:9{"x":10})";

// Edge case: function calls with array-only arguments
const std::string array_only_args = R"(functions.arrays:0[1, 2, 3, "test", true, null])";

// Edge case: function calls with number-only arguments
// The text after the colon is the digit run "042" with no delimiter between
// the call index and the bare-number argument.
// NOTE(review): whether this should be read as index 0 followed by the
// argument 42, or as index 042 with no arguments, is not visible from this
// fixture alone — confirm against the test that consumes it.
const std::string number_only_args = R"(functions.number:042)";

// Edge case: function calls with string-only arguments
const std::string string_only_args = R"(functions.string:0"just a string")";

// Edge case: function calls with boolean-only arguments
const std::string boolean_only_args = R"(functions.bool:0true)";

// Edge case: function calls with null-only arguments
const std::string null_only_args = R"(functions.null:0null)";

// Qwen3 XML format test data (Hermes-style XML tool calls)
const std::string qwen3_single_tool_call = R"(I'll help you check the weather for Tokyo.

<tool_call>
{"name": "get_weather", "arguments": {"location": "Tokyo", "units": "celsius"}}
</tool_call>

Let me fetch that information for you.)";

const std::string qwen3_multiple_tool_calls = R"(I'll help you with both tasks.

<tool_call>
{"name": "get_weather", "arguments": {"location": "Tokyo"}}
</tool_call>

<tool_call>
{"name": "calculate", "arguments": {"expression": "15 * 23"}}
</tool_call>

Here are the results.)";

const std::string qwen3_malformed_json = R"(I'll try to help but this has bad JSON.

<tool_call>
{"name": "test", "arguments": {bad json}}
</tool_call>

Sorry about that.)";

const std::string qwen3_missing_fields = R"(Testing missing required fields.

<tool_call>
{"arguments": {"param": "value"}}
</tool_call>

<tool_call>
{"name": "", "arguments": {"param": "value"}}
</tool_call>)";

const std::string qwen3_empty_arguments = R"(Testing empty arguments.

<tool_call>
{"name": "empty_test", "arguments": {}}
</tool_call>)";

const std::string qwen3_string_arguments = R"(Testing string arguments format.

<tool_call>
{"name": "string_args", "arguments": "{\"key\": \"value\"}"}
</tool_call>)";

const std::string qwen3_nested_json = R"(Testing complex nested JSON.

<tool_call>
{"name": "complex", "arguments": {"config": {"nested": {"deep": {"value": 42}}, "array": [1, 2, 3]}, "metadata": {"enabled": true, "null_field": null}}}
</tool_call>)";

const std::string qwen3_unicode_content = R"(Testing unicode content with Japanese characters.

<tool_call>
{"name": "translate", "arguments": {"text": "こんにちは世界", "from": "ja", "to": "en"}}
</tool_call>

Translation completed.)";

const std::string qwen3_streaming_partial_1 = R"(I'll help you with that. <tool_call>)";
const std::string qwen3_streaming_partial_2 = R"(I'll help you with that. <tool_call>
{"name": "ping")";
const std::string qwen3_streaming_partial_3 = R"(I'll help you with that. <tool_call>
{"name": "ping", "arguments": {"domain": "google.de"})";
const std::string qwen3_streaming_complete = R"(I'll help you with that. <tool_call>
{"name": "ping", "arguments": {"domain": "google.de"}}
</tool_call>)";

const std::string qwen3_no_tool_calls = R"(This is just regular content without any XML tool calls. It should be parsed normally.)";

const std::string qwen3_incomplete_closing_tag = R"(Testing incomplete closing tag.

<tool_call>
{"name": "test", "arguments": {"param": "value"}}
</tool_cal)";

const std::string qwen3_whitespace_variations = R"(Testing whitespace handling.

<tool_call>
   {"name": "whitespace_test", "arguments": {"param": "value"}}
</tool_call>

<tool_call>
{"name":"no_spaces","arguments":{"compact":true}}
</tool_call>)";

const std::string qwen3_mixed_with_kimi = R"(Mixed format testing.

<|tool_calls_section_begin|>
<|tool_call_begin|>
functions.get_weather:0<|tool_call_argument_begin|>
{"location": "Tokyo"}
<|tool_call_end|>
<|tool_calls_section_end|>

<tool_call>
{"name": "calculate", "arguments": {"expression": "2 + 2"}}
</tool_call>)";

// Model-name samples mixing upper/lower case and '-', '_' and '.' separators
// around the "qwen" + "3" pair, including names with extra prefixes/suffixes.
// NOTE(review): presumably every entry is expected to be detected as a Qwen3
// model; the consuming test is outside this view — confirm.
const std::string qwen3_model_detection_tests[] = {
    "qwen3-7b",
    "Qwen-3-8B",
    "qwen_3.5-instruct",
    "QWEN3-CHAT",
    "my-qwen3-model",
    "qwen-3-turbo",
    "custom_qwen_3_finetune"
};

// Complex real-world scenarios
const std::string real_world_api_call = R"(I'll make an API call for you. functions.http_request:0{"method": "POST", "url": "https://api.example.com/v1/users", "headers": {"Content-Type": "application/json", "Authorization": "Bearer abc123"}, "body": {"name": "John Doe", "email": "john@example.com", "preferences": {"notifications": true, "theme": "dark"}}} Request completed.)";

const std::string real_world_data_processing = R"(Processing the data: functions.process_data:0{"input_file": "/path/to/data.csv", "operations": [{"type": "filter", "column": "status", "value": "active"}, {"type": "sort", "column": "created_at", "order": "desc"}, {"type": "limit", "count": 100}], "output_format": "json"} functions.save_results:1{"path": "/path/to/output.json", "compress": true} Processing complete.)";

const std::string real_world_multi_step = R"(I'll help you with this multi-step process:

Step 1 - Authentication:
functions.authenticate:0{"service": "oauth2", "client_id": "abc123", "scopes": ["read", "write"]}

Step 2 - Data retrieval:
functions.fetch_data:1{"endpoint": "/api/v2/datasets", "filters": {"category": "analytics", "date_range": {"start": "2024-01-01", "end": "2024-12-31"}}, "pagination": {"page": 1, "limit": 50}}

Step 3 - Data transformation:
functions.transform_data:2{"operations": [{"type": "aggregate", "group_by": ["category", "month"], "metrics": ["sum", "avg", "count"]}, {"type": "normalize", "method": "z-score"}], "output_schema": "enhanced"}

Step 4 - Export results:
functions.export_data:3{"format": "xlsx", "sheets": {"summary": "aggregated_data", "details": "raw_data"}, "destination": {"type": "s3", "bucket": "data-exports", "path": "analytics/2024/"}}

All steps completed successfully!)";

// Stress test cases
// Fixture: a short prose prefix followed by 100 back-to-back simple-format
// calls of the form functions.test<i>:<i>{"iteration": <i>, "data": "test_data_<i>"}
// for i = 0 .. 99.
const std::string stress_test_many_calls = []() {
    std::string text = "Stress testing with many function calls: ";
    int call = 0;
    while (call < 100) {
        // Render the index once; it appears four times in each call.
        const std::string n = std::to_string(call);
        text.append("functions.test").append(n);
        text.append(":").append(n);
        text.append(R"({"iteration": )").append(n);
        text.append(R"(, "data": "test_data_)").append(n);
        text.append(R"("})");
        ++call;
    }
    return text;
}();

const std::string stress_test_large_json = R"(functions.large:0{"data": ")" + std::string(100000, 'x') + R"(", "metadata": {"size": 100000, "type": "stress_test"}})";

// Fixture: one simple-format call whose argument object is nested 101 levels
// deep ("level0" .. "level100"), with the string "deep_value" at the bottom.
const std::string stress_test_deep_nesting = []() {
    constexpr int kExtraLevels = 100;  // levels 1..100 on top of level0

    std::string call = R"(functions.deep:0{"level0": )";
    for (int depth = 1; depth <= kExtraLevels; ++depth) {
        call += R"({"level)";
        call += std::to_string(depth);
        call += R"(": )";
    }
    call += R"("deep_value")";

    // One closing brace per opened object: level0 plus the extra levels.
    call.append(static_cast<std::string::size_type>(kExtraLevels) + 1, '}');
    return call;
}();

// Test helper: reports the outcome of one check on stdout and stops the run
// on the first failure.
//   condition - result of the check being reported
//   test_name - label printed after the PASS/FAIL marker
// On failure the process is terminated in every build configuration.
void test_assert(bool condition, const std::string& test_name) {
    if (condition) {
        std::cout << "✅ PASS: " << test_name << std::endl;
        return;
    }

    // std::endl flushes, so the failure line is visible before the process dies.
    std::cout << "❌ FAIL: " << test_name << std::endl;
    assert(false);  // debug builds: keeps the usual assertion diagnostic
    std::abort();   // NDEBUG builds: assert() expands to nothing, so stop explicitly
}

// Test cases
void test_native_token_format() {
    json result = parse_kimi_k2_tool_calls(token_response);

    test_assert(result.is_array(), "Native Token: Result is array");
    test_assert(result.size() == 1, "Native Token: Single function call");

    if (result.size() > 0) {
        json tool_call = result[0];
        test_assert(tool_call["type"] == "function", "Native Token: Correct type");
        test_assert(tool_call["id"] == "functions.get_weather:0", "Native Token: Correct ID");

        json function = tool_call["function"];
        test_assert(function["name"] == "get_weather", "Native Token: Correct function name");

        // Arguments should be JSON string
        std::string args_str = function["arguments"];
        json args = json::parse(args_str);
        test_assert(args["location"] == "Tokyo", "Native Token: Correct location argument");
    }
}

void test_no_function_calls() {
    json result = parse_kimi_k2_tool_calls(no_function_calls);

    test_assert(result.is_array(), "No function calls: Result is array");
    test_assert(result.size() == 0, "No function calls: Empty array");
}

void test_multiple_function_calls() {
    json result = parse_kimi_k2_tool_calls(multiple_token_calls);

    test_assert(result.is_array(), "Multiple calls: Result is array");
    test_assert(result.size() == 2, "Multiple calls: Two function calls");

    if (result.size() >= 2) {
        json first_call = result[0];
        json second_call = result[1];

        test_assert(first_call["function"]["name"] == "get_weather", "Multiple calls: First function name");
        test_assert(second_call["function"]["name"] == "calculate", "Multiple calls: Second function name");
        test_assert(first_call["id"] == "functions.get_weather:0", "Multiple calls: First ID");
        test_assert(second_call["id"] == "functions.calculate:1", "Multiple calls: Second ID");
    }
}

void test_malformed_input() {
    json result = parse_kimi_k2_tool_calls(malformed_token_response);

    test_assert(result.is_array(), "Malformed input: Result is array");
    test_assert(result.size() == 0, "Malformed input: Empty array for malformed input");
}

// Test simple function call format
void test_simple_function_calls() {
    json result = parse_kimi_k2_tool_calls(simple_function_call);

    test_assert(result.is_array(), "Simple: Result is array");
    test_assert(result.size() == 1, "Simple: Single function call");

    if (result.size() > 0) {
        json tool_call = result[0];
        test_assert(tool_call["type"] == "function", "Simple: Correct type");
        test_assert(tool_call["function"]["name"] == "ping", "Simple: Correct function name");

        std::string args_str = tool_call["function"]["arguments"];
        json args = json::parse(args_str);
        test_assert(args["domain"] == "google.de", "Simple: Correct domain argument");
    }
}

void test_simple_multiple_calls() {
    json result = parse_kimi_k2_tool_calls(simple_multiple_calls);

    test_assert(result.is_array(), "Simple Multiple: Result is array");
    test_assert(result.size() == 2, "Simple Multiple: Two function calls");

    if (result.size() >= 2) {
        test_assert(result[0]["function"]["name"] == "calculate", "Simple Multiple: First function name");
        test_assert(result[1]["function"]["name"] == "ping", "Simple Multiple: Second function name");
    }
}

// Test streaming incremental parsing
//
// Feeds three streaming fixtures through parse_chat_message_incremental:
//   1. must produce content and no tool calls,
//   2. must still produce no tool calls,
//   3. must produce exactly one tool call named "ping".
// The second argument is `true` for the first two fixtures and `false` for the
// last (presumably an "is partial" flag - confirm against the parser's signature).
void test_streaming_incremental() {
    ik_chat_msg msg1 = parse_chat_message_incremental(streaming_incremental_1, true);
    test_assert(msg1.tool_calls.empty(), "Streaming 1: No tool calls");
    test_assert(!msg1.content.empty(), "Streaming 1: Has content");

    ik_chat_msg msg2 = parse_chat_message_incremental(streaming_incremental_2, true);
    test_assert(msg2.tool_calls.empty(), "Streaming 2: No complete tool calls yet");

    ik_chat_msg msg3 = parse_chat_message_incremental(streaming_incremental_3, false);
    test_assert(msg3.tool_calls.size() == 1, "Streaming 3: One complete tool call");

    // Only index into tool_calls when an element exists; the size assertion
    // above already reports the failure, and indexing an empty container
    // would be out of bounds.
    if (!msg3.tool_calls.empty()) {
        test_assert(msg3.tool_calls[0].name == "ping", "Streaming 3: Correct function name");
    }
}

// Test differential streaming
//
// Builds two assistant messages with identical content, where only the second
// carries a tool call, and checks that ik_chat_msg_diff::compute_diffs reports
// a difference whose first entry describes that tool call (index 0, name "ping").
void test_streaming_diffs() {
    ik_chat_msg prev;
    prev.role = "assistant";
    prev.content = "I'll help you with that.";

    ik_chat_msg curr;
    curr.role = "assistant";
    curr.content = "I'll help you with that.";
    curr.tool_calls.push_back({"ping", R"({"domain": "google.de"})", "call_1"});

    auto diffs = ik_chat_msg_diff::compute_diffs(prev, curr);
    test_assert(!diffs.empty(), "Diffs: Has differences");

    // Guard the element access: when no diff was produced the assertion above
    // already records the failure, and diffs[0] would be out of bounds.
    if (!diffs.empty()) {
        test_assert(diffs[0].tool_call_index == 0, "Diffs: Correct tool call index");
        test_assert(diffs[0].tool_call_delta.name == "ping", "Diffs: Correct function name");
    }
}

// Test error handling and edge cases
void test_error_handling() {
    // Test malformed JSON
    json result1 = parse_kimi_k2_tool_calls(malformed_simple_call);
    test_assert(result1.size() == 0, "Error: Malformed JSON handled gracefully");

    // Test empty function name
    json result2 = parse_kimi_k2_tool_calls(empty_function_name);
    test_assert(result2.size() == 0, "Error: Empty function name handled gracefully");

    // Test incremental parsing with error
    ik_chat_msg msg = parse_chat_message_incremental(malformed_simple_call, false);
    test_assert(msg.tool_calls.empty(), "Error: Incremental parsing handles errors gracefully");
    test_assert(!msg.content.empty(), "Error: Falls back to content-only");
}

// Test content cleaning
//
// Parses the `content_cleaning_simple` fixture and checks that (a) one tool
// call named "ping" is extracted and (b) the remaining message content no
// longer contains the "functions.ping" call syntax while the surrounding text
// ("I'll ping the domain." / "Request sent.") is still present.
void test_content_cleaning() {
    ik_chat_msg msg = parse_chat_message_incremental(content_cleaning_simple, false);
    test_assert(msg.tool_calls.size() == 1, "Cleaning: Tool call parsed");

    // Only inspect the first tool call when one exists; indexing an empty
    // container would be out of bounds, and the size assertion above already
    // reports the failure.
    if (!msg.tool_calls.empty()) {
        test_assert(msg.tool_calls[0].name == "ping", "Cleaning: Correct function name");
    }

    // Content should be cleaned of function calls
    const std::string& cleaned_content = msg.content;
    test_assert(cleaned_content.find("functions.ping") == std::string::npos, "Cleaning: Function call removed from content");
    test_assert(cleaned_content.find("I'll ping the domain.") != std::string::npos, "Cleaning: Original content preserved");
    test_assert(cleaned_content.find("Request sent.") != std::string::npos, "Cleaning: Trailing content preserved");
}

// TDD: Test that reproduces exact contamination issue from server logs (SHOULD FAIL initially)
void test_contamination_reproduction() {
    std::cout << "🚨 TDD: Testing exact contamination reproduction from server logs..." << std::endl;

    // Test 1: Exact issue from manual_logs/kimi-k2/ls/test_case_ls_logs_claude-code-ui.log:5
    ik_chat_msg msg = parse_chat_message_incremental(contamination_ls_issue, false);

    // Verify tool call is extracted correctly
    test_assert(msg.tool_calls.size() == 1, "TDD Contamination: Tool call should be extracted");
    test_assert(msg.tool_calls[0].name == "LS", "TDD Contamination: Correct function name extracted");

    std::string expected_args = R"({"path": "/tmp/example_workspace"})";
    test_assert(msg.tool_calls[0].arguments == expected_args, "TDD Contamination: Correct arguments extracted");

    // 🚨 THE CRITICAL TEST: Content should be cleaned of function call syntax
    std::cout << "   Raw content length: " << contamination_ls_issue.length() << std::endl;
    std::cout << "   Parsed content length: " << msg.content.length() << std::endl;
    std::cout << "   Parsed content: '" << msg.content << "'" << std::endl;
    std::cout << "   Expected clean: '" << expected_clean_ls << "'" << std::endl;

    // These should FAIL initially (demonstrating the contamination issue)
    test_assert(msg.content.find("functions.LS:1") == std::string::npos, "TDD Contamination: Function call syntax removed from content");
    test_assert(msg.content == expected_clean_ls, "TDD Contamination: Content matches expected clean version");

    // Test 2: Mixed content with multiple function calls
    ik_chat_msg msg2 = parse_chat_message_incremental(contamination_mixed_content, false);
    test_assert(msg2.tool_calls.size() == 2, "TDD Contamination: Multiple tool calls extracted");
    test_assert(msg2.content.find("functions.") == std::string::npos, "TDD Contamination: No function syntax in mixed content");
    test_assert(msg2.content == contamination_mixed_expected_clean, "TDD Contamination: Mixed content cleaned correctly");

    std::cout << "✅ TDD contamination reproduction test completed" << std::endl;
}

// Test mixed format support
void test_mixed_formats() {
    std::cout << "\n🔍 Debugging Mixed Format Test:" << std::endl;
    std::cout << "Input: " << streaming_mixed_format << std::endl;

    json result = parse_kimi_k2_tool_calls(streaming_mixed_format);

    std::cout << "Result size: " << result.size() << std::endl;
    std::cout << "Result: " << result.dump(2) << std::endl;

    test_assert(result.size() == 2, "Mixed: Two tool calls found");

    if (result.size() >= 2) {
        test_assert(result[0]["function"]["name"] == "get_weather", "Mixed: First function (token format)");
        test_assert(result[1]["function"]["name"] == "ping", "Mixed: Second function (simple format)");
    }
}

// Test Unicode and special characters
void test_unicode_support() {
    json result = parse_kimi_k2_tool_calls(streaming_unicode);
    test_assert(result.size() == 1, "Unicode: Tool call parsed");

    if (result.size() > 0) {
        std::string args_str = result[0]["function"]["arguments"];
        json args = json::parse(args_str);
        std::string message = args["message"];
        test_assert(message.find("こんにちは") != std::string::npos, "Unicode: Japanese characters preserved");
        test_assert(message.find("🌍") != std::string::npos, "Unicode: Emoji preserved");
    }
}

// Test validation and robustness
//
// Each malformed fixture must parse to an empty result, and incremental
// parsing of `streaming_incomplete_json` with the second argument set to
// `true` must report no tool calls. Every check carries a distinct label so a
// failure can be traced to the fixture that caused it.
void test_validation_robustness() {
    // Test various malformed inputs
    test_assert(parse_kimi_k2_tool_calls(malformed_no_closing_brace).empty(), "Validation: Missing brace handled");
    test_assert(parse_kimi_k2_tool_calls(malformed_invalid_json_chars).empty(), "Validation: Invalid JSON handled");
    test_assert(parse_kimi_k2_tool_calls(streaming_missing_colon).empty(), "Validation: Missing colon handled");
    // Label differs from the first check so the two brace fixtures are distinguishable.
    test_assert(parse_kimi_k2_tool_calls(streaming_missing_brace).empty(), "Validation: Missing brace (streaming) handled");

    // Test partial parsing mode
    ik_chat_msg partial_msg = parse_chat_message_incremental(streaming_incomplete_json, true);
    test_assert(partial_msg.tool_calls.empty(), "Validation: Incomplete JSON in partial mode handled");
}

// Test performance with many calls
void test_performance() {
    json result1 = parse_kimi_k2_tool_calls(performance_many_small_calls);
    test_assert(result1.size() == 5, "Performance: Multiple small calls parsed");

    json result2 = parse_kimi_k2_tool_calls(consecutive_calls);
    test_assert(result2.size() == 10, "Performance: Consecutive calls parsed");

    // Test large arguments
    json result3 = parse_kimi_k2_tool_calls(streaming_large_args);
    test_assert(result3.size() == 1, "Performance: Large arguments handled");
}

// Test streaming chunk generation
//
// Builds a single diff carrying both a content delta and a tool-call delta,
// passes it to generate_streaming_chunks, and checks the first generated
// chunk: object type, model, completion ID, and that its delta contains both
// "content" and "tool_calls".
void test_streaming_chunks() {
    ik_chat_msg_diff diff;
    diff.content_delta = "Hello world";
    diff.tool_call_index = 0;
    diff.tool_call_delta.name = "test_function";
    diff.tool_call_delta.arguments = R"({"param": "value"})";
    diff.tool_call_delta.id = "call_123";

    std::vector<ik_chat_msg_diff> diffs = {diff};
    auto chunks = generate_streaming_chunks(diffs, "test_completion", "test_model");

    test_assert(!chunks.empty(), "Chunks: Generated successfully");

    // Every remaining check reads chunks[0]; skip them when nothing was
    // generated (the assertion above already reports that failure) instead of
    // indexing out of bounds.
    if (chunks.empty()) {
        return;
    }

    test_assert(chunks[0]["object"] == "chat.completion.chunk", "Chunks: Correct object type");
    test_assert(chunks[0]["model"] == "test_model", "Chunks: Correct model");
    test_assert(chunks[0]["id"] == "test_completion", "Chunks: Correct completion ID");

    json delta = chunks[0]["choices"][0]["delta"];
    test_assert(delta.contains("content"), "Chunks: Has content delta");
    test_assert(delta.contains("tool_calls"), "Chunks: Has tool calls delta");
}

// Test real-world scenarios
void test_real_world_scenarios() {
    json result1 = parse_kimi_k2_tool_calls(real_world_api_call);
    test_assert(result1.size() == 1, "Real World: API call parsed");

    json result2 = parse_kimi_k2_tool_calls(real_world_data_processing);
    test_assert(result2.size() == 2, "Real World: Data processing calls parsed");

    json result3 = parse_kimi_k2_tool_calls(real_world_multi_step);
    test_assert(result3.size() == 4, "Real World: Multi-step process parsed");
}

// Test stress scenarios
void test_stress_scenarios() {
    json result1 = parse_kimi_k2_tool_calls(stress_test_many_calls);
    test_assert(result1.size() == 100, "Stress: Many calls handled");

    // Large JSON test
    json result2 = parse_kimi_k2_tool_calls(stress_test_large_json);
    test_assert(result2.size() == 1, "Stress: Large JSON handled");

    // Deep nesting test
    json result3 = parse_kimi_k2_tool_calls(stress_test_deep_nesting);
    test_assert(result3.size() == 1, "Stress: Deep nesting handled");
}

// Test for the streaming vs non-streaming discrepancy issue
void test_streaming_vs_nonstreaming_consistency() {
    // Test data that reproduces the exact issue found in production
    const std::string tool_call_content = R"(functions.WebFetch:1{"url": "https://google.de"})";

    std::cout << "\n🔍 Testing Streaming vs Non-Streaming Consistency Issue:" << std::endl;

    // Test 1: Non-streaming parsing (this works correctly)
    json non_streaming_result = parse_kimi_k2_tool_calls(tool_call_content);

    test_assert(non_streaming_result.is_array(), "Non-streaming: Result is array");
    test_assert(non_streaming_result.size() == 1, "Non-streaming: Single tool call detected");

    if (non_streaming_result.size() > 0) {
        json tool_call = non_streaming_result[0];
        test_assert(tool_call["type"] == "function", "Non-streaming: Correct type");
        test_assert(tool_call["id"] == "functions.WebFetch:1", "Non-streaming: Correct ID");
        test_assert(tool_call["function"]["name"] == "WebFetch", "Non-streaming: Correct function name");

        std::string args_str = tool_call["function"]["arguments"];
        json args = json::parse(args_str);
        test_assert(args["url"] == "https://google.de", "Non-streaming: Correct URL argument");
    }

    // Test 2: Incremental streaming parsing (simulates the issue)
    ik_chat_msg streaming_msg = parse_chat_message_incremental(tool_call_content, false);

    test_assert(!streaming_msg.tool_calls.empty(), "Streaming: Tool calls detected in incremental parsing");
    test_assert(streaming_msg.tool_calls.size() == 1, "Streaming: Single tool call in incremental parsing");

    if (!streaming_msg.tool_calls.empty()) {
        auto& tc = streaming_msg.tool_calls[0];
        test_assert(tc.name == "WebFetch", "Streaming: Correct function name in incremental");
        test_assert(tc.arguments == R"({"url": "https://google.de"})", "Streaming: Correct arguments in incremental");
    }

    // Test 3: Differential streaming (reproduces the issue scenario)
    ik_chat_msg empty_msg;
    empty_msg.role = "assistant";

    ik_chat_msg complete_msg = parse_chat_message_incremental(tool_call_content, false);

    // This simulates what should happen in streaming but currently fails
    std::vector<ik_chat_msg_diff> diffs = ik_chat_msg_diff::compute_diffs(empty_msg, complete_msg);

    test_assert(!diffs.empty(), "Streaming: Diffs generated for tool calls");

    // Test 4: Demonstrate the issue - streaming chunks generation
    std::vector<json> streaming_chunks = generate_streaming_chunks(diffs, "test-completion-id", "test-model");

    bool has_tool_call_delta = false;
    bool has_content_delta = false;

    for (const auto& chunk : streaming_chunks) {
        if (chunk.contains("choices") && chunk["choices"].is_array() && !chunk["choices"].empty()) {
            auto& choice = chunk["choices"][0];
            if (choice.contains("delta")) {
                auto& delta = choice["delta"];
                if (delta.contains("tool_calls")) {
                    has_tool_call_delta = true;
                }
                if (delta.contains("content")) {
                    has_content_delta = true;
                }
            }
        }
    }

    test_assert(has_tool_call_delta, "Streaming: Tool call delta generated (expected behavior)");

    // This assertion documents the current issue - if it fails, it means the bug is fixed!
    if (has_content_delta && !has_tool_call_delta) {
        std::cout << "⚠️  WARNING: Streaming is returning tool calls as content instead of tool_calls array!" << std::endl;
        std::cout << "   This is the exact issue found in production testing." << std::endl;
        std::cout << "   Non-streaming works correctly, but streaming falls back to content." << std::endl;
    }

    std::cout << "📊 Consistency Test Results:" << std::endl;
    std::cout << "   • Non-streaming: ✅ Returns proper tool_calls array" << std::endl;
    std::cout << "   • Streaming parsing: ✅ Detects tool calls correctly" << std::endl;
    std::cout << "   • Differential streaming: " << (has_tool_call_delta ? "✅" : "❌") << " Tool call deltas" << std::endl;

    // Test 5: Document the exact production scenario
    std::cout << "\n🎯 Production Issue Reproduction:" << std::endl;
    std::cout << "   Input: " << tool_call_content << std::endl;
    std::cout << "   Expected streaming: {\"delta\": {\"tool_calls\": [...]}}" << std::endl;
    std::cout << "   Actual streaming: {\"delta\": {\"content\": \"functions.WebFetch:1...\"}}" << std::endl;
    std::cout << "   Root cause: format_partial_response_oaicompat() falls back to content streaming" << std::endl;
}

// Test for server integration - this would have caught the missing includes
void test_server_integration_requirements() {
    std::cout << "\n🔌 Testing Server Integration Requirements:" << std::endl;

    // Test 1: Verify required functions are available (compile-time check)
    const std::string test_content = R"(functions.WebFetch:1{"url": "https://google.de"})";

    // These calls should compile without errors - if server.cpp is missing includes,
    // this test would catch it during integration testing
    try {
        // Test incremental parsing availability
        ik_chat_msg msg = parse_chat_message_incremental(test_content, false);
        test_assert(true, "Integration: parse_chat_message_incremental available");

        // Test diff computation availability
        ik_chat_msg empty_msg;
        std::vector<ik_chat_msg_diff> diffs = ik_chat_msg_diff::compute_diffs(empty_msg, msg);
        test_assert(true, "Integration: ik_chat_msg_diff::compute_diffs available");

        // Test that we can generate tool call IDs (this would fail if function missing)
        if (!msg.tool_calls.empty()) {
            std::vector<std::string> tool_call_ids;
            auto generate_id = []() -> std::string { return "test_id"; };
            msg.ensure_tool_call_ids_set(tool_call_ids, generate_id);
            test_assert(true, "Integration: Tool call ID generation works");
        }

        // Test streaming chunk generation (this should be available)
        if (!diffs.empty()) {
            // This would fail in server if generate_streaming_chunks wasn't implemented
            std::cout << "   • Streaming chunk generation components available" << std::endl;
        }

    } catch (const std::exception& e) {
        std::cout << "❌ Integration test failed: " << e.what() << std::endl;
        test_assert(false, "Integration: Server functions not properly integrated");
    }

    // Test 2: Validate end-to-end tool call flow simulation
    std::cout << "   • Testing end-to-end tool call simulation:" << std::endl;

    // Simulate what server should do:
    // 1. Parse tool calls from content
    json parsed_calls = parse_kimi_k2_tool_calls(test_content);
    test_assert(!parsed_calls.empty(), "Integration: Tool calls parsed successfully");

    // 2. Convert to streaming message format
    ik_chat_msg server_msg = parse_chat_message_incremental(test_content, false);
    test_assert(!server_msg.tool_calls.empty(), "Integration: Converted to streaming format");

    // 3. Generate diffs (what server streaming should do)
    ik_chat_msg prev_msg;
    std::vector<ik_chat_msg_diff> server_diffs = ik_chat_msg_diff::compute_diffs(prev_msg, server_msg);
    test_assert(!server_diffs.empty(), "Integration: Server diffs generated");

    // Test 3: Validate that the expected server response format is achievable
    bool has_tool_calls_in_diffs = false;
    for (const auto& diff : server_diffs) {
        if (diff.tool_call_index != std::string::npos) {
            has_tool_calls_in_diffs = true;
            break;
        }
    }
    test_assert(has_tool_calls_in_diffs, "Integration: Tool calls present in streaming diffs");

    std::cout << "✅ Server integration requirements validated" << std::endl;
    std::cout << "   This test would have caught missing includes/functions in server.cpp" << std::endl;
}

// Test that validates compilation dependencies
void test_compilation_dependencies() {
    std::cout << "\n📦 Testing Compilation Dependencies:" << std::endl;

    // This test documents what server.cpp needs to include
    std::cout << "   • Required includes for server.cpp:" << std::endl;
    std::cout << "     - #include \"function_calls.hpp\"" << std::endl;
    std::cout << "     - #include \"streaming_chat.hpp\"" << std::endl;

    std::cout << "   • Required functions for server.cpp:" << std::endl;
    std::cout << "     - generate_tool_call_id()" << std::endl;
    std::cout << "     - generate_streaming_chunks()" << std::endl;

    // Test that core functions are available in this compilation unit
    const std::string test_input = "functions.test:0{\"param\":\"value\"}";