@@ -617,3 +617,89 @@ def tracked_variation(key, context, default):
617617 assert len (variation_calls ) == 1 , f"Expected 1 variation call, got { len (variation_calls )} "
618618 assert config is not None
619619 assert config .evaluation_metric_key == '$ld:ai:judge:from-flag'
620+
621+
class TestJudgeTemplateInjection:
    """Regression coverage for the judge template-injection vulnerability.

    The judge is expected to fill in its message placeholders with plain string
    replacement rather than Mustache rendering. Attacker-controlled text that
    survived pass 1 (for example a Mustache delimiter-change tag) must therefore
    be handled by pass 2 as inert literal text.
    """

    def _make_judge(self, content: str, tracker, mock_runner) -> Judge:
        """Build a Judge whose config carries exactly one user message holding ``content``."""
        judge_config = AIJudgeConfig(
            key='test-judge',
            enabled=True,
            evaluation_metric_key='metric',
            messages=[LDMessage(role='user', content=content)],
            model=ModelConfig('gpt-4'),
            provider=ProviderConfig('openai'),
        )
        return Judge(judge_config, tracker, mock_runner)

    @pytest.mark.parametrize('name,payload', [
        ('delimiter change brackets', '{{=[ ]=}}'),
        ('delimiter change angle', '{{=<% %>=}}'),
        ('partial', '{{> evil}}'),
        ('comment', '{{! drop everything }}'),
        ('triple stache', '{{{raw}}}'),
        ('section', '{{#section}}inject{{/section}}'),
        ('inverted section', '{{^section}}inject{{/section}}'),
    ])
    def test_injection_variants_in_message_history(
        self, name: str, payload: str, tracker: LDAIConfigTracker, mock_runner
    ):
        """A Mustache control sequence smuggled in via context must not hide the history.

        ``name`` only labels the parametrized case; the assertions depend on ``payload``.
        """
        history_placeholder = '{{message_history}}'
        # Simulates the template as it looks once pass 1 has already inlined the payload.
        template_after_pass1 = f'Auditing {payload} : ' + history_placeholder

        judge = self._make_judge(template_after_pass1, tracker, mock_runner)
        rendered = judge._construct_evaluation_messages('ACTUAL HISTORY', 'some output')

        assert len(rendered) == 1
        body = rendered[0].content
        assert 'ACTUAL HISTORY' in body, \
            f'payload {payload!r} must not blind the judge to the actual history'
        assert history_placeholder not in body, \
            f'placeholder must be fully substituted after payload {payload!r} '

    def test_injection_via_response(self, tracker: LDAIConfigTracker, mock_runner):
        """A payload carried by the evaluated response is inserted verbatim, not interpreted."""
        response_placeholder = '{{response_to_evaluate}}'
        template_after_pass1 = 'History: {{message_history}}\n Response: {{response_to_evaluate}}'
        malicious_response = '{{=[ ]=}} INJECTION ATTEMPT'

        judge = self._make_judge(template_after_pass1, tracker, mock_runner)
        rendered = judge._construct_evaluation_messages('normal history', malicious_response)

        assert len(rendered) == 1
        body = rendered[0].content
        assert malicious_response in body, \
            'malicious content in response must appear verbatim'
        assert response_placeholder not in body, \
            'response placeholder must be fully substituted'

    def test_multiple_placeholder_occurrences(self, tracker: LDAIConfigTracker, mock_runner):
        """Every occurrence of a repeated placeholder is replaced, not just the first."""
        repeated_template = '{{message_history}} | {{message_history}}'

        judge = self._make_judge(repeated_template, tracker, mock_runner)
        rendered = judge._construct_evaluation_messages('HISTORY', 'RESPONSE')

        assert len(rendered) == 1
        assert rendered[0].content == 'HISTORY | HISTORY'

    def test_mustache_syntax_in_content(self, tracker: LDAIConfigTracker, mock_runner):
        """Mustache-looking text inside the history or response values survives untouched."""
        history_with_mustache = 'How do I use {{user}} in Mustache?'
        response_with_mustache = 'Use {{user}} like this: {{#user}}Hello{{/user}}'
        two_slot_template = 'History: {{message_history}}\n Response: {{response_to_evaluate}}'

        judge = self._make_judge(two_slot_template, tracker, mock_runner)
        rendered = judge._construct_evaluation_messages(
            history_with_mustache, response_with_mustache
        )

        assert len(rendered) == 1
        body = rendered[0].content
        assert history_with_mustache in body, \
            'Mustache-like syntax in history must be preserved verbatim'
        assert response_with_mustache in body, \
            'Mustache-like syntax in response must be preserved verbatim'
0 commit comments