google · tcconnally · Jun 15, 2026 · Jun 16, 2026 · Jun 17, 2026 · Jun 25, 2026
diff --git a/src/google/adk/evaluation/final_response_match_v1.py b/src/google/adk/evaluation/final_response_match_v1.py
@@ -14,6 +14,7 @@
 
 from __future__ import annotations
 
+import re
 from typing import Optional
 
 from google.genai import types as genai_types
@@ -92,6 +93,34 @@ def _get_eval_status(score: float, threshold: float):
   return EvalStatus.PASSED if score >= threshold else EvalStatus.FAILED
 
 
+class _UnicodeTokenizer:
+  """ROUGE tokenizer that keeps text written in non-Latin scripts.
+
+  rouge-score's ``DefaultTokenizer`` drops every character outside
+  ``[a-z0-9]``, so Chinese, Japanese, Thai and similar text yields no tokens
+  and scores 0.0 even when candidate and reference are identical.
+
+  Text in which more than half of the characters are ASCII is handed to that
+  default tokenizer, with stemming if enabled, so its results are unchanged.
+  Otherwise each ``[a-z0-9]`` run is one token (not stemmed) and every other
+  word character, e.g. a CJK ideograph, is a token by itself, which lets
+  partially matching text earn partial credit.
+  """
+
+  def __init__(self, use_stemmer: bool = True):
+    self._default = rouge_scorer.tokenizers.DefaultTokenizer(use_stemmer)
+
+  def tokenize(self, text: str) -> list[str]:
+    """Lowercases ``text`` and splits it into ROUGE tokens."""
+    lowered = text.lower()
+    if not lowered:
+      return []
+    ascii_count = sum(ch.isascii() for ch in lowered)
+    if 2 * ascii_count <= len(lowered):
+      return re.findall(r"[a-z0-9]+|\w", lowered, re.UNICODE)
+    return self._default.tokenize(lowered)
+
+
 def _calculate_rouge_1_scores(candidate: str, reference: str):
   """Calculates the ROUGE-1 score between a candidate and reference text.
 
@@ -110,7 +139,11 @@ def _calculate_rouge_1_scores(candidate: str, reference: str):
   Returns:
       A dictionary containing the ROUGE-1 precision, recall, and f-measure.
   """
-  scorer = rouge_scorer.RougeScorer(["rouge1"], use_stemmer=True)
+  scorer = rouge_scorer.RougeScorer(
+      ["rouge1"],
+      use_stemmer=True,
+      tokenizer=_UnicodeTokenizer(),
+  )
 
   # The score method returns a dictionary where keys are the ROUGE types
   # and values are Score objects (tuples) with precision, recall, and fmeasure.