Fix TypeError when using write_out with log_samples (#839)

Luodian · claude · web-flow · commit 8818d45c9aed · 2025-09-29T09:31:58.000+08:00
* Fix TypeError when using write_out with log_samples

  When both --write_out and --log_samples flags are used together, the print_writeout function can encounter instances where inst.doc is None, causing a TypeError when trying to access doc[doc_to_target]. This fix adds a check for None documents and provides a fallback message instead of crashing.

  Fixes the issue where the following error occurs:

      TypeError: 'NoneType' object is not subscriptable
      at lmms_eval/api/task.py, line 1347, in doc_to_target

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <noreply@anthropic.com>
  Reported-by: rixejzvdl649
  Reported-by: pspdada
  Github-Issue: #143

* Add warnings for --write_out flag usage

  The --write_out flag is intended for debugging purposes only and can significantly impact performance during evaluations. This commit adds:

  1. Runtime warning when --write_out is enabled
  2. Updated help text to clearly indicate it's for debugging only
  3. Documentation in print_writeout function about its debugging purpose
  4. Suggestion to use --log_samples for production use

  These warnings help users understand that --write_out should not be used during actual evaluation runs.

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <noreply@anthropic.com>

* Mark --write_out flag as deprecated

  The --write_out flag has limited use and overlaps with --log_samples functionality. This commit marks it as deprecated to guide users toward the better-maintained --log_samples feature.

  Changes:

  - Added DEPRECATION WARNING when --write_out is used
  - Updated help text to indicate deprecation
  - Added deprecation notices in function docstrings
  - Specified removal target as v0.5.0
  - Clear guidance to use --log_samples instead

  The --write_out flag only prints first few documents to console and impacts performance, while --log_samples saves all outputs to files for comprehensive debugging without performance impact.

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
diff --git a/lmms_eval/__main__.py b/lmms_eval/__main__.py
@@ -165,7 +165,9 @@ def parse_eval_args() -> argparse.Namespace:
         "-w",
         action="store_true",
         default=False,
-        help="Prints the prompt for the first few documents.",
+        help="DEPRECATED: This flag is deprecated and will be removed in a future version. "
+        "For debugging, use --log_samples to save all outputs to files. "
+        "This flag prints prompts for the first few documents to console, impacting performance.",
     )
     parser.add_argument(
         "--log_samples",
@@ -399,6 +401,14 @@ def cli_evaluate_single(args: Union[argparse.Namespace, None] = None) -> None:
 
     evaluation_tracker = EvaluationTracker(**evaluation_tracker_args)
 
+    if args.write_out:
+        eval_logger.warning(
+            "DEPRECATION WARNING: --write_out is deprecated and will be removed in v0.5.0. "
+            "For debugging and analysis, use --log_samples instead, which saves all model "
+            "outputs to files without impacting performance. The --write_out flag only prints "
+            "the first few documents to console and provides limited debugging value."
+        )
+
     if args.predict_only:
         args.log_samples = True
     if (args.log_samples or args.predict_only) and not args.output_path:
diff --git a/lmms_eval/evaluator.py b/lmms_eval/evaluator.py
@@ -459,6 +459,11 @@ def evaluate(
         )
         eval_logger.debug(f"Task: {task_output.task_name}; number of requests on this rank: {len(task._instances)}")
         if write_out:
+            eval_logger.warning(
+                "DEPRECATION WARNING: --write_out is deprecated and will be removed in v0.5.0. "
+                "Use --log_samples instead for saving model outputs and debugging. "
+                "The write_out flag only prints the first few documents and impacts performance."
+            )
             print_writeout(task)
         # aggregate Instances by LM method requested to get output.
         for instance in task.instances:
diff --git a/lmms_eval/evaluator_utils.py b/lmms_eval/evaluator_utils.py
@@ -177,12 +177,22 @@ def get_subtask_list(task_dict, task_root=None, depth=0):
 
 
 def print_writeout(task) -> None:
+    """Print first few documents for debugging purposes.
+
+    DEPRECATED: This function is deprecated and will be removed in v0.5.0.
+    Use log_samples functionality instead for better debugging capabilities.
+
+    WARNING: This function only prints the first few documents to console
+    and can significantly impact performance during evaluations.
+    """
     for inst in task.instances:
         # print the prompt for the first few documents
         if inst.doc_id < 1:
+            # Handle cases where inst.doc might be None (e.g., when using log_samples)
+            target = "N/A (document is None)" if inst.doc is None else task.doc_to_target(inst.doc)
             eval_logger.info(
                 f"Task: {task}; document {inst.doc_id}; context prompt (starting on next line):\
-    \n{inst.args[0]}\n(end of prompt on previous line)\ntarget string or answer choice index (starting on next line):\n{task.doc_to_target(inst.doc)}\n(end of target on previous line)"
+    \n{inst.args[0]}\n(end of prompt on previous line)\ntarget string or answer choice index (starting on next line):\n{target}\n(end of target on previous line)"
             )
             eval_logger.info(f"Request: {str(inst)}")