diff --git a/vlmeval/dataset/image_vqa.py b/vlmeval/dataset/image_vqa.py index bec6af2a0..66a1a1705 100644 --- a/vlmeval/dataset/image_vqa.py +++ b/vlmeval/dataset/image_vqa.py @@ -4048,7 +4048,7 @@ class MathCanvas(ImageBaseDataset): "https://huggingface.co/datasets/shiwk24/MathCanvas-Bench/resolve/main/MathCanvas_Bench_VLMEvalKit.tsv" } DATASET_MD5 = { - "MathCanvas-Bench": "9fd0b783ca416dbb20ecfb04d2711411" + "MathCanvas-Bench": "827dd1b1ce9c17d2b8338af6a13b6791" } HINT = ( diff --git a/vlmeval/dataset/utils/mathcanvas.py b/vlmeval/dataset/utils/mathcanvas.py index 93cabd254..b0ed8fa3c 100644 --- a/vlmeval/dataset/utils/mathcanvas.py +++ b/vlmeval/dataset/utils/mathcanvas.py @@ -233,15 +233,14 @@ def summarize_mathcanvas_results(all_results: List[Dict]): stats['overall']['completely_correct_count'] += 1 # 2. Image Presence Stats - q_images = [p for p in item.get("question_interleave", []) if p['type'] == 'image'] + q_images = item.get("image", []) image_presence_key = "Has Image" if len(q_images) > 0 else "No Image" _update_stats(stats['by_question_image_count'], image_presence_key, score) # 3. Knowledge Stats # Use the primary knowledge area for categorization. - knowledges = item.get("knowledges", []) - knowledge_key = knowledges[0] if knowledges else "Unknown" - _update_stats(stats['by_knowledge'], knowledge_key, score) + category = item.get("category", "Unknown") + _update_stats(stats['by_knowledge'], category, score) # --- Final Report Generation --- def calculate_accuracy(data_dict):