From 1cef8bf9283623e598699f2f69205e3100e3c785 Mon Sep 17 00:00:00 2001 From: deep9539 Date: Tue, 28 Apr 2026 00:06:00 -0700 Subject: [PATCH 1/6] Add AVMeme-Exam zero shot classification task --- .../zeroshot_classification/eng/__init__.py | 6 + .../eng/avmeme_exam_classification.py | 115 ++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py diff --git a/mteb/tasks/zeroshot_classification/eng/__init__.py b/mteb/tasks/zeroshot_classification/eng/__init__.py index 31d2f88591..a6d4403833 100644 --- a/mteb/tasks/zeroshot_classification/eng/__init__.py +++ b/mteb/tasks/zeroshot_classification/eng/__init__.py @@ -2,6 +2,10 @@ AVEDatasetVideoZeroShotClassification, AVEDatasetZeroShotClassification, ) +from .avmeme_exam_classification import ( + AVMemeAudioVideoZeroShotClassification, + AVMemeVideoZeroShotClassification, +) from .birdsnap import BirdsnapZeroShotClassification from .breakfast_classification import BreakfastZeroShotClassification from .caltech101 import Caltech101ZeroShotClassification @@ -56,6 +60,8 @@ "CLEVR", "AVEDatasetVideoZeroShotClassification", "AVEDatasetZeroShotClassification", + "AVMemeAudioVideoZeroShotClassification", + "AVMemeVideoZeroShotClassification", "BirdsnapZeroShotClassification", "BreakfastZeroShotClassification", "CIFAR10ZeroShotClassification", diff --git a/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py b/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py new file mode 100644 index 0000000000..d16caf2121 --- /dev/null +++ b/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +from mteb.abstasks.task_metadata import TaskMetadata +from mteb.abstasks.zeroshot_classification import AbsTaskZeroShotClassification + +CITATION = r""" +@inproceedings{jiang2026avmeme, + author = {Jiang, Xilin and Wang, Qiaolin and Wu, Junkai and He, Xiaomin and Xu, Zhongweiyang and Ma, Yinghao and Piao, Minshuo and Yang, Kaiyi and Zheng, Xiuwen and Shimizu, Riki and others}, + journal = {arXiv preprint arXiv:2601.17645}, + title = {AVMeme Exam: A Multimodal Multilingual Multicultural Benchmark for LLMs' Contextual and Cultural Knowledge and Thinking}, + year = {2026}, +} +""" + + +class AVMemeAudioVideoZeroShotClassification(AbsTaskZeroShotClassification): + metadata = TaskMetadata( + name="AVMemeAudioVideoZeroShot", + description="AVMeme Exam is a multimodal, multilingual, and multicultural benchmark of over 1,000 iconic Internet audio-visual memes spanning speech, songs, music, and sound effects. Each meme is paired with a Q&A assessing levels of understanding from surface content to context, emotion, usage, and world knowledge. This zero-shot classification task predicts the sound category of each meme clip.", + reference="https://arxiv.org/pdf/2601.17645", + dataset={ + "path": "mteb/AVMeme-Exam", + "revision": "7070d1979d9a4943dd49b2e72858eb1e54f6bd5b", + }, + type="VideoZeroshotClassification", + category="va2t", + eval_splits=["test"], + eval_langs=[ + "bos-Latn", + "bre-Latn", + "deu-Latn", + "eng-Latn", + "fas-Arab", + "fin-Latn", + "fra-Latn", + "hin-Deva", + "ita-Latn", + "jpn-Jpan", + "kor-Hang", + "por-Latn", + "rus-Cyrl", + "spa-Latn", + "tel-Telu", + "zho-Hans", + ], + main_score="accuracy", + date=("2026-01-25", "2026-01-25"), + domains=["Web", "Entertainment", "Music"], + task_subtypes=["Topic classification"], + license="cc-by-4.0", + annotations_creators="human-annotated", + dialect=[], + modalities=["video", "audio"], + sample_creation="found", + bibtex_citation=CITATION, + is_beta=True, + ) + input_column_name = ("video", "audio") + label_column_name: str = "category" + + def get_candidate_labels(self) -> list[str]: + return [ + name for name in self.dataset["test"].features[self.label_column_name].names + ] + + +class AVMemeVideoZeroShotClassification(AbsTaskZeroShotClassification): + metadata = TaskMetadata( + name="AVMemeVideoZeroShot", + description="AVMeme Exam is a multimodal, multilingual, and multicultural benchmark of over 1,000 iconic Internet audio-visual memes spanning speech, songs, music, and sound effects. Each meme is paired with a Q&A assessing levels of understanding from surface content to context, emotion, usage, and world knowledge. This zero-shot classification task predicts the sound category of each meme clip.", + reference="https://arxiv.org/pdf/2601.17645", + dataset={ + "path": "mteb/AVMeme-Exam", + "revision": "7070d1979d9a4943dd49b2e72858eb1e54f6bd5b", + }, + type="VideoZeroshotClassification", + category="v2t", + eval_splits=["test"], + eval_langs=[ + "bos-Latn", + "bre-Latn", + "deu-Latn", + "eng-Latn", + "fas-Arab", + "fin-Latn", + "fra-Latn", + "hin-Deva", + "ita-Latn", + "jpn-Jpan", + "kor-Hang", + "por-Latn", + "rus-Cyrl", + "spa-Latn", + "tel-Telu", + "zho-Hans", + ], + main_score="accuracy", + date=("2026-01-25", "2026-01-25"), + domains=["Web", "Entertainment", "Music"], + task_subtypes=["Topic classification"], + license="cc-by-4.0", + annotations_creators="human-annotated", + dialect=[], + modalities=["video"], + sample_creation="found", + bibtex_citation=CITATION, + is_beta=True, + ) + input_column_name = "video" + label_column_name: str = "category" + + def get_candidate_labels(self) -> list[str]: + return [ + name for name in self.dataset["test"].features[self.label_column_name].names + ] \ No newline at end of file From 0125e37fd8e4632d7ba945f339fb69b7ce5a3265 Mon Sep 17 00:00:00 2001 From: deep9539 Date: Wed, 29 Apr 2026 18:57:39 -0700 Subject: [PATCH 2/6] Add a video of emotion text in candidate_labels --- .../zeroshot_classification/eng/avmeme_exam_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py b/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py index d16caf2121..95c9339ec7 100644 --- a/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py +++ b/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py @@ -111,5 +111,5 @@ class AVMemeVideoZeroShotClassification(AbsTaskZeroShotClassification): def get_candidate_labels(self) -> list[str]: return [ - name for name in self.dataset["test"].features[self.label_column_name].names + f"a video of {name} emotion" for name in self.dataset["test"].features[self.label_column_name].names ] \ No newline at end of file From 9c239fd437b7c611dd42d9cdfb08c990b427f6b5 Mon Sep 17 00:00:00 2001 From: deep9539 Date: Wed, 29 Apr 2026 20:43:25 -0700 Subject: [PATCH 3/6] Remove emotion from the candidate label prefix for AVMEME --- .../zeroshot_classification/eng/avmeme_exam_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py b/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py index 95c9339ec7..57777fd8c9 100644 --- a/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py +++ b/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py @@ -60,7 +60,7 @@ class AVMemeAudioVideoZeroShotClassification(AbsTaskZeroShotClassification): def get_candidate_labels(self) -> list[str]: return [ - name for name in self.dataset["test"].features[self.label_column_name].names + f"a video of {name}" for name in self.dataset["test"].features[self.label_column_name].names ] @@ -111,5 +111,5 @@ class AVMemeVideoZeroShotClassification(AbsTaskZeroShotClassification): def get_candidate_labels(self) -> list[str]: return [ - f"a video of {name} emotion" for name in self.dataset["test"].features[self.label_column_name].names + f"a video of {name}" for name in self.dataset["test"].features[self.label_column_name].names ] \ No newline at end of file From eade654813a38eee0068bab705e771e3aefe250a Mon Sep 17 00:00:00 2001 From: deep9539 Date: Wed, 29 Apr 2026 20:52:53 -0700 Subject: [PATCH 4/6] Ran make lint --- .../eng/avmeme_exam_classification.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py b/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py index 57777fd8c9..b575d65b5a 100644 --- a/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py +++ b/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py @@ -60,7 +60,8 @@ class AVMemeAudioVideoZeroShotClassification(AbsTaskZeroShotClassification): def get_candidate_labels(self) -> list[str]: return [ - f"a video of {name}" for name in self.dataset["test"].features[self.label_column_name].names + f"a video of {name}" + for name in self.dataset["test"].features[self.label_column_name].names ] @@ -111,5 +112,6 @@ class AVMemeVideoZeroShotClassification(AbsTaskZeroShotClassification): def get_candidate_labels(self) -> list[str]: return [ - f"a video of {name}" for name in self.dataset["test"].features[self.label_column_name].names - ] \ No newline at end of file + f"a video of {name}" + for name in self.dataset["test"].features[self.label_column_name].names + ] From c4d133217e189e4ccc82f7ce621159d89e67a062 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Thu, 30 Apr 2026 13:54:42 +0300 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Roman Solomatin --- .../zeroshot_classification/eng/avmeme_exam_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py b/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py index b575d65b5a..cf52a8719d 100644 --- a/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py +++ b/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py @@ -50,7 +50,7 @@ class AVMemeAudioVideoZeroShotClassification(AbsTaskZeroShotClassification): license="cc-by-4.0", annotations_creators="human-annotated", dialect=[], - modalities=["video", "audio"], + modalities=["video", "audio", "text"], sample_creation="found", bibtex_citation=CITATION, is_beta=True, @@ -102,7 +102,7 @@ class AVMemeVideoZeroShotClassification(AbsTaskZeroShotClassification): license="cc-by-4.0", annotations_creators="human-annotated", dialect=[], - modalities=["video"], + modalities=["video", "text"], sample_creation="found", bibtex_citation=CITATION, is_beta=True, From 3c76882879240bcf2ef12eab53c0725cb6125ab0 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Thu, 30 Apr 2026 13:57:39 +0300 Subject: [PATCH 6/6] Apply suggestions from code review Co-authored-by: Roman Solomatin --- .../zeroshot_classification/eng/avmeme_exam_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py b/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py index cf52a8719d..987b2f00cc 100644 --- a/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py +++ b/mteb/tasks/zeroshot_classification/eng/avmeme_exam_classification.py @@ -56,7 +56,7 @@ class AVMemeAudioVideoZeroShotClassification(AbsTaskZeroShotClassification): is_beta=True, ) input_column_name = ("video", "audio") - label_column_name: str = "category" + label_column_name: str = "emotion" def get_candidate_labels(self) -> list[str]: return [ @@ -108,7 +108,7 @@ class AVMemeVideoZeroShotClassification(AbsTaskZeroShotClassification): is_beta=True, ) input_column_name = "video" - label_column_name: str = "category" + label_column_name: str = "emotion" def get_candidate_labels(self) -> list[str]: return [