4 changes: 4 additions & 0 deletions mteb/tasks/zeroshot_classification/eng/__init__.py
@@ -19,6 +19,9 @@
from .rendered_sst2 import RenderedSST2
from .resisc45 import RESISC45ZeroShotClassification
from .sci_mmir import SciMMIR
from .something_something_v2_classification import (
SomethingSomethingV2ZeroShotClassification,
)
from .speech_commands import (
SpeechCommandsZeroshotClassificationV01,
SpeechCommandsZeroshotClassificationv02,
@@ -54,6 +57,7 @@
"STL10ZeroShotClassification",
"SUN397ZeroShotClassification",
"SciMMIR",
"SomethingSomethingV2ZeroShotClassification",
"SpeechCommandsZeroshotClassificationV01",
"SpeechCommandsZeroshotClassificationv02",
"StanfordCarsZeroShotClassification",
63 changes: 63 additions & 0 deletions mteb/tasks/zeroshot_classification/eng/something_something_v2_classification.py
@@ -0,0 +1,63 @@
from __future__ import annotations

from mteb.abstasks.task_metadata import TaskMetadata
from mteb.abstasks.zeroshot_classification import AbsTaskZeroShotClassification


class SomethingSomethingV2ZeroShotClassification(AbsTaskZeroShotClassification):
    metadata = TaskMetadata(
name="SomethingSomethingV2ZeroShotClassification",
description="Something-Something V2 contains 220,847 short video clips of humans performing pre-defined basic actions with everyday objects. This subset of 5,444 clips is used for action classification into 174 fine-grained categories.",
reference="https://developer.qualcomm.com/software/ai-datasets/something-something",
dataset={
"path": "mteb/SomethingSomethingV2",
"revision": "13bbc49a06df3ffe41f3823cf429e2d8d685689f",
},
type="VideoZeroshotClassification",
category="v2t",
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="accuracy",
date=(
"2017-06-01",
"2017-12-31",
),
domains=["Scene"],
task_subtypes=["Activity recognition"],
license="not specified",
annotations_creators="human-annotated",
dialect=[],
modalities=["video", "text"],
sample_creation="found",
is_beta=True,
bibtex_citation=r"""
@inproceedings{goyal2017something,
author = {Goyal, Raghav and Ebrahimi Kahou, Samira and Michalski, Vincent and Materzy{\'n}ska, Joanna and Westphal, Susanne and Kim, Heuna and Haenel, Valentin and Fruend, Ingo and Yianilos, Peter and Mueller-Freitag, Moritz and Hoppe, Florian and Thurau, Christian and Bax, Ingo and Memisevic, Roland},
booktitle = {2017 IEEE International Conference on Computer Vision (ICCV)},
doi = {10.1109/ICCV.2017.622},
pages = {5843-5851},
title = {The "Something Something" Video Database for Learning and Evaluating Visual Common Sense},
year = {2017},
}
""",
)

    input_column_name: str = "video"
    label_column_name: str = "label"

    is_cross_validation: bool = True

    def get_candidate_labels(self) -> list[str]:
        return [
            f"a video of {name}"
            for name in self.dataset["test"].features[self.label_column_name].names
        ]

    def dataset_transform(self, num_proc=None):
Contributor

If you push this using the command described here:
https://embeddings-benchmark.github.io/mteb/contributing/adding_a_dataset/#pushing-the-dataset-to-the-hub

then we avoid downloading the whole thing during eval.
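For illustration, the effect is roughly this plain `datasets`-API sketch; the linked guide documents mteb's own command, and the target repo id below is just a placeholder:

from datasets import load_dataset

# Hedged sketch, not the mteb tooling: materialize the subsampled test
# split once and push it, so evaluation pulls only the small subset
# instead of the full 220k-clip dataset.
ds = load_dataset("mteb/SomethingSomethingV2", split="test")
# ... apply the same stratified subsampling as in dataset_transform ...
ds.push_to_hub("mteb/SomethingSomethingV2-sampled")  # placeholder repo id
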

        self.dataset = self.stratified_subsampling(
            self.dataset,
            seed=self.seed,
            splits=["test"],
            label=self.label_column_name,
            n_samples=2048,
        )
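
`stratified_subsampling` here is the helper inherited from the base task class; purely as an illustration (not mteb's actual implementation), its effect is roughly this plain `datasets` sketch:

from datasets import DatasetDict

def subsample_sketch(
    ds: DatasetDict, seed: int, splits: list[str], label: str, n_samples: int
) -> DatasetDict:
    # Illustrative sketch only: cap each listed split at n_samples while
    # preserving the per-class label proportions. stratify_by_column
    # requires `label` to be a ClassLabel column, as it is here.
    for split in splits:
        if len(ds[split]) > n_samples:
            ds[split] = ds[split].train_test_split(
                test_size=n_samples, seed=seed, stratify_by_column=label
            )["test"]
    return ds

Capping the split at 2048 keeps evaluation cheap, while the stratification aims to keep all 174 classes represented in proportion.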
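For completeness, a hedged sketch of running the new task once merged, using mteb's standard entry points; the model name is only a placeholder for any zero-shot-capable vision-language model registered in mteb:

import mteb

# Look up the task by the name declared in its TaskMetadata.
task = mteb.get_task("SomethingSomethingV2ZeroShotClassification")
model = mteb.get_model("openai/clip-vit-base-patch32")  # placeholder model name
results = mteb.MTEB(tasks=[task]).run(model)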