Skip to content

Commit f85e7cd

Browse files
[benchmark] evaluate the detectors on the AutoShot dataset (#486)
benchmark detectors on the AutoShot dataset Co-authored-by: Brandon Castellano <Breakthrough@users.noreply.github.com>
1 parent 99b116e commit f85e7cd

File tree

5 files changed

+86
-9
lines changed

5 files changed

+86
-9
lines changed

benchmarks/README.md

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,14 @@ unzip BBC/videos.zip -d BBC
1616
rm -rf BBC/videos.zip
1717
```
1818

19-
### Evaluation
19+
### AutoShot
20+
Download `AutoShot_test.tar.gz` from [Google Drive](https://drive.google.com/file/d/17diRkLlNUUjHDooXdqFUTXYje2-x4Yt6/view?usp=sharing).
21+
```
22+
tar -zxvf AutoShot_test.tar.gz
23+
rm AutoShot_test.tar.gz
24+
```
25+
26+
## Evaluation
2027
To evaluate PySceneDetect on a dataset, run the following command:
2128
```
2229
python benchmark.py -d <dataset_name> --detector <detector_name>
@@ -28,7 +35,8 @@ python evaluate.py -d BBC --detector detect-content
2835

2936
### Result
3037
The performance is computed as recall, precision, f1, and elapsed time.
31-
The following results indicate that ContentDetector achieves the highest performance on the BBC dataset.
38+
39+
#### BBC
3240

3341
| Detector | Recall | Precision | F1 | Elapsed time (second) |
3442
|:-----------------:|:------:|:---------:|:-----:|:---------------------:|
@@ -38,6 +46,16 @@ The following results indicate that ContentDetector achieves the highest perform
3846
| HistogramDetector | 90.55 | 72.76 | 80.68 | 16.13 |
3947
| ThresholdDetector | 0.00 | 0.00 | 0.00 | 18.95 |
4048

49+
#### AutoShot
50+
51+
| Detector | Recall | Precision | F1 | Elapsed time (second) |
52+
|:-----------------:|:------:|:---------:|:-----:|:---------------------:|
53+
| AdaptiveDetector | 70.77 | 77.65 | 74.05 | 1.23 |
54+
| ContentDetector | 63.67 | 76.40 | 69.46 | 1.21 |
55+
| HashDetector | 56.66 | 76.35 | 65.05 | 1.16 |
56+
| HistogramDetector | 63.36 | 53.34 | 57.92 | 1.23 |
57+
| ThresholdDetector | 0.75 | 38.64 | 1.47 | 1.24 |
58+
4159
## Citation
4260
### BBC
4361
```
@@ -47,4 +65,14 @@ The following results indicate that ContentDetector achieves the highest perform
4765
booktitle = {Proceedings of the 23rd ACM International Conference on Multimedia},
4866
year = {2015},
4967
}
50-
```
68+
```
69+
70+
### AutoShot
71+
```
72+
@InProceedings{autoshot_dataset,
73+
author = {Wentao Zhu and Yufang Huang and Xiufeng Xie and Wenxian Liu and Jincan Deng and Debing Zhang and Zhangyang Wang and Ji Liu},
74+
title = {AutoShot: A Short Video Dataset and State-of-the-Art Shot Boundary Detection},
75+
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
76+
year = {2023},
77+
}
78+
```

benchmarks/autoshot_dataset.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import glob
2+
import os
3+
4+
class AutoShotDataset:
    """The AutoShot dataset (test split).

    Proposed by Zhu et al. in "AutoShot: A Short Video Dataset and
    State-of-the-Art Shot Boundary Detection".
    Link: https://openaccess.thecvf.com/content/CVPR2023W/NAS/html/Zhu_AutoShot_A_Short_Video_Dataset_and_State-of-the-Art_Shot_Boundary_Detection_CVPRW_2023_paper.html

    The original test set consists of 200 videos, but 36 videos are missing
    (AutoShot/videos/<video_id>.mp4).  The annotated scenes are provided in
    corresponding files (AutoShot/annotations/<video_id>.txt).
    """

    def __init__(self, dataset_dir: str):
        """Index the dataset under *dataset_dir*.

        Args:
            dataset_dir: Root directory containing ``videos/*.mp4`` and
                ``annotations/*.txt``.
        """
        # Sort both listings so videos and annotations pair up by their
        # shared <video_id> filename stem.
        self._video_files = sorted(glob.glob(os.path.join(dataset_dir, "videos", "*.mp4")))
        self._scene_files = sorted(glob.glob(os.path.join(dataset_dir, "annotations", "*.txt")))
        # zip() would silently truncate on a length mismatch, so check the
        # counts explicitly first (mirrors the same check in BBCDataset).
        assert len(self._video_files) == len(self._scene_files)
        for video_file, scene_file in zip(self._video_files, self._scene_files):
            video_id = os.path.basename(video_file).split(".")[0]
            scene_id = os.path.basename(scene_file).split(".")[0]
            assert video_id == scene_id

    def __getitem__(self, index):
        """Return the ``(video_file, scene_file)`` path pair at *index*."""
        video_file = self._video_files[index]
        scene_file = self._scene_files[index]
        return video_file, scene_file

    def __len__(self):
        """Return the number of (video, annotation) pairs found."""
        return len(self._video_files)

benchmarks/bbc_dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def __init__(self, dataset_dir: str):
2020
assert len(self._video_files) == len(self._scene_files)
2121
for video_file, scene_file in zip(self._video_files, self._scene_files):
2222
video_id = os.path.basename(video_file).replace("bbc_", "").split(".")[0]
23-
scene_id = os.path.basename(scene_file).split("_")[0]
23+
scene_id = os.path.basename(scene_file).split("-")[0]
2424
assert video_id == scene_id
2525

2626
def __getitem__(self, index):

benchmarks/benchmark.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
import time
33

44
from bbc_dataset import BBCDataset
5+
from autoshot_dataset import AutoShotDataset
6+
57
from evaluator import Evaluator
68
from tqdm import tqdm
79

@@ -15,7 +17,7 @@
1517
)
1618

1719

18-
def make_detector(detector_name: str):
20+
def _make_detector(detector_name: str):
1921
detector_map = {
2022
"detect-adaptive": AdaptiveDetector(),
2123
"detect-content": ContentDetector(),
@@ -26,11 +28,19 @@ def make_detector(detector_name: str):
2628
return detector_map[detector_name]
2729

2830

31+
def _make_dataset(dataset_name: str):
32+
dataset_map = {
33+
"BBC": BBCDataset("BBC"),
34+
"AutoShot": AutoShotDataset("AutoShot"),
35+
}
36+
return dataset_map[dataset_name]
37+
38+
2939
def _detect_scenes(detector_type: str, dataset):
3040
pred_scenes = {}
3141
for video_file, scene_file in tqdm(dataset):
3242
start = time.time()
33-
detector = make_detector(detector_type)
43+
detector = _make_detector(detector_type)
3444
pred_scene_list = detect(video_file, detector)
3545
elapsed = time.time() - start
3646
scenes = {
@@ -53,7 +63,7 @@ def _detect_scenes(detector_type: str, dataset):
5363

5464

5565
def main(args):
56-
pred_scenes = _detect_scenes(detector_type=args.detector, dataset=BBCDataset("BBC"))
66+
pred_scenes = _detect_scenes(detector_type=args.detector, dataset=_make_dataset(args.dataset))
5767
result = Evaluator().evaluate_performance(pred_scenes)
5868
print("Overall Results:")
5969
print(
@@ -65,6 +75,16 @@ def main(args):
6575

6676
if __name__ == "__main__":
6777
parser = argparse.ArgumentParser(description="Benchmarking PySceneDetect performance.")
78+
parser.add_argument(
79+
"--dataset",
80+
type=str,
81+
choices=[
82+
"BBC",
83+
"AutoShot",
84+
],
85+
default="BBC",
86+
help="Dataset name. Supported datasets are BBC and AutoShot.",
87+
)
6888
parser.add_argument(
6989
"--detector",
7090
type=str,

benchmarks/evaluator.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@ def evaluate_performance(self, pred_scenes):
2424
total_pred += len(pred_list)
2525
total_gt += len(gt_scene_list)
2626

27-
assert total_pred, pred_scenes
2827
recall = total_correct / total_gt
29-
precision = total_correct / total_pred
28+
precision = total_correct / total_pred if total_pred != 0 else 0
3029
f1 = 2 * recall * precision / (recall + precision) if (recall + precision) != 0 else 0
3130
avg_elapsed = mean([x["elapsed"] for x in pred_scenes.values()])
3231
result = {

0 commit comments

Comments
 (0)