-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscores_metadata.json
More file actions
115 lines (115 loc) · 3.82 KB
/
Copy pathscores_metadata.json
File metadata and controls
115 lines (115 loc) · 3.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
{
"benchmark": {
"name": "Voices for Christ Benchmark",
"submission_process": "Submit a pull request that adds a new benchmark_data/<mode> directory with one output file per original filename.",
"task": "speech restoration and denoising",
"version": "0.3.0"
},
"dataset": {
"clip_count": 100,
"clip_duration_seconds": 30,
"distribution_status": "public",
"license_note": "Current working assumption: the source Voices for Christ recordings are pre-1990 and open-domain/public-domain compatible. This should be verified before wider redistribution or downstream commercial reuse.",
"license_status": "assumed_public_domain_unverified",
"name": "Voices for Christ archive excerpts",
"source_format": "mp3",
"splits": [
"public_test"
],
"storage": "git-lfs",
"total_duration_seconds": 3000,
"version": "0.1.0"
},
"results": {
"mode_summaries": {
"adobe_podcast": {
"mean": 3.8695788383483887,
"median": 3.941006660461426,
"n": 100,
"std": 0.5502485632896423
},
"diffio_3_5": {
"mean": 4.257707118988037,
"median": 4.33013391494751,
"n": 100,
"std": 0.41899362206459045
},
"original": {
"mean": 2.147495746612549,
"median": 2.142691135406494,
"n": 100,
"std": 0.4837152063846588
}
}
},
"run": {
"argv": [
"score_scoreq.py"
],
"file_count": 100,
"mode_names": [
"original",
"adobe_podcast",
"diffio_3_5"
],
"package_versions": {
"numpy": "2.2.6",
"onnxruntime-gpu": "1.23.2",
"soundfile": "0.13.1",
"torch": "2.8.0",
"torchaudio": "2.8.0",
"tqdm": "4.67.3"
},
"platform": "Linux-6.5.0-15-generic-x86_64-with-glibc2.35",
"python_version": "3.10.12",
"timestamp_utc": "2026-03-17T22:50:55.306102Z"
},
"scoring": {
"data_domain": "natural",
"mode": "nr",
"model_filename": "adapt_nr_telephone.onnx",
"model_url": "https://zenodo.org/records/15739280/files/adapt_nr_telephone.onnx",
"onnx_execution_providers": [
"CUDAExecutionProvider",
"CPUExecutionProvider"
],
"primary_metric": "SCOREQ",
"resolved_model_filename": "adapt_nr_telephone.onnx",
"resolved_model_path": "/home/nharmon/.cache/scoreq/onnx-models/adapt_nr_telephone.onnx",
"resolved_model_url": "https://zenodo.org/records/15739280/files/adapt_nr_telephone.onnx",
"secondary_metric": "WER",
"secondary_metric_rationale": "Use a stronger decode to freeze proxy transcripts for the noisy originals once, then use a weaker decode on restored outputs so ASR WER remains sensitive to restoration gains in a no-reference benchmark.",
"secondary_metric_reference_asr_model": "faster-whisper large-v3",
"secondary_metric_reference_decode": {
"beam_size": 15,
"best_of": 15,
"compute_type": "float16",
"condition_on_previous_text": true,
"device": "cuda:0",
"patience": 2.0
},
"secondary_metric_reference_policy": "Use the frozen transcript in reference_transcripts.csv for each benchmark_data/original/<filename> as the reference transcript for that filename.",
"secondary_metric_submission_asr_model": "faster-whisper small.en",
"secondary_metric_submission_decode": {
"beam_size": 1,
"best_of": 1,
"compute_type": "float16",
"condition_on_previous_text": false,
"device": "cuda:1",
"patience": 1.0
},
"tertiary_metric": "DNSMOS_P835",
"tertiary_metric_config": {
"mono": true,
"outputs": [
"p808_mos",
"sig",
"bak",
"ovr"
],
"personalized": false,
"sample_rate": 16000
},
"tertiary_metric_local_implementation": "torchmetrics.functional.audio.dnsmos.deep_noise_suppression_mean_opinion_score"
}
}