-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_test_stats.py
More file actions
80 lines (65 loc) · 2.54 KB
/
check_test_stats.py
File metadata and controls
80 lines (65 loc) · 2.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
テストデータの統計情報を確認するスクリプト
"""
import json
from collections import Counter

import pandas as pd
_DEFAULT_TEST_DATA_PATH = 'data/processed/test_dataset/test_samples_300.json'


def _print_distribution(heading, samples, field):
    """Print *heading*, then one ``value: count`` line per distinct *field* value."""
    # Count before printing so that a sample missing *field* fails before
    # the heading is emitted.
    counts = Counter(sample[field] for sample in samples)
    print(heading)
    # Counter keeps first-seen order, so values are listed in the order in
    # which they first occur in *samples*.
    for value, count in counts.items():
        print(f' {value}: {count}')


def check_test_data_stats(path: str = _DEFAULT_TEST_DATA_PATH) -> None:
    """Print summary statistics for a JSON test dataset.

    The file is parsed as JSON and treated as a sequence of sample
    mappings. The report printed to stdout contains, in order:

    1. the total number of samples;
    2. per-value counts of ``disaster_type``, ``difficulty_level`` and
       ``question_type``;
    3. the number of distinct ``question`` values;
    4. disaster type, question and difficulty of the first five samples;
    5. for every disaster type, the first sample seen with that type
       (question, first 100 characters of ``context``, first 60
       characters of ``answer``, and difficulty).

    Args:
        path: Location of the JSON file to read. Defaults to
            ``data/processed/test_dataset/test_samples_300.json``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
        KeyError: If a sample lacks one of the fields read for the report.
    """
    # Load the test data.
    with open(path, 'r', encoding='utf-8') as f:
        test_data = json.load(f)

    print('=== テストデータ統計情報 ===')
    print(f'総サンプル数: {len(test_data)}')

    # Distribution by disaster type, difficulty level and question type.
    _print_distribution('\n災害タイプ別分布:', test_data, 'disaster_type')
    _print_distribution('\n難易度別分布:', test_data, 'difficulty_level')
    _print_distribution('\n質問タイプ別分布:', test_data, 'question_type')

    # Number of distinct question strings.
    unique_questions = {sample['question'] for sample in test_data}
    print(f'\nユニークな質問数: {len(unique_questions)}')

    # A few example questions taken from the head of the dataset.
    print('\n=== テスト質問の例 ===')
    for number, sample in enumerate(test_data[:5], start=1):
        print(f'{number}. 災害タイプ: {sample["disaster_type"]}')
        print(f' 質問: {sample["question"]}')
        print(f' 難易度: {sample["difficulty_level"]}')
        print()

    # One example per disaster type. The choice is deterministic, not
    # random: the first sample encountered for each type is kept.
    print('\n=== 各災害タイプの質問例 ===')
    first_by_type = {}
    for sample in test_data:
        first_by_type.setdefault(sample['disaster_type'], sample)

    for dtype, sample in first_by_type.items():
        print(f'【{dtype.upper()}】')
        print(f' 質問: {sample["question"]}')
        # The ellipsis is appended unconditionally, even when the text is
        # shorter than the slice length.
        print(f' コンテキスト: {sample["context"][:100]}...')
        print(f' 回答: {sample["answer"][:60]}...')
        print(f' 難易度: {sample["difficulty_level"]}')
        print()
# Produce the report only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    check_test_data_stats()