-
Notifications
You must be signed in to change notification settings - Fork 80
Expand file tree
/
Copy pathqa_evaluation_config.yaml
More file actions
99 lines (91 loc) · 2.46 KB
/
qa_evaluation_config.yaml
File metadata and controls
99 lines (91 loc) · 2.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Settings shared by every node in the pipeline.
global_params:
  working_dir: cache
  graph_backend: networkx  # graph database backend; supported: kuzu, networkx
  kv_backend: json_kv  # key-value store backend; supported: rocksdb, json_kv
# Pipeline steps. Each entry names the step(s) it depends on via `dependencies`.
nodes:
# Source step: has no dependencies and reads the files listed under input_path.
- id: read_files  # id is unique in the pipeline and can be referenced by other steps
  op_name: read
  type: source
  dependencies: []
  params:
    # Input file paths; supported formats: json, jsonl, txt, pdf.
    # See examples/input_examples for examples.
    input_path:
    - examples/input_examples/jsonl_demo.jsonl
# Splits the output of read_files into chunks.
- id: chunk_documents
  op_name: chunk
  type: map_batch
  dependencies:
  - read_files
  execution_params:
    replicas: 4
  params:
    chunk_size: 1024  # chunk size for text splitting
    chunk_overlap: 100  # chunk overlap for text splitting
# Runs the build_kg operator on the output of chunk_documents.
- id: build_kg
  op_name: build_kg
  type: map_batch
  dependencies:
  - chunk_documents
  execution_params:
    replicas: 1
    batch_size: 128
# Runs the quiz operator on the output of build_kg.
- id: quiz
  op_name: quiz
  type: aggregate
  dependencies:
  - build_kg
  execution_params:
    replicas: 1
    batch_size: 128
  params:
    quiz_samples: 2  # number of quiz samples to generate
    # NOTE(review): assumed to be an operator parameter because it follows
    # quiz_samples; confirm it does not belong under execution_params.
    concurrency_limit: 200
# Runs the judge operator on the output of quiz.
- id: judge
  op_name: judge
  type: map_batch
  dependencies:
  - quiz
  execution_params:
    replicas: 1
    batch_size: 128
# Partitions the output of judge into communities.
- id: partition
  op_name: partition
  type: aggregate
  dependencies:
  - judge
  params:
    method: ece  # ece is a custom partition method based on comprehension loss
    method_params:
      max_units_per_community: 20  # max nodes and edges per community
      min_units_per_community: 5  # min nodes and edges per community
      max_tokens_per_community: 10240  # max tokens per community
      # unit sampling strategy; supported: random, max_loss, min_loss
      unit_sampling: max_loss
# Runs the generate operator on the output of partition.
- id: generate
  op_name: generate
  type: map_batch
  dependencies:
  - partition
  execution_params:
    replicas: 1
    batch_size: 128
  # NOTE(review): assumed to be a node-level flag; confirm it is not meant to
  # sit under execution_params.
  save_output: true
  params:
    method: aggregated  # one of: atomic, aggregated, multi_hop, cot, vqa
    data_format: ChatML  # one of: Alpaca, Sharegpt, ChatML
# Runs the evaluate operator on the output of generate.
- id: evaluate
  op_name: evaluate
  type: map_batch
  dependencies:
  - generate
  execution_params:
    replicas: 1
    batch_size: 128
  # NOTE(review): assumed to be a node-level flag; confirm it is not meant to
  # sit under execution_params.
  save_output: true
  params:
    target: qa
    # Enabled metrics; uncomment an entry to turn it on.
    metrics:
    - length
    - mtld
    # - reward_score
    # - uni_score
    # NOTE(review): presumably settings for the mtld metric, placed beside
    # `metrics` under params; confirm against the evaluate operator.
    mtld_params:
      threshold: 0.7