-
Notifications
You must be signed in to change notification settings - Fork 160
Expand file tree
/
Copy pathschema.rs
More file actions
50 lines (45 loc) · 1.78 KB
/
schema.rs
File metadata and controls
50 lines (45 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
use std::sync::Arc;
use arrow_schema::DataType;
use arrow_schema::DataType::*;
use arrow_schema::Field;
use arrow_schema::Schema;
use arrow_schema::SchemaRef;
use noodles_vcf::Header;
use crate::statpopgen::vcf_conversion::data_type_from_info;
/// Wraps `x` in an Arrow `List` data type.
///
/// The list's element field is named `"item"` and is marked nullable.
pub fn list(x: DataType) -> DataType {
    let element = Field::new("item", x, true);
    List(Arc::new(element))
}
pub fn schema_from_vcf_header(header: &Header) -> SchemaRef {
let info_fields = header.infos().iter().map(|(name, info)| {
let data_type = data_type_from_info(info);
Arc::new(Field::new(name, data_type, true))
});
Arc::from(Schema::new(
[
Arc::new(Field::new("CHROM", Utf8, true)),
Arc::new(Field::new("POS", UInt64, true)),
Arc::new(Field::new("ID", Utf8, true)),
Arc::new(Field::new("REF", Utf8, true)),
Arc::new(Field::new("ALT", list(Utf8), true)),
Arc::new(Field::new("QUAL", Float32, true)),
Arc::new(Field::new("FILTER", list(Utf8), true)),
]
.into_iter()
.chain(info_fields)
.chain([
// GT is NULL, 0, 1, or 2
Arc::new(Field::new("GT", list(UInt8), true)),
Arc::new(Field::new("GQ", list(Int32), true)),
Arc::new(Field::new("DP", list(Int32), true)),
Arc::new(Field::new("AD", list(list(Int32)), true)),
Arc::new(Field::new("MIN_DP", list(Int32), true)),
Arc::new(Field::new("PGT", list(Int32), true)),
Arc::new(Field::new("PID", list(Utf8), true)),
Arc::new(Field::new("PL", list(list(Int32)), true)),
Arc::new(Field::new("SB", list(list(Int32)), true)),
])
.collect::<Vec<_>>(),
))
}