Skip to content

Commit 9a0aee6

Browse files
committed
Prepend 'chr' to input chromosome names when all reference sequence chromosome names start with 'chr'
1 parent 38b6c08 commit 9a0aee6

File tree

2 files changed

+14
-0
lines changed

2 files changed

+14
-0
lines changed

selene_sdk/predict/_variant_effect_prediction.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ def read_vcf_file(input_path,
6262
"""
6363
variants = []
6464
na_rows = []
65+
check_chr = True
66+
for chrom in reference_sequence.get_chrs():
67+
if not chrom.startswith("chr"):
68+
check_chr = False
69+
break
6570
with open(input_path, 'r') as file_handle:
6671
lines = file_handle.readlines()
6772
index = 0
@@ -85,6 +90,8 @@ def read_vcf_file(input_path,
8590
chrom = str(cols[0])
8691
if 'CHR' == chrom[:3]:
8792
chrom = chrom.replace('CHR', 'chr')
93+
elif "chr" not in chrom and check_chr is True:
94+
chrom = "chr" + chrom
8895

8996
if chrom == "chrMT" and \
9097
chrom not in reference_sequence.get_chrs():

selene_sdk/predict/model_predict.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,11 @@ def _get_sequences_from_bed_file(self,
298298
sequences = []
299299
labels = []
300300
na_rows = []
301+
check_chr = True
302+
for chrom in reference_sequence.get_chrs():
303+
if not chrom.startswith("chr"):
304+
check_chr = False
305+
break
301306
with open(input_path, 'r') as read_handle:
302307
for i, line in enumerate(read_handle):
303308
cols = line.strip().split('\t')
@@ -310,6 +315,8 @@ def _get_sequences_from_bed_file(self,
310315
strand = '.'
311316
if isinstance(strand_index, int) and len(cols) > strand_index:
312317
strand = cols[strand_index]
318+
if 'chr' not in chrom and check_chr is True:
319+
chrom = "chr{0}".format(chrom)
313320
if not str.isdigit(start) or not str.isdigit(end) \
314321
or chrom not in self.reference_sequence.genome:
315322
na_rows.append(line)

0 commit comments

Comments
 (0)