@@ -161,19 +161,23 @@ def parse_input_vcf(
161161 # Retrieve the GT from the first sample in the record
162162 genotype = retrieve_genotype (record )
163163
164- elif "WP" in [x .split ('=' ) for x in record [7 ].split (';' )]:
164+ elif "WP" in [x .split ('=' )[ 0 ] for x in record [7 ].split (';' ) if '=' in x ]:
165165 """
166166 "WP" is the legacy code NEAT used for genotype it added. It was found in the INFO field.
167167 We're just going to make a sample column in this version of NEAT
168168 The logic of the statement is split the info field on ';' which is used as a divider in that field.
169169 Most but not all fields also have an '=', so split there too, then look for "WP"
170170 """
171171 format_column = f"GT:{ record [8 ]} "
172- for record in record [7 ].split (';' ):
173- if record .startswith ('WP' ):
174- genotype = record .split ('=' )[1 ].replace ('/' , '|' ).split ('|' )
172+ sample_field = record [9 ]
173+ for info_item in record [7 ].split (';' ):
174+ if info_item .startswith ('WP' ) and '=' in info_item :
175+ genotype = info_item .split ('=' )[1 ].replace ('/' , '|' ).split ('|' )
175176 genotype = np .array ([int (x ) for x in genotype ])
176- normal_sample_field = f"{ get_genotype_string (genotype )} :{ record [9 ]} "
177+ normal_sample_field = f"{ get_genotype_string (genotype )} :{ sample_field } "
178+ elif info_item .startswith ('WP' ):
179+ _LOG .error (f'Malformed WP field in INFO (missing value): { record [7 ]} ' )
180+ sys .exit (1 )
177181
178182 else :
179183 format_column = 'GT:' + record [8 ]
@@ -182,20 +186,22 @@ def parse_input_vcf(
182186 gt_field = get_genotype_string (genotype )
183187 normal_sample_field = f'{ gt_field } :{ record [9 ]} '
184188
185- elif "WP" in [x .split ('=' ) for x in record [7 ].split (';' )]:
189+ elif "WP" in [x .split ('=' )[ 0 ] for x in record [7 ].split (';' ) if '=' in x ]:
186190 """
187191 "WP" is the legacy code NEAT used for genotype it added. It was found in the INFO field.
188192 We're just going to make a sample column in this version of NEAT
189193 The logic of the statement is split the info field on ';' which is used as a divider in that field.
190194 Most but not all fields also have an '=', so split there too, then look for "WP"
191195 """
192196 format_column = "GT"
193- info_split = record [7 ].split (';' )
194- for record in info_split :
195- if record .startswith ('WP' ):
196- genotype = record .split ('=' )[1 ].replace ('/' , '|' ).split ('|' )
197+ for info_item in record [7 ].split (';' ):
198+ if info_item .startswith ('WP' ) and '=' in info_item :
199+ genotype = info_item .split ('=' )[1 ].replace ('/' , '|' ).split ('|' )
197200 genotype = np .array ([int (x ) for x in genotype ])
198201 normal_sample_field = get_genotype_string (genotype )
202+ elif info_item .startswith ('WP' ):
203+ _LOG .error (f'Malformed WP field in INFO (missing value): { record [7 ]} ' )
204+ sys .exit (1 )
199205
200206 else :
201207 # If there was no format column, there's no sample column, so we'll generate one
0 commit comments