@@ -49,14 +49,16 @@ class OutputParser(metaclass=ABCMeta):
def __init__(self, **kwargs):
    """Store the parser configuration passed as keyword arguments.

    Required keys (looked up by index, so a missing one raises
    ``KeyError``):
        input_iedb_files: paths that ``parse_iedb_file`` iterates over,
            opening each one as tab-delimited text.
        input_tsv_file: path opened by ``parse_input_tsv_file``.
        key_files: paths that ``parse_iedb_file`` iterates over, loading
            each one as YAML. This replaces the former single
            ``key_file`` argument.
        output_file: stored as-is (presumably the destination path;
            its use is outside this block).
        sample_name: stored as-is; the pVACsplice parser builds a
            filename regex from it.

    Optional keys (read with ``kwargs.get``; ``None`` when absent
    unless noted):
        add_sample_name_column: stored as ``self.add_sample_name``.
        flurry_state
        use_normalized_percentiles: defaults to ``False``.
        reference_scores_path
        pipeline_type
        input_file_type: compared against ``'pvacvector_input_fasta'``
            by ``UnmatchedSequencesOutputParser.parse_iedb_file``.

    Raises:
        KeyError: if any required key is missing from ``kwargs``.
    """
    # Required inputs: fail fast with KeyError when one is missing.
    self.input_iedb_files = kwargs['input_iedb_files']
    self.input_tsv_file = kwargs['input_tsv_file']
    self.key_files = kwargs['key_files']
    self.output_file = kwargs['output_file']
    self.sample_name = kwargs['sample_name']

    # Optional switches and paths.
    self.add_sample_name = kwargs.get('add_sample_name_column')
    self.flurry_state = kwargs.get('flurry_state')
    self.use_normalized_percentiles = kwargs.get('use_normalized_percentiles', False)
    self.reference_scores_path = kwargs.get('reference_scores_path', None)
    # Starts empty; nothing in this method populates it.
    self.reference_scores = {}
    self.pipeline_type = kwargs.get('pipeline_type', None)
    self.input_file_type = kwargs.get('input_file_type')
6062
6163 def parse_input_tsv_file (self ):
6264 with open (self .input_tsv_file , 'r' ) as reader :
@@ -1090,12 +1092,16 @@ def execute(self):
10901092class DefaultOutputParser (OutputParser ):
10911093
10921094 def parse_iedb_file (self , tsv_entries ):
1093- with open (self .key_file , 'r' ) as key_file_reader :
1094- protein_identifiers_from_label = yaml .load (key_file_reader , Loader = yaml .FullLoader )
1095+ protein_identifiers_from_label = {}
1096+ for key_file in self .key_files :
1097+ with open (key_file , 'r' ) as key_file_reader :
1098+ chunk = key_file .rsplit ('.' , 2 )[1 ].split ('_' )[1 ]
1099+ protein_identifiers_from_label [chunk ] = yaml .load (key_file_reader , Loader = yaml .FullLoader )
10951100 iedb_results = {}
10961101 wt_iedb_results = {}
10971102 for input_iedb_file in self .input_iedb_files :
10981103 with open (input_iedb_file , 'r' ) as reader :
1104+ chunk = input_iedb_file .rsplit ('_' , 1 )[1 ]
10991105 iedb_tsv_reader = csv .DictReader (reader , delimiter = '\t ' )
11001106 filename = os .path .basename (input_iedb_file )
11011107
@@ -1118,8 +1124,8 @@ def parse_iedb_file(self, tsv_entries):
11181124 allele = line ['allele' ]
11191125 peptide_length = len (epitope )
11201126
1121- if protein_identifiers_from_label [protein_label ] is not None :
1122- protein_identifiers = protein_identifiers_from_label [protein_label ]
1127+ if protein_identifiers_from_label [chunk ][ protein_label ] is not None :
1128+ protein_identifiers = protein_identifiers_from_label [chunk ][ protein_label ]
11231129
11241130 for protein_identifier in protein_identifiers :
11251131 (protein_type , tsv_index ) = protein_identifier .split ('.' , 1 )
@@ -1152,11 +1158,21 @@ def parse_iedb_file(self, tsv_entries):
11521158
11531159class UnmatchedSequencesOutputParser (OutputParser ):
11541160 def parse_iedb_file (self ):
1155- with open (self .key_file , 'r' ) as key_file_reader :
1156- tsv_indices_from_label = yaml .load (key_file_reader , Loader = yaml .FullLoader )
1161+ protein_identifiers_from_label = {}
1162+ for key_file in self .key_files :
1163+ with open (key_file , 'r' ) as key_file_reader :
1164+ if self .input_file_type == 'pvacvector_input_fasta' :
1165+ chunk = 1
1166+ else :
1167+ chunk = key_file .rsplit ('.' , 2 )[1 ].split ('_' )[1 ]
1168+ protein_identifiers_from_label [chunk ] = yaml .load (key_file_reader , Loader = yaml .FullLoader )
11571169 iedb_results = {}
11581170 for input_iedb_file in self .input_iedb_files :
11591171 with open (input_iedb_file , 'r' ) as reader :
1172+ if self .input_file_type == 'pvacvector_input_fasta' :
1173+ chunk = 1
1174+ else :
1175+ chunk = input_iedb_file .rsplit ('_' , 1 )[1 ]
11601176 iedb_tsv_reader = csv .DictReader (reader , delimiter = '\t ' )
11611177 filename = os .path .basename (input_iedb_file )
11621178
@@ -1179,8 +1195,8 @@ def parse_iedb_file(self):
11791195 allele = line ['allele' ]
11801196 peptide_length = len (epitope )
11811197
1182- if tsv_indices_from_label [protein_label ] is not None :
1183- tsv_indices = tsv_indices_from_label [protein_label ]
1198+ if protein_identifiers_from_label [ chunk ] [protein_label ] is not None :
1199+ tsv_indices = protein_identifiers_from_label [ chunk ] [protein_label ]
11841200
11851201 for index in tsv_indices :
11861202 key = '|' .join ([index , position ])
@@ -1353,13 +1369,17 @@ def execute(self):
13531369class PvacspliceOutputParser (UnmatchedSequencesOutputParser ):
13541370 def parse_iedb_file (self ):
13551371 # input key file
1356- with open (self .key_file , 'r' ) as key_file_reader :
1357- protein_identifiers_from_label = yaml .load (key_file_reader , Loader = yaml .FullLoader )
1372+ protein_identifiers_from_label = {}
1373+ for key_file in self .key_files :
1374+ with open (key_file , 'r' ) as key_file_reader :
1375+ chunk = key_file .rsplit ('.' , 2 )[1 ].split ('_' )[1 ]
1376+ protein_identifiers_from_label [chunk ] = yaml .load (key_file_reader , Loader = yaml .FullLoader )
13581377 # final output
13591378 iedb_results = {}
13601379 for input_iedb_file in self .input_iedb_files :
13611380 # input iedb file
13621381 with open (input_iedb_file , 'r' ) as reader :
1382+ chunk = input_iedb_file .rsplit ('_' , 1 )[1 ]
13631383 iedb_tsv_reader = csv .DictReader (reader , delimiter = '\t ' )
13641384 filename = os .path .basename (input_iedb_file )
13651385 pattern = re .compile (rf"{ re .escape (self .sample_name )} \.(\w+(?:-\d+\.\d+)?)" )
@@ -1376,9 +1396,9 @@ def parse_iedb_file(self):
13761396 peptide_length = len (epitope )
13771397 scores = self .get_scores (line , method )
13781398 # get fasta_id/combined_name from fasta key file
1379- if protein_identifiers_from_label [fasta_label ] is not None :
1399+ if protein_identifiers_from_label [chunk ][ fasta_label ] is not None :
13801400 # comma-separated string (1 or more ids) as 1 entry in list
1381- protein_label = protein_identifiers_from_label [fasta_label ][0 ]
1401+ protein_label = protein_identifiers_from_label [chunk ][ fasta_label ][0 ]
13821402 # one index at a time
13831403 for key in protein_label .split (',' ):
13841404
0 commit comments