88import argparse , gzip , os , sys
99
1010# Constants
11- # Usage and help section
11+ # Usage and help section
1212_HELP = dedent ("""
1313@Usage:
1414 $ ./exon_annotation.py [-h] [--version] \\
1515 --input GTF_FILE \\
16- --output TSV_FILE
16+ --output TSV_FILE
1717
1818@About:
1919 Given an annotation in GTF format, this script
2828 • exon_seqname
2929 • exon_start
3030 • exon_end
31+ • exon_strand
3132 • exon_length
32-
33+
3334 If the 9th column of the GTF file does not
3435 contain the attributes listed above, they
3536 will resolve to "Unknown" in the output file.
4142 file as input.
4243 -o, --output TSV_FILE
4344 Output TSV file with parsed exon information.
44-
4545@Options:
4646 -h, --help Shows this help message and exits.
4747 -v, --version Prints the version and exits.
@@ -199,7 +199,7 @@ def parsed_feature(
199199 Default is 'exon'.
200200 @param parse <list[str]>:
201201 List of attributes to parse from the GTF file.
202- Default includes:
202+ Default includes:
203203 - gene_id, gene_name
204204 - transcript_id, transcript_name
205205 - exon_id, exon_number
@@ -208,19 +208,19 @@ def parsed_feature(
208208 The attribute key contains a dictionary of parsed
209209 attributes from the 9th column of the GTF file.
210210 """
211- # Handler for uncompressed files
211+ # Handler for uncompressed files
212212 open_func = open
213213 if gtf_file .endswith ('.gz' ):
214214 # Handler for gzip files
215215 open_func = gzip .open
216-
217- line_number = 0 # Used for error reporting
216+
217+ line_number = 0 # Used for error reporting
218218 with open_func (gtf_file , 'rt' ) as fh :
219219 for line in fh :
220220 line_number += 1
221221 if line .startswith ('#' ):
222222 # Skip comment lines
223- continue
223+ continue
224224 # Split the line into columns
225225 tokens = line .strip ().split ('\t ' )
226226 if len (tokens ) < 9 :
@@ -246,24 +246,24 @@ def parsed_feature(
246246if __name__ == '__main__' :
247247 # Parse command line arguments
248248 args = parse_cli_arguments ()
249-
249+
250250 # Sanity check for usage
251251 if len (sys .argv ) == 1 :
252252 # Nothing was provided
253253 fatal ('Invalid usage: {0} [-h] ...' .format (os .path .basename (sys .argv [0 ])))
254-
254+
255255 # Create output directory if
256256 # it does not exist
257257 output_dir = os .path .abspath (os .path .dirname (args .output ))
258258 if not os .path .exists (output_dir ):
259259 try : os .makedirs (output_dir )
260- except OSError as e :
260+ except OSError as e :
261261 fatal (
262262 "Fatal error: Failed to create output directory: {0}\n {1}" .format (
263263 output_dir , e
264264 )
265265 )
266-
266+
267267 # Attributes (key, value pairs) to parse
268268 # from the 9th column of the GTF file
269269 PARSE_ATTRS = [
@@ -272,7 +272,7 @@ def parsed_feature(
272272 "exon_id" , "exon_number"
273273 ]
274274 # Location attributes of the exon
275- LOC_ATTRS = ["seqname" , "start" , "end" , "length" ]
275+ LOC_ATTRS = ["seqname" , "start" , "end" , "strand" , " length" ]
276276 FEATURE = "exon"
277277 # Output TSV file handle
278278 with open (args .output , 'w' ) as out_fh :
@@ -292,4 +292,3 @@ def parsed_feature(
292292 output_line = attr_list + loc_list
293293 # Write to output file
294294 out_fh .write ("{0}\n " .format ('\t ' .join (output_line )))
295-
0 commit comments