Skip to content

Commit 275e4fd

Browse files
committed
Add strand information to the exon annotation output file.
1 parent 299a4e4 commit 275e4fd

1 file changed

Lines changed: 14 additions & 15 deletions

File tree

workflow/scripts/exon_annotation.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88
import argparse, gzip, os, sys
99

1010
# Constants
11-
# Usage and help section
11+
# Usage and help section
1212
_HELP = dedent("""
1313
@Usage:
1414
$ ./exon_annotation.py [-h] [--version] \\
1515
--input GTF_FILE \\
16-
--output TSV_FILE
16+
--output TSV_FILE
1717
1818
@About:
1919
Given an annotation in GTF format, this script
@@ -28,8 +28,9 @@
2828
• exon_seqname
2929
• exon_start
3030
• exon_end
31+
• exon_strand
3132
• exon_length
32-
33+
3334
If the 9th column of the GTF file does not
3435
contain the attributes listed above, they
3536
will resolve to "Unknown" in the output file.
@@ -41,7 +42,6 @@
4142
file as input.
4243
-o, --output TSV_FILE
4344
Output TSV file with parsed exon information.
44-
4545
@Options:
4646
-h, --help Shows this help message and exits.
4747
-v, --version Prints the version and exits.
@@ -199,7 +199,7 @@ def parsed_feature(
199199
Default is 'exon'.
200200
@param parse <list[str]>:
201201
List of attributes to parse from the GTF file.
202-
Default includes:
202+
Default includes:
203203
- gene_id, gene_name
204204
- transcript_id, transcript_name
205205
- exon_id, exon_number
@@ -208,19 +208,19 @@ def parsed_feature(
208208
The attribute key contains a dictionary of parsed
209209
attributes from the 9th column of the GTF file.
210210
"""
211-
# Handler for uncompressed files
211+
# Handler for uncompressed files
212212
open_func = open
213213
if gtf_file.endswith('.gz'):
214214
# Handler for gzip files
215215
open_func = gzip.open
216-
217-
line_number = 0 # Used for error reporting
216+
217+
line_number = 0 # Used for error reporting
218218
with open_func(gtf_file, 'rt') as fh:
219219
for line in fh:
220220
line_number += 1
221221
if line.startswith('#'):
222222
# Skip comment lines
223-
continue
223+
continue
224224
# Split the line into columns
225225
tokens = line.strip().split('\t')
226226
if len(tokens) < 9:
@@ -246,24 +246,24 @@ def parsed_feature(
246246
if __name__ == '__main__':
247247
# Parse command line arguments
248248
args = parse_cli_arguments()
249-
249+
250250
# Sanity check for usage
251251
if len(sys.argv) == 1:
252252
# Nothing was provided
253253
fatal('Invalid usage: {0} [-h] ...'.format(os.path.basename(sys.argv[0])))
254-
254+
255255
# Create output directory if
256256
# it does not exist
257257
output_dir = os.path.abspath(os.path.dirname(args.output))
258258
if not os.path.exists(output_dir):
259259
try: os.makedirs(output_dir)
260-
except OSError as e:
260+
except OSError as e:
261261
fatal(
262262
"Fatal error: Failed to create output directory: {0}\n{1}".format(
263263
output_dir, e
264264
)
265265
)
266-
266+
267267
# Attributes (key, value pairs) to parse
268268
# from the 9th column of the GTF file
269269
PARSE_ATTRS=[
@@ -272,7 +272,7 @@ def parsed_feature(
272272
"exon_id", "exon_number"
273273
]
274274
# Location attributes of the exon
275-
LOC_ATTRS = ["seqname", "start", "end", "length"]
275+
LOC_ATTRS = ["seqname", "start", "end", "strand", "length"]
276276
FEATURE = "exon"
277277
# Output TSV file handle
278278
with open(args.output, 'w') as out_fh:
@@ -292,4 +292,3 @@ def parsed_feature(
292292
output_line = attr_list + loc_list
293293
# Write to output file
294294
out_fh.write("{0}\n".format('\t'.join(output_line)))
295-

0 commit comments

Comments
 (0)