|
13 | 13 | _HELP = dedent(""" |
14 | 14 | @Usage: |
15 | 15 | $ ./splicing_annotation.py [-h] [--version] \\ |
| 16 | + [--sort-exons-by-exon-order] \\ |
16 | 17 | --exon-ann EXON_ANN_FILE \\ |
17 | 18 | --output OUTPUT_FILE |
18 | 19 | @About: |
|
28 | 29 | • exon_id.1|exon_id.2|... |
29 | 30 | • exon_number.1|exon_number.2|... |
30 | 31 | • exon_seqname |
31 | | - • exon_start:exon_end.1|exon_start.2:exon_end.2|... |
| 32 | + • exon_start.1:exon_end.1|exon_start.2:exon_end.2|... |
32 | 33 | • exon_strand |
33 | 34 |
|
34 | 35 | This file has 1:M exon information collapsed by |
|
47 | 48 | exon information. This represents the |
48 | 49 | transcript model for each gene. |
49 | 50 | @Options: |
| 51 | + --sort-exons-by-exon-order |
| 52 | + By default, 1:M exon information is |
| 53 | + sorted by seqname, exon_start, exon_end, |
| 54 | + and strand. This results in 1:M exon |
| 55 | + information being sorted by their genomic |
| 56 | + position which is not the same as their |
| 57 | + splicing order for transcripts on the |
| 58 | + negative strand. |
| 59 | + The default behavior will result in 1:M |
| 60 | + exon information being reported in the |
| 61 | + following order: |
| 62 | + • Positive strand transcripts: |
| 63 | + • exon.1, exon.2, exon.3, ... |
| 64 | + • Negative strand transcripts: |
| 65 | + • ..., exon.3, exon.2, exon.1 |
| 66 | + If this option IS provided, the order |
| 67 | + will be reversed for negative strand |
| 68 | + transcripts to reflect the correct |
| 69 | + splicing order, meaning it will be |
| 70 | + sorted by exon order instead of |
| 71 | + genomic position. The order will be: |
| 72 | + • Positive strand transcripts: |
| 73 | + • exon.1, exon.2, exon.3, ... |
| 74 | + • Negative strand transcripts: |
| 75 | + • exon.1, exon.2, exon.3, ... |
| 76 | + It is worth noting that if this option |
| 77 | + IS NOT provided (default behavior), |
| 78 | + 1:M exon_start_end information related |
| 79 | + to exon location will be listed in |
| 80 | + increasing order for negative strand |
| 81 | + transcripts-- whereas if this option |
| 82 | + is provided, 1:M exon_start_end info |
| 83 | + will be listed in decreasing order for |
| 84 | + negative strand transcripts. |
| 85 | + • Default: False (i.e., exons are |
| 86 | + sorted by genomic position). |
50 | 87 | -h, --help |
51 | 88 | Shows help message and exits. |
52 | 89 | -v, --version |
@@ -170,6 +207,14 @@ def parse_cli_arguments(): |
170 | 207 | required=True, |
171 | 208 | help=argparse.SUPPRESS |
172 | 209 | ) |
| 210 | + # Sort exons by exon order, |
| 211 | + # not by genomic position |
| 212 | + parser.add_argument( |
| 213 | + '--sort-exons-by-exon-order', |
| 214 | + action='store_true', |
| 215 | + default=False, |
| 216 | + help=argparse.SUPPRESS, |
| 217 | + ) |
173 | 218 | # Get version information |
174 | 219 | parser.add_argument( |
175 | 220 | '-v', '--version', |
@@ -426,7 +471,7 @@ def get_with_default(line_list, column_name_idx_dict, column_name, default_value |
426 | 471 | # for the first exon in the list |
427 | 472 | # to determine if the order |
428 | 473 | # needs to be reversed. |
429 | | - if v[EXON_1toM_KEY][0][PARSE_1toM_COLUMNS.index("exon_strand")] == "-": |
| 474 | + if v[EXON_1toM_KEY][0][PARSE_1toM_COLUMNS.index("exon_strand")] == "-" and args.sort_exons_by_exon_order: |
430 | 475 | # If the strand is negative, |
431 | 476 | # reverse the order of the exon |
432 | 477 | # information to reflect the |
|
0 commit comments