1717 --effect-sizes EFFECT_SIZES_FILE \\
1818 --cluster-signif CLUSTER_SIGNIF_FILE \\
1919 --intron-ann INTRON_ANN_FILE \\
20- --exon -ann EXON_ANN_FILE \\
20+ --splicing-ann SPLICING_ANN_FILE \\
2121 --output OUTPUT_FILE
2222@About:
2323 Given the differential splicing results from
2424 leafcutter (effect size, cluster signif output
25- files), an intron annotation file, and an exon
25+ files), an intron annotation file, and a splicing
2626 annotation file, this script will collate info
2727 from each source to add the cluster adjusted
2828 p-values, transcript information and exon
3737 is set to "0.1".
3838
3939@Required:
40- -s , --effect-sizes EFFECT_SIZES_FILE
40+ -e, --effect-sizes EFFECT_SIZES_FILE
4141 Input leafcutter effect sizes file.
4242 This file is generated by running
4343 "leafcutter_ds.R".
5050 file was generated by exporting the
5151 "intron" table from the Rdata file
5252 generated by "prepare_results.R".
53- -e, --exon-ann EXON_ANN_FILE
54- Input exon annotation file. This file
55- was generated by parsing exon info
56- from the GTF file. It is the output
57- file of "exon_annotation.py".
53+ -s, --splicing-ann SPLICING_ANN_FILE
54+ Input splicing annotation file. This
55+ file was generated by parsing exon
56+ and transcript information from the
57+ GTF file. It is the output file of
58+ "splicing_annotation.py".
5859 -o, --output OUTPUT_FILE
5960 Output file with merged and annotated
6061 leafcutter results.
7273
7374@Example:
7475 $ ./leafcutter_annotation.py \\
75- -s leafcutter_effect_sizes.txt \\
76+ -e leafcutter_effect_sizes.txt \\
7677 -c leafcutter_cluster_significance.txt \\
7778 -i intron_annotation.tsv \\
78- -e exon_annotation .tsv \\
79+ -s splicing_annotation.tsv \\
7980 -o leafcutter_annotated_results.tsv \\
8081 -f 0.1
8182"""
@@ -138,10 +139,10 @@ def check_permissions(parser, path, *args, **kwargs):
138139 """Checks permissions using os.access() to see if the
139140 user is authorized to access a file/directory. Checks
140141 for existence, read, write and execute via args:
141- - os.F_OK (tests existence)
142- - os.R_OK (tests read)
143- - os.W_OK (tests write)
144- - os.X_OK (tests exec)
142+ • os.F_OK (tests existence)
143+ • os.R_OK (tests read)
144+ • os.W_OK (tests write)
145+ • os.X_OK (tests exec)
145146 @param parser <argparse.ArgumentParser() object>:
146147 Argparse parser object
147148 @param path <str>:
@@ -179,7 +180,7 @@ def parse_cli_arguments():
179180 )
180181 # Leafcutter effect sizes file
181182 parser .add_argument (
182- '-s ' , '--effect-sizes' ,
183+ '-e ' , '--effect-sizes' ,
183184 type = lambda file : \
184185 check_permissions (parser , file , os .R_OK ),
185186 required = True ,
@@ -201,9 +202,9 @@ def parse_cli_arguments():
201202 required = True ,
202203 help = argparse .SUPPRESS
203204 )
204- # Exon annotation file
205+ # Splicing annotation file
205206 parser .add_argument (
206- '-e ' , '--exon -ann' ,
207+ '-s ' , '--splicing -ann' ,
207208 type = lambda file : \
208209 check_permissions (parser , file , os .R_OK ),
209210 required = True ,
@@ -290,13 +291,13 @@ def index_file(file, keys, key_delim, values):
290291 be pulled by their name.
291292 @return file_idx <dict[str]=str>:
292293 Nested dictionary where,
293- - key = 'key_delim'.join(keys)
294- - value = {val_col1: "A", val_col2:"B"}
294+ • key = 'key_delim'.join(keys)
295+ • value = {val_col1: "A", val_col2:"B"}
295296 Given,
296297 keys=["A","B"], values=["C","D"], key_delim="|"
297298 returns {"A|B": {"C": "c_i", "D": "d_i"}}
298299 """
299- log ("Started indexing input file: {0}" . format ( file ) )
300+ log ("Started indexing input file: " , file )
300301 file_idx = {}
301302 # Handler for opening files, i.e.
302303 # uncompressed or gzip files
@@ -314,6 +315,7 @@ def index_file(file, keys, key_delim, values):
314315 _k = key_delim .join ([tokens [col_idx [k ]] for k in keys ])
315316 _v = {v : tokens [col_idx [v ]] for v in values }
316317 file_idx [_k ] = _v
318+ log ("Completed indexing input file: " , file )
317319 return file_idx
318320
319321
@@ -412,6 +414,10 @@ def get_additional_annotation_information(annotation_dict, first_key, values):
412414 key_delim = ":"
413415 )
414416
417+ # Parse exon information from the
418+ # splicing annotation file where:
419+ # key = {transcript}:{exonA_end}:{exonB_start}
420+
415421 # Loop through effect sizes file
416422 # and add more detailed information
417423 log ("Writing annotated output file: " , args .output )
0 commit comments