Updating CLI option names for the splicing annotation file

skchronicles · skchronicles · commit af38acec2692 · 2025-06-18T11:03:09.000-04:00
diff --git a/workflow/scripts/leafcutter_annotation.py b/workflow/scripts/leafcutter_annotation.py
@@ -17,12 +17,12 @@
             --effect-sizes EFFECT_SIZES_FILE \\
             --cluster-signif CLUSTER_SIGNIF_FILE \\
             --intron-ann INTRON_ANN_FILE \\
-            --exon-ann EXON_ANN_FILE \\
+            --splicing-ann SPLICING_ANN_FILE \\
             --output OUTPUT_FILE
 @About:
     Given the differential splicing results from
     leafcutter (effect size, cluster signif output
-    files), an intron annotation file, and an exon
+    files), an intron annotation file, and a splicing
     annotation file, this script will collate info
     from each source to add the cluster adjusted
     p-values, transcript information and exon
@@ -37,7 +37,7 @@
     is set to "0.1".
 
 @Required:
-    -s, --effect-sizes EFFECT_SIZES_FILE
+    -e, --effect-sizes EFFECT_SIZES_FILE
         Input leafcutter effect sizes file.
         This file is generated by running
         "leafcutter_ds.R".
@@ -50,11 +50,12 @@
         file was generated by exporting the
         "intron" table from the Rdata file
         generated by "prepare_results.R".
-    -e, --exon-ann EXON_ANN_FILE
-        Input exon annotation file. This file
-        was generated by parsing exon info
-        from the GTF file. It is the output
-        file of "exon_annotation.py".
+    -s, --splicing-ann SPLICING_ANN_FILE
+        Input splicing annotation file. This
+        file was generated by parsing exon
+        and transcript information from the
+        GTF file. It is the output file of
+        "splicing_annotation.py".
     -o, --output OUTPUT_FILE
         Output file with merged and annotated
         leafcutter results.
@@ -72,10 +73,10 @@
 
 @Example:
     $ ./leafcutter_annotation.py \\
-        -s leafcutter_effect_sizes.txt \\
+        -e leafcutter_effect_sizes.txt \\
         -c leafcutter_cluster_significance.txt \\
         -i intron_annotation.tsv \\
-        -e exon_annotation.tsv \\
+        -s splicing_annotation.tsv \\
         -o leafcutter_annotated_results.tsv \\
         -f 0.1
 """
@@ -138,10 +139,10 @@ def check_permissions(parser, path, *args, **kwargs):
     """Checks permissions using os.access() to see the
     user is authorized to access a file/directory. Checks
     for existence, read, write and execute via args:
-        - os.F_OK (tests existence)
-        - os.R_OK (tests read)
-        - os.W_OK (tests write)
-        - os.X_OK (tests exec)
+        • os.F_OK (tests existence)
+        • os.R_OK (tests read)
+        • os.W_OK (tests write)
+        • os.X_OK (tests exec)
     @param parser <argparse.ArgumentParser() object>:
         Argparse parser object
     @param path <str>:
@@ -179,7 +180,7 @@ def parse_cli_arguments():
     )
     # Leafcutter effect sizes file
     parser.add_argument(
-        '-s', '--effect-sizes',
+        '-e', '--effect-sizes',
         type = lambda file: \
             check_permissions(parser, file, os.R_OK),
         required=True,
@@ -201,9 +202,9 @@ def parse_cli_arguments():
         required=True,
         help=argparse.SUPPRESS
     )
-    # Exon annotation file
+    # Splicing annotation file
     parser.add_argument(
-        '-e', '--exon-ann',
+        '-s', '--splicing-ann',
         type = lambda file: \
             check_permissions(parser, file, os.R_OK),
         required=True,
@@ -290,13 +291,13 @@ def index_file(file, keys, key_delim, values):
         be pulled by their name.   
     @return file_idx <dict[str]=str>:
         Nested dictionary where,
-            - key = 'key_delim'.join(keys)
-            - value = {val_col1: "A", val_col2:"B"}
+            • key = 'key_delim'.join(keys)
+            • value = {val_col1: "A", val_col2:"B"}
         Given,
             keys=["A","B"], values["C","D"], key_delim="|"
             returns {"A|B": {"C": "c_i", "D": "d_i"}}
     """
-    log("Started indexing input file: {0}".format(file))
+    log("Started indexing input file: ", file)
     file_idx = {}
     # Handler for opening files, i.e.
     # uncompressed or gzip files
@@ -314,6 +315,7 @@ def index_file(file, keys, key_delim, values):
             _k = key_delim.join([tokens[col_idx[k]] for k in keys])
             _v = {v: tokens[col_idx[v]]  for v in values}
             file_idx[_k] = _v
+    log("Completed indexing input file: ", file)
     return file_idx 
 
 
@@ -412,6 +414,10 @@ def get_additional_annotation_information(annotation_dict, first_key, values):
         key_delim=":"
     )
 
+    # Parse exon information from the
+    # splicing annotation file where:
+    #   key = {transcript}:{exonA_end}:{exonB_start}
+
     # Loop through effect sizes file
     # and add more detailed information
     log("Writing annotated output file: ", args.output)