Skip to content

assign-colors scripts are largely identical #286

@jameshadfield

Description

@jameshadfield

I suggest consolidating the two assign-colors scripts into one, since they are largely identical (see the diff below). While we're at it, we should remove the code around forced_colors (originally implemented for ncov), as it has always felt confusing.

diff --git a/phylogenetic/scripts/assign-colors.py b/nextclade/scripts/assign-colors.py
index e7587f5..72f9dc6 100644
--- a/phylogenetic/scripts/assign-colors.py
+++ b/nextclade/scripts/assign-colors.py
@@ -1,21 +1,24 @@
 import argparse
-import pdb
+
 import pandas as pd
 
 # Forced colours MUST NOT appear in the ordering TSV
-forced_colors = {
-}
+forced_colors = {}
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="Assign colors based on ordering",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
 
-    parser.add_argument('--ordering', type=str, required=True, help="input ordering file")
-    parser.add_argument('--color-schemes', type=str, required=True, help="input color schemes file")
-    parser.add_argument('--metadata', type=str, help="if provided, restrict colors to only those found in metadata")
-    parser.add_argument('--output', type=str, required=True, help="output colors tsv")
+    parser.add_argument("--ordering", type=str, required=True, help="input ordering file")
+    parser.add_argument("--color-schemes", type=str, required=True, help="input color schemes file")
+    parser.add_argument(
+        "--metadata",
+        type=str,
+        help="if provided, restrict colors to only those found in metadata",
+    )
+    parser.add_argument("--output", type=str, required=True, help="output colors tsv")
     args = parser.parse_args()
 
     assignment = {}
@@ -34,14 +37,18 @@ if __name__ == '__main__':
     # 1. remove assignments that don't exist in metadata
     # 2. remove assignments that have 'focal' set to 'False' in metadata
     if args.metadata:
-        metadata = pd.read_csv(args.metadata, delimiter='\t')
+        metadata = pd.read_csv(args.metadata, delimiter="\t")
         for name, trait in assignment.items():
             # Items not to exclude if not (yet) present in metadata to solve bootstrapping issue
-            if name in metadata and name not in ['clade_membership', 'outbreak', 'lineage']:
+            if name in metadata and name not in [
+                "clade_membership",
+                "outbreak",
+                "lineage",
+            ]:
                 subset_present = [x for x in assignment[name] if x in metadata[name].unique()]
                 assignment[name] = subset_present
-            if name in metadata and 'focal' in metadata:
-                focal_list = metadata.loc[metadata['focal'] == True, name].unique()
+            if name in metadata and "focal" in metadata:
+                focal_list = metadata.loc[metadata["focal"] == True, name].unique()
                 subset_focal = [x for x in assignment[name] if x in focal_list]
                 assignment[name] = subset_focal
 
@@ -53,28 +60,28 @@ if __name__ == '__main__':
             array = line.lstrip().rstrip().split("\t")
             schemes[counter] = array
 
-    with open(args.output, 'w') as f:
+    with open(args.output, "w") as f:
         for trait_name, trait_array in assignment.items():
-            if len(trait_array)==0:
+            if len(trait_array) == 0:
                 print(f"No traits found for {trait_name}")
                 continue
-            if len(schemes)<len(trait_array):
-              print(f"WARNING: insufficient colours available for trait {trait_name} - reusing colours!")
-              remain = len(trait_array)
-              color_array = []
-              while(remain>0):
-                if (remain>len(schemes)):
-                  color_array = [*color_array, *schemes[len(schemes)]]
-                  remain -= len(schemes)
-                else:
-                  color_array = [*color_array, *schemes[remain]]
-                  remain = 0
+            if len(schemes) < len(trait_array):
+                print(f"WARNING: insufficient colours available for trait {trait_name} - reusing colours!")
+                remain = len(trait_array)
+                color_array = []
+                while remain > 0:
+                    if remain > len(schemes):
+                        color_array = [*color_array, *schemes[len(schemes)]]
+                        remain -= len(schemes)
+                    else:
+                        color_array = [*color_array, *schemes[remain]]
+                        remain = 0
             else:
-              color_array = schemes[len(trait_array)]
+                color_array = schemes[len(trait_array)]
             extra_trait_values = list(forced_colors.get(trait_name, {}).keys())
             extra_color_values = list(forced_colors.get(trait_name, {}).values())
 
-            zipped = list(zip(trait_array+extra_trait_values, color_array+extra_color_values))
+            zipped = list(zip(trait_array + extra_trait_values, color_array + extra_color_values))
             for trait_value, color in zipped:
                 f.write(trait_name + "\t" + trait_value + "\t" + color + "\n")
             f.write("\n")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions