1+ import sys
2+ import numpy as np
3+ import scipy
4+ import pandas as pd
5+ from pathlib import Path
6+ from sklearn .metrics import adjusted_rand_score
7+ from sklearn .cluster import KMeans
8+ import scanpy as sc
9+ import anndata
10+ import logging
11+ logging .basicConfig (level = logging .INFO , format = "%(asctime)s - %(levelname)s - %(message)s" , datefmt = "%Y-%m-%d %H:%M:%S" )
12+ logger = logging .getLogger ()
13+ import copy
14+ from pathlib import Path
15+ import functools
16+ import subprocess
17+ from calicost .arg_parse import *
18+ from calicost .hmm_NB_BB_phaseswitch import *
19+ from calicost .utils_distribution_fitting import *
20+ from calicost .utils_hmrf import *
21+ from calicost .hmrf import *
22+ from calicost .phasing import *
23+ from calicost .utils_IO import *
24+ from calicost .find_integer_copynumber import *
25+ from calicost .parse_input import *
26+ from calicost .utils_plotting import *
27+ from calicost .sample_posterior_clone import *
28+ from calicost .hmm_NB_BB_nophasing_float import *
29+
30+
31+ def main (configuration_file ):
32+ try :
33+ config = read_configuration_file (configuration_file )
34+ except :
35+ config = read_joint_configuration_file (configuration_file )
36+ print ("Configurations:" )
37+ for k in sorted (list (config .keys ())):
38+ print (f"\t { k } : { config [k ]} " )
39+
40+ lengths , single_X , single_base_nb_mean , single_total_bb_RD , log_sitewise_transmat , df_bininfo , df_gene_snp , \
41+ barcodes , coords , single_tumor_prop , sample_list , sample_ids , adjacency_mat , smooth_mat , exp_counts = run_parse_n_load (config )
42+
43+ single_tumor_prop [np .isnan (single_tumor_prop )] = 0.05
44+
45+ copy_single_X_rdr = copy .copy (single_X [:,0 ,:])
46+ copy_single_base_nb_mean = copy .copy (single_base_nb_mean )
47+ single_X [:,0 ,:] = 0
48+ single_base_nb_mean [:,:] = 0
49+
50+ # # remove spots with tumor purity less than config['tumorprop_threshold']
51+ # single_X = single_X[:,:,single_tumor_prop > config['tumorprop_threshold']]
52+ # single_base_nb_mean = single_base_nb_mean[:,single_tumor_prop > config['tumorprop_threshold']]
53+ # single_total_bb_RD = single_total_bb_RD[:,single_tumor_prop > config['tumorprop_threshold']]
54+ # barcodes = barcodes[single_tumor_prop > config['tumorprop_threshold']]
55+ # coords = coords[single_tumor_prop > config['tumorprop_threshold'],:]
56+ # sample_ids = sample_ids[single_tumor_prop > config['tumorprop_threshold']]
57+ # adjacency_mat = adjacency_mat[single_tumor_prop > config['tumorprop_threshold'],:][:,single_tumor_prop > config['tumorprop_threshold']]
58+ # smooth_mat = smooth_mat[single_tumor_prop > config['tumorprop_threshold'],:][:,single_tumor_prop > config['tumorprop_threshold']]
59+ # single_tumor_prop = single_tumor_prop[single_tumor_prop > config['tumorprop_threshold']]
60+
61+ # Run HMRF
62+ for r_hmrf_initialization in range (config ["num_hmrf_initialization_start" ], config ["num_hmrf_initialization_end" ]):
63+ outdir = f"{ config ['output_dir' ]} /clone{ config ['n_clones' ]} _rectangle{ r_hmrf_initialization } _w{ config ['spatial_weight' ]:.1f} "
64+ if config ["tumorprop_file" ] is None :
65+ initial_clone_index = rectangle_initialize_initial_clone (coords , config ["n_clones" ], random_state = r_hmrf_initialization )
66+ else :
67+ initial_clone_index = rectangle_initialize_initial_clone_mix (coords , config ["n_clones" ], single_tumor_prop , threshold = config ["tumorprop_threshold" ], random_state = r_hmrf_initialization )
68+
69+ # create directory
70+ p = subprocess .Popen (f"mkdir -p { outdir } " , stdout = subprocess .PIPE , stderr = subprocess .PIPE , shell = True )
71+ out ,err = p .communicate ()
72+ # save clone initialization into npz file
73+ prefix = "vi_gibbs_hightp_allspots"
74+ initial_assignment = np .zeros (single_X .shape [2 ], dtype = int )
75+ for c ,idx in enumerate (initial_clone_index ):
76+ initial_assignment [idx ] = c
77+
78+ posterior_clones = np .zeros ((single_X .shape [2 ], config ['n_clones' ]))
79+ posterior_clones [ (np .arange (single_X .shape [2 ]), initial_assignment ) ] = 1
80+
81+ # run HMRF + HMM
82+ X , base_nb_mean , total_bb_RD , tumor_prop = merge_pseudobulk_by_index_mix (single_X , single_base_nb_mean , single_total_bb_RD , initial_clone_index , single_tumor_prop , threshold = config ["tumorprop_threshold" ])
83+ init_log_mu , init_p_binom = initialization_by_gmm (config ['n_states' ], np .vstack ([X [:,0 ,:].flatten ("F" ), X [:,1 ,:].flatten ("F" )]).T .reshape (- 1 ,2 ,1 ), \
84+ base_nb_mean .flatten ("F" ).reshape (- 1 ,1 ), total_bb_RD .flatten ("F" ).reshape (- 1 ,1 ), params = 'sp' , random_state = config ['gmm_random_state' ], in_log_space = False , only_minor = False )
85+
86+ list_posterior_clones , list_cna_states , list_emission_llf , list_log_mu ,list_alphas , list_p_binom , list_taus , list_log_startprob , list_elbo , list_h , list_labels = infer_all_v2 (
87+ single_X , lengths , single_base_nb_mean , single_total_bb_RD , single_tumor_prop , posterior_clones , config ['n_states' ],
88+ coords , adjacency_mat , config ['tumorprop_threshold' ], config ['spatial_weight' ], 'visium' , max_iter_outer = 20 , num_chains = 20 , burnin = 50 , sampling_tol = 1e-10 , temperature = 2.0 ,
89+ hmmclass = hmm_nophasing_float , hmm_params = 'sp' , hmm_t = config ['t' ], hmm_random_state = config ['gmm_random_state' ], hmm_max_iter = config ['max_iter' ], hmm_tol = config ['tol' ], hmm_num_draws = 200 , fun = block_gibbs_sampling_labels ,
90+ smooth_mat = smooth_mat , init_p_binom = init_p_binom )
91+
92+ # save results
93+ np .savez (f"{ outdir } /{ prefix } _nstates{ config ['n_states' ]} _sp.npz" ,
94+ list_posterior_clones = list_posterior_clones ,
95+ list_cna_states = list_cna_states ,
96+ list_emission_llf = list_emission_llf ,
97+ list_log_mu = list_log_mu ,
98+ list_p_binom = list_p_binom ,
99+ list_elbo = list_elbo ,
100+ list_h = list_h ,
101+ list_labels = list_labels )
102+
103+
104+ if __name__ == "__main__" :
105+ parser = argparse .ArgumentParser ()
106+ parser .add_argument ("-c" , "--configfile" , help = "configuration file of CalicoST" , required = True , type = str )
107+ args = parser .parse_args ()
108+
109+ main (args .configfile )
0 commit comments