Skip to content

Commit 3e883c7

Browse files
committed
initial code for running VI to the end using BAF
1 parent 0d62b3f commit 3e883c7

2 files changed

Lines changed: 116 additions & 1 deletion

File tree

src/calicost/sample_posterior_clone.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,10 @@ def infer_all_v2(single_X, lengths, single_base_nb_mean, single_total_bb_RD, sin
497497
list_cna_states = []
498498
list_emission_llf = []
499499
list_log_mu = []
500+
list_alphas = []
500501
list_p_binom = []
502+
list_taus = []
503+
list_log_startprob = []
501504
list_elbo = []
502505

503506
for r in range(max_iter_outer):
@@ -544,7 +547,10 @@ def infer_all_v2(single_X, lengths, single_base_nb_mean, single_total_bb_RD, sin
544547
# save results
545548
list_emission_llf.append( emission_llf )
546549
list_log_mu.append(res['new_log_mu'])
550+
list_alphas.append(res['new_alphas'])
547551
list_p_binom.append(res['new_p_binom'])
552+
list_taus.append(res['new_taus'])
553+
list_log_startprob.append(res['new_log_startprob'])
548554
list_cna_states.append( np.stack([res['pred_cnv'][(i*n_obs):(i*n_obs+n_obs)] for i in range(n_clones)]) )
549555

550556
# update last_log_mu, last_p_binom, last_alphas, last_taus
@@ -582,7 +588,7 @@ def infer_all_v2(single_X, lengths, single_base_nb_mean, single_total_bb_RD, sin
582588
# if r > 2:
583589
# temperature = max(1.0, temperature-2)
584590

585-
return list_posterior_clones, list_cna_states, list_emission_llf, list_log_mu, list_p_binom, list_elbo
591+
return list_posterior_clones, list_cna_states, list_emission_llf, list_log_mu,list_alphas, list_p_binom, list_taus, list_log_startprob, list_elbo, list_h, list_labels
586592

587593

588594

src/calicost/vi_main.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import sys
2+
import numpy as np
3+
import scipy
4+
import pandas as pd
5+
from pathlib import Path
6+
from sklearn.metrics import adjusted_rand_score
7+
from sklearn.cluster import KMeans
8+
import scanpy as sc
9+
import anndata
10+
import logging
11+
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
12+
logger = logging.getLogger()
13+
import copy
14+
from pathlib import Path
15+
import functools
16+
import subprocess
17+
from calicost.arg_parse import *
18+
from calicost.hmm_NB_BB_phaseswitch import *
19+
from calicost.utils_distribution_fitting import *
20+
from calicost.utils_hmrf import *
21+
from calicost.hmrf import *
22+
from calicost.phasing import *
23+
from calicost.utils_IO import *
24+
from calicost.find_integer_copynumber import *
25+
from calicost.parse_input import *
26+
from calicost.utils_plotting import *
27+
from calicost.sample_posterior_clone import *
28+
from calicost.hmm_NB_BB_nophasing_float import *
29+
30+
31+
def main(configuration_file):
32+
try:
33+
config = read_configuration_file(configuration_file)
34+
except:
35+
config = read_joint_configuration_file(configuration_file)
36+
print("Configurations:")
37+
for k in sorted(list(config.keys())):
38+
print(f"\t{k} : {config[k]}")
39+
40+
lengths, single_X, single_base_nb_mean, single_total_bb_RD, log_sitewise_transmat, df_bininfo, df_gene_snp, \
41+
barcodes, coords, single_tumor_prop, sample_list, sample_ids, adjacency_mat, smooth_mat, exp_counts = run_parse_n_load(config)
42+
43+
single_tumor_prop[np.isnan(single_tumor_prop)] = 0.05
44+
45+
copy_single_X_rdr = copy.copy(single_X[:,0,:])
46+
copy_single_base_nb_mean = copy.copy(single_base_nb_mean)
47+
single_X[:,0,:] = 0
48+
single_base_nb_mean[:,:] = 0
49+
50+
# # remove spots with tumor purity less than config['tumorprop_threshold']
51+
# single_X = single_X[:,:,single_tumor_prop > config['tumorprop_threshold']]
52+
# single_base_nb_mean = single_base_nb_mean[:,single_tumor_prop > config['tumorprop_threshold']]
53+
# single_total_bb_RD = single_total_bb_RD[:,single_tumor_prop > config['tumorprop_threshold']]
54+
# barcodes = barcodes[single_tumor_prop > config['tumorprop_threshold']]
55+
# coords = coords[single_tumor_prop > config['tumorprop_threshold'],:]
56+
# sample_ids = sample_ids[single_tumor_prop > config['tumorprop_threshold']]
57+
# adjacency_mat = adjacency_mat[single_tumor_prop > config['tumorprop_threshold'],:][:,single_tumor_prop > config['tumorprop_threshold']]
58+
# smooth_mat = smooth_mat[single_tumor_prop > config['tumorprop_threshold'],:][:,single_tumor_prop > config['tumorprop_threshold']]
59+
# single_tumor_prop = single_tumor_prop[single_tumor_prop > config['tumorprop_threshold']]
60+
61+
# Run HMRF
62+
for r_hmrf_initialization in range(config["num_hmrf_initialization_start"], config["num_hmrf_initialization_end"]):
63+
outdir = f"{config['output_dir']}/clone{config['n_clones']}_rectangle{r_hmrf_initialization}_w{config['spatial_weight']:.1f}"
64+
if config["tumorprop_file"] is None:
65+
initial_clone_index = rectangle_initialize_initial_clone(coords, config["n_clones"], random_state=r_hmrf_initialization)
66+
else:
67+
initial_clone_index = rectangle_initialize_initial_clone_mix(coords, config["n_clones"], single_tumor_prop, threshold=config["tumorprop_threshold"], random_state=r_hmrf_initialization)
68+
69+
# create directory
70+
p = subprocess.Popen(f"mkdir -p {outdir}", stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
71+
out,err = p.communicate()
72+
# save clone initialization into npz file
73+
prefix = "vi_gibbs_hightp_allspots"
74+
initial_assignment = np.zeros(single_X.shape[2], dtype=int)
75+
for c,idx in enumerate(initial_clone_index):
76+
initial_assignment[idx] = c
77+
78+
posterior_clones = np.zeros((single_X.shape[2], config['n_clones']))
79+
posterior_clones[ (np.arange(single_X.shape[2]), initial_assignment) ] = 1
80+
81+
# run HMRF + HMM
82+
X, base_nb_mean, total_bb_RD, tumor_prop = merge_pseudobulk_by_index_mix(single_X, single_base_nb_mean, single_total_bb_RD, initial_clone_index, single_tumor_prop, threshold=config["tumorprop_threshold"])
83+
init_log_mu, init_p_binom = initialization_by_gmm(config['n_states'], np.vstack([X[:,0,:].flatten("F"), X[:,1,:].flatten("F")]).T.reshape(-1,2,1), \
84+
base_nb_mean.flatten("F").reshape(-1,1), total_bb_RD.flatten("F").reshape(-1,1), params='sp', random_state=config['gmm_random_state'], in_log_space=False, only_minor=False)
85+
86+
list_posterior_clones, list_cna_states, list_emission_llf, list_log_mu,list_alphas, list_p_binom, list_taus, list_log_startprob, list_elbo, list_h, list_labels = infer_all_v2(
87+
single_X, lengths, single_base_nb_mean, single_total_bb_RD, single_tumor_prop, posterior_clones, config['n_states'],
88+
coords, adjacency_mat, config['tumorprop_threshold'], config['spatial_weight'], 'visium', max_iter_outer=20, num_chains=20, burnin=50, sampling_tol=1e-10, temperature=2.0,
89+
hmmclass=hmm_nophasing_float, hmm_params='sp', hmm_t=config['t'], hmm_random_state=config['gmm_random_state'], hmm_max_iter=config['max_iter'], hmm_tol=config['tol'], hmm_num_draws=200, fun=block_gibbs_sampling_labels,
90+
smooth_mat=smooth_mat, init_p_binom=init_p_binom)
91+
92+
# save results
93+
np.savez(f"{outdir}/{prefix}_nstates{config['n_states']}_sp.npz",
94+
list_posterior_clones=list_posterior_clones,
95+
list_cna_states=list_cna_states,
96+
list_emission_llf=list_emission_llf,
97+
list_log_mu=list_log_mu,
98+
list_p_binom=list_p_binom,
99+
list_elbo=list_elbo,
100+
list_h=list_h,
101+
list_labels=list_labels)
102+
103+
104+
if __name__ == "__main__":
105+
parser = argparse.ArgumentParser()
106+
parser.add_argument("-c", "--configfile", help="configuration file of CalicoST", required=True, type=str)
107+
args = parser.parse_args()
108+
109+
main(args.configfile)

0 commit comments

Comments
 (0)