@@ -8,7 +8,6 @@ import calicost.parse_input
88rule all :
99 input :
1010 f"{ config ['output_snpinfo' ]} /cell_snp_Aallele.npz" ,
11- # expand(f"{config['outputdir_calicost']}/summary{{r}}", r=config['random_state'])
1211
1312
1413rule link_or_merge_bam :
@@ -26,8 +25,8 @@ rule link_or_merge_bam:
2625 if "bamlist" in config :
2726 # merged BAM file
2827 shell (f"python { config ['calicost_dir' ]} /utils/merge_bamfile.py -b { config ['bamlist' ]} -o { params .outputdir } / >> { log } 2>&1" )
29- shell (f"{ config [ ' samtools' ] } sort -m { params .samtools_sorting_mem } -o { output .bam } { params .outputdir } /unsorted_possorted_genome_bam.bam >> { log } 2>&1" )
30- shell (f"{ config [ ' samtools' ] } index { output .bam } " )
28+ shell (f"samtools sort -m { params .samtools_sorting_mem } -o { output .bam } { params .outputdir } /unsorted_possorted_genome_bam.bam >> { log } 2>&1" )
29+ shell (f"samtools index { output .bam } " )
3130 shell (f"rm -fr { params .outputdir } /unsorted_possorted_genome_bam.bam" )
3231
3332 # merged barcodes
@@ -65,7 +64,7 @@ rule genotype:
6564 "{outputdir}/logs/genotyping.log"
6665 run :
6766 shell (f"mkdir -p { params .outputdir } /genotyping" )
68- command = f"{ config [ 'cellsnplite' ] } -s { input .bam } " + \
67+ command = f"cellsnp-lite -s { input .bam } " + \
6968 f"-b { input .barcodefile } " + \
7069 f"-O { params .outputdir } /genotyping/ " + \
7170 f"-R { params .region_vcf } " + \
@@ -89,8 +88,8 @@ rule pre_phasing:
8988 print (f"python { config ['calicost_dir' ]} /utils/filter_snps_forphasing.py -c { params .outputdir } /genotyping -o { params .outputdir } /phasing" )
9089 shell (f"python { config ['calicost_dir' ]} /utils/filter_snps_forphasing.py -c { params .outputdir } /genotyping -o { params .outputdir } /phasing" )
9190 for chrname in config ["chromosomes" ]:
92- shell (f"{ config [ ' bgzip' ] } -f { params .outputdir } /phasing/chr{ chrname } .vcf" )
93- shell (f"{ config [ ' tabix' ] } -f { params .outputdir } /phasing/chr{ chrname } .vcf.gz" )
91+ shell (f"bgzip -f { params .outputdir } /phasing/chr{ chrname } .vcf" )
92+ shell (f"tabix -f { params .outputdir } /phasing/chr{ chrname } .vcf.gz" )
9493
9594
9695rule phasing :
@@ -131,84 +130,3 @@ rule parse_final_snp:
131130 f"-c { params .outputdir } /genotyping -e { params .outputdir } /phasing -b { params .outputdir } /barcodes.txt -o { params .outputdir } / >> { log } 2>&1"
132131 shell ( command )
133132
134-
135- rule write_calicost_configfile :
136- input :
137- f"{ config ['output_snpinfo' ]} /cell_snp_Aallele.npz" ,
138- f"{ config ['output_snpinfo' ]} /cell_snp_Ballele.npz" ,
139- f"{ config ['output_snpinfo' ]} /unique_snp_ids.npy" ,
140- output :
141- expand ("{{outputdir}}" + "/configfile{r}" , r = config ['random_state' ])
142- params :
143- outputdir = "{outputdir}" ,
144- threads : 1
145- run :
146- if "bamlist" in config :
147- calicost_config = calicost .arg_parse .get_default_config_joint ()
148- else :
149- calicost_config = calicost .arg_parse .get_default_config_single ()
150-
151- # update input
152- calicost_config ['snp_dir' ] = "/" .join ( input [0 ].split ("/" )[:- 1 ] )
153- calicost_config ['output_dir' ] = f"{ params .outputdir } "
154- if 'spaceranger_dir' in calicost_config :
155- assert 'spaceranger_dir' in config
156- calicost_config ['spaceranger_dir' ] = config ['spaceranger_dir' ]
157- if 'input_filelist' in calicost_config :
158- assert 'bamlist' in config
159- calicost_config ['input_filelist' ] = config ['bamlist' ]
160- if Path (f"{ config ['output_snpinfo' ]} /merged_deconvolution.tsv" ).exists ():
161- calicost_config ['tumorprop_file' ] = f"{ config ['output_snpinfo' ]} /merged_deconvolution.tsv"
162-
163- for k in calicost_config .keys ():
164- if k in config :
165- calicost_config [k ] = config [k ]
166-
167- for r in config ['random_state' ]:
168- calicost_config ["num_hmrf_initialization_start" ] = r
169- calicost_config ["num_hmrf_initialization_end" ] = r + 1
170- calicost .arg_parse .write_config_file (f"{ params .outputdir } /configfile{ r } " , calicost_config )
171-
172-
173- rule prepare_calicost_data :
174- input :
175- expand ("{{outputdir}}" + "/configfile{r}" , r = config ['random_state' ]),
176- output :
177- f"{{outputdir}}/parsed_inputs/table_bininfo.csv.gz" ,
178- f"{{outputdir}}/parsed_inputs/table_rdrbaf.csv.gz" ,
179- f"{{outputdir}}/parsed_inputs/table_meta.csv.gz" ,
180- f"{{outputdir}}/parsed_inputs/exp_counts.pkl" ,
181- f"{{outputdir}}/parsed_inputs/adjacency_mat.npz" ,
182- f"{{outputdir}}/parsed_inputs/smooth_mat.npz" ,
183- f"{{outputdir}}/initial_phase.npz"
184- params :
185- outputdir = "{outputdir}" ,
186- threads : 1
187- log :
188- "{outputdir}/logs/prepare_calicost_data.log"
189- run :
190- command = f"OMP_NUM_THREADS=1 python { config ['calicost_dir' ]} /src/calicost/parse_input.py -c { input [0 ]} >> { log } 2>&1"
191- shell (command )
192-
193-
194- rule run_calicost :
195- input :
196- f"{{outputdir}}/configfile{{r}}" ,
197- f"{{outputdir}}/parsed_inputs/table_bininfo.csv.gz" ,
198- f"{{outputdir}}/parsed_inputs/table_rdrbaf.csv.gz" ,
199- f"{{outputdir}}/parsed_inputs/table_meta.csv.gz" ,
200- f"{{outputdir}}/parsed_inputs/exp_counts.pkl" ,
201- f"{{outputdir}}/parsed_inputs/adjacency_mat.npz" ,
202- f"{{outputdir}}/parsed_inputs/smooth_mat.npz"
203- output :
204- f"{{outputdir}}/summary{{r}}" ,
205- params :
206- outputdir = "{outputdir}" ,
207- r = "{r}"
208- threads : 1
209- log :
210- "{outputdir}/logs/calicost_run_{r}.log"
211- run :
212- command = f"OMP_NUM_THREADS=1 python { config ['calicost_dir' ]} /src/calicost/calicost_main.py -c { input [0 ]} >> { log } 2>&1"
213- shell (command )
214- shell (f"echo { command } > { output } " )
0 commit comments