Skip to content

Commit 4696325

Browse files
authored
Merge pull request #3 from raphael-group/cleanprep
clean preprocessing and argparse
2 parents 4c7f960 + 380983e commit 4696325

2 files changed

Lines changed: 8 additions & 88 deletions

File tree

calicost.smk

Lines changed: 5 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@ import calicost.parse_input
88
rule all:
99
input:
1010
f"{config['output_snpinfo']}/cell_snp_Aallele.npz",
11-
# expand(f"{config['outputdir_calicost']}/summary{{r}}", r=config['random_state'])
1211

1312

1413
rule link_or_merge_bam:
@@ -26,8 +25,8 @@ rule link_or_merge_bam:
2625
if "bamlist" in config:
2726
# merged BAM file
2827
shell(f"python {config['calicost_dir']}/utils/merge_bamfile.py -b {config['bamlist']} -o {params.outputdir}/ >> {log} 2>&1")
29-
shell(f"{config['samtools']} sort -m {params.samtools_sorting_mem} -o {output.bam} {params.outputdir}/unsorted_possorted_genome_bam.bam >> {log} 2>&1")
30-
shell(f"{config['samtools']} index {output.bam}")
28+
shell(f"samtools sort -m {params.samtools_sorting_mem} -o {output.bam} {params.outputdir}/unsorted_possorted_genome_bam.bam >> {log} 2>&1")
29+
shell(f"samtools index {output.bam}")
3130
shell(f"rm -fr {params.outputdir}/unsorted_possorted_genome_bam.bam")
3231

3332
# merged barcodes
@@ -65,7 +64,7 @@ rule genotype:
6564
"{outputdir}/logs/genotyping.log"
6665
run:
6766
shell(f"mkdir -p {params.outputdir}/genotyping")
68-
command = f"{config['cellsnplite']} -s {input.bam} " + \
67+
command = f"cellsnp-lite -s {input.bam} " + \
6968
f"-b {input.barcodefile} " + \
7069
f"-O {params.outputdir}/genotyping/ " + \
7170
f"-R {params.region_vcf} " + \
@@ -89,8 +88,8 @@ rule pre_phasing:
8988
print(f"python {config['calicost_dir']}/utils/filter_snps_forphasing.py -c {params.outputdir}/genotyping -o {params.outputdir}/phasing")
9089
shell(f"python {config['calicost_dir']}/utils/filter_snps_forphasing.py -c {params.outputdir}/genotyping -o {params.outputdir}/phasing")
9190
for chrname in config["chromosomes"]:
92-
shell(f"{config['bgzip']} -f {params.outputdir}/phasing/chr{chrname}.vcf")
93-
shell(f"{config['tabix']} -f {params.outputdir}/phasing/chr{chrname}.vcf.gz")
91+
shell(f"bgzip -f {params.outputdir}/phasing/chr{chrname}.vcf")
92+
shell(f"tabix -f {params.outputdir}/phasing/chr{chrname}.vcf.gz")
9493

9594

9695
rule phasing:
@@ -131,84 +130,3 @@ rule parse_final_snp:
131130
f"-c {params.outputdir}/genotyping -e {params.outputdir}/phasing -b {params.outputdir}/barcodes.txt -o {params.outputdir}/ >> {log} 2>&1"
132131
shell( command )
133132

134-
135-
rule write_calicost_configfile:
136-
input:
137-
f"{config['output_snpinfo']}/cell_snp_Aallele.npz",
138-
f"{config['output_snpinfo']}/cell_snp_Ballele.npz",
139-
f"{config['output_snpinfo']}/unique_snp_ids.npy",
140-
output:
141-
expand("{{outputdir}}" + "/configfile{r}", r=config['random_state'])
142-
params:
143-
outputdir="{outputdir}",
144-
threads: 1
145-
run:
146-
if "bamlist" in config:
147-
calicost_config = calicost.arg_parse.get_default_config_joint()
148-
else:
149-
calicost_config = calicost.arg_parse.get_default_config_single()
150-
151-
# update input
152-
calicost_config['snp_dir'] = "/".join( input[0].split("/")[:-1] )
153-
calicost_config['output_dir'] = f"{params.outputdir}"
154-
if 'spaceranger_dir' in calicost_config:
155-
assert 'spaceranger_dir' in config
156-
calicost_config['spaceranger_dir'] = config['spaceranger_dir']
157-
if 'input_filelist' in calicost_config:
158-
assert 'bamlist' in config
159-
calicost_config['input_filelist'] = config['bamlist']
160-
if Path(f"{config['output_snpinfo']}/merged_deconvolution.tsv").exists():
161-
calicost_config['tumorprop_file'] = f"{config['output_snpinfo']}/merged_deconvolution.tsv"
162-
163-
for k in calicost_config.keys():
164-
if k in config:
165-
calicost_config[k] = config[k]
166-
167-
for r in config['random_state']:
168-
calicost_config["num_hmrf_initialization_start"] = r
169-
calicost_config["num_hmrf_initialization_end"] = r+1
170-
calicost.arg_parse.write_config_file(f"{params.outputdir}/configfile{r}", calicost_config)
171-
172-
173-
rule prepare_calicost_data:
174-
input:
175-
expand("{{outputdir}}" + "/configfile{r}", r=config['random_state']),
176-
output:
177-
f"{{outputdir}}/parsed_inputs/table_bininfo.csv.gz",
178-
f"{{outputdir}}/parsed_inputs/table_rdrbaf.csv.gz",
179-
f"{{outputdir}}/parsed_inputs/table_meta.csv.gz",
180-
f"{{outputdir}}/parsed_inputs/exp_counts.pkl",
181-
f"{{outputdir}}/parsed_inputs/adjacency_mat.npz",
182-
f"{{outputdir}}/parsed_inputs/smooth_mat.npz",
183-
f"{{outputdir}}/initial_phase.npz"
184-
params:
185-
outputdir="{outputdir}",
186-
threads: 1
187-
log:
188-
"{outputdir}/logs/prepare_calicost_data.log"
189-
run:
190-
command = f"OMP_NUM_THREADS=1 python {config['calicost_dir']}/src/calicost/parse_input.py -c {input[0]} >> {log} 2>&1"
191-
shell(command)
192-
193-
194-
rule run_calicost:
195-
input:
196-
f"{{outputdir}}/configfile{{r}}",
197-
f"{{outputdir}}/parsed_inputs/table_bininfo.csv.gz",
198-
f"{{outputdir}}/parsed_inputs/table_rdrbaf.csv.gz",
199-
f"{{outputdir}}/parsed_inputs/table_meta.csv.gz",
200-
f"{{outputdir}}/parsed_inputs/exp_counts.pkl",
201-
f"{{outputdir}}/parsed_inputs/adjacency_mat.npz",
202-
f"{{outputdir}}/parsed_inputs/smooth_mat.npz"
203-
output:
204-
f"{{outputdir}}/summary{{r}}",
205-
params:
206-
outputdir="{outputdir}",
207-
r="{r}"
208-
threads: 1
209-
log:
210-
"{outputdir}/logs/calicost_run_{r}.log"
211-
run:
212-
command = f"OMP_NUM_THREADS=1 python {config['calicost_dir']}/src/calicost/calicost_main.py -c {input[0]} >> {log} 2>&1"
213-
shell(command)
214-
shell(f"echo {command} > {output}")

src/calicost/arg_parse.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -155,7 +155,9 @@ def read_configuration_file(filename):
155155
# warning that the argument is not a valid configuration parameter and continue
156156
logger.warning(f"{strs[0]} is not a valid configuration parameter! Configuration parameters are: {list(config.keys())}")
157157
continue
158-
if strs[1].upper() == "NONE":
158+
if len(strs) == 1:
159+
config[strs[0]] = []
160+
elif strs[1].upper() == "NONE":
159161
config[strs[0]] = None
160162
elif argument_type[strs[0]] == "str":
161163
config[strs[0]] = strs[1]

0 commit comments

Comments
 (0)