-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSnakefile
More file actions
94 lines (80 loc) · 4.53 KB
/
Snakefile
File metadata and controls
94 lines (80 loc) · 4.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# ──────────────────────────────────────────────────────────────
# PRISM Snakemake Pipeline
# ──────────────────────────────────────────────────────────────
# Metagenomic pathogen detection pipeline.
# Restructured from PRISM.R into a modular Snakemake workflow.
#
# Usage:
# snakemake --configfile config/config.yaml -j 16
# snakemake -n --configfile config/config.yaml # dry run
# ──────────────────────────────────────────────────────────────
import os
import glob as _glob
# ── Resolve paths relative to Snakefile location ─────────────
PRISM_DIR = os.path.dirname(workflow.snakefile)
SCRIPTS = os.path.join(PRISM_DIR, "workflow", "scripts")
# ── Samples ──────────────────────────────────────────────────
# Two modes:
# A) Auto-discovery: set data_dir + fq1_suffix/fq2_suffix in config
# B) Manual: list each sample with fq1/fq2 paths under samples:
# Both can be combined; manual entries override auto-discovered ones.
def _discover_samples(cfg):
"""Build samples dict from config, with optional auto-discovery."""
samples = {}
# Auto-discover from data_dir
data_dir = cfg.get("data_dir", "")
if data_dir:
fq1_sfx = cfg.get("fq1_suffix", "_R1.fq.gz")
fq2_sfx = cfg.get("fq2_suffix", "_R2.fq.gz")
pattern = os.path.join(data_dir, f"*{fq1_sfx}")
for fq1_path in sorted(_glob.glob(pattern)):
basename = os.path.basename(fq1_path)
sample_name = basename[: -len(fq1_sfx)]
fq2_path = os.path.join(data_dir, f"{sample_name}{fq2_sfx}")
if not os.path.exists(fq2_path):
raise ValueError(
f"Auto-discovery: found R1 '{fq1_path}' but missing R2 '{fq2_path}'"
)
samples[sample_name] = {"fq1": fq1_path, "fq2": fq2_path}
# Manual entries override auto-discovered ones
manual = cfg.get("samples", {})
if manual:
samples.update(manual)
if not samples:
raise ValueError(
"No samples found. Set 'data_dir' for auto-discovery "
"or list samples under 'samples:' in config.yaml"
)
return samples
SAMPLE_DICT = _discover_samples(config)
SAMPLES = list(SAMPLE_DICT.keys())
# ── Output root ──────────────────────────────────────────────
OUT_ROOT = config.get("output_dir", "results")
def out(sample, *parts):
"""Build output path: {output_dir}/{sample}_prism/data/{parts}"""
return os.path.join(OUT_ROOT, f"{sample}_prism", "data", *parts)
def out_final(sample, *parts):
"""Build final output path: {output_dir}/{sample}_prism/{parts}"""
return os.path.join(OUT_ROOT, f"{sample}_prism", *parts)
# ── Paired-end flag ──────────────────────────────────────────
PAIRED = config.get("paired", True)
# ── Include rule files ───────────────────────────────────────
include: "workflow/rules/step01_kraken.smk"
include: "workflow/rules/step02_minimap.smk"
include: "workflow/rules/step03_star.smk"
include: "workflow/rules/step04_subsample.smk"
include: "workflow/rules/step04a_customdb.smk"
include: "workflow/rules/step05_blast_sub.smk"
include: "workflow/rules/step06_multimapping.smk"
include: "workflow/rules/step07_blast_full.smk"
include: "workflow/rules/step08_filter.smk"
include: "workflow/rules/step09_genbank.smk"
include: "workflow/rules/step10_profile.smk"
include: "workflow/rules/step11_output.smk"
# ── Target rule ──────────────────────────────────────────────
rule all:
input:
expand(out_final("{sample}", "{sample}-results.csv"), sample=SAMPLES),
expand(out_final("{sample}", "{sample}-counts.csv"), sample=SAMPLES),
expand(out_final("{sample}", "{sample}_1.fa"), sample=SAMPLES),
expand(out("{sample}", "customdb_cleaned.flag"), sample=SAMPLES),