Skip to content

Commit 904e96c

Browse files
authored
Merge pull request #151 from NCI-RBL/dev
complete feature branch merge
2 parents b89dae8 + 4fb9392 commit 904e96c

7 files changed

Lines changed: 164 additions & 97 deletions

File tree

.tests/cluster_config.yaml

Lines changed: 87 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -2,125 +2,137 @@
22
__default__:
33
gres: lscratch:96
44
mem: 40g
5-
partition: norm
6-
time: 00-02:00:00
5+
partition: ccr,norm
6+
time: 00-08:00:00
77
threads: 32
88
output: .%j.{wildcards}.out
99
error: .%j.{wildcards}.err
1010

1111
qc_barcode:
12-
threads: 3
13-
mem: 3g
12+
threads: 8
13+
mem: 75g
1414
time: 00-04:00:00
1515

1616
demultiplex:
17-
threads: 3
18-
mem: 3g
19-
time: 04-00:00:00
17+
threads: 56
18+
mem: 32g
19+
gres: lscratch:800
20+
time: 00-05:00:00
2021

21-
remove_adaptors:
22-
threads: 3
23-
time: 1-00:00:00
24-
mem: 3g
25-
26-
qc_fastq_pre:
27-
threads: 3
28-
mem: 3g
29-
time: 00-03:00:00
22+
nondemux:
23+
time: 00-01:00:00
3024

31-
qc_fastq_post:
32-
threads: 3
25+
qc_fastq:
26+
threads: 4
3327
mem: 3g
3428
time: 00-03:00:00
3529

3630
qc_screen_validator:
37-
mem: 15g
31+
mem: 32g
3832
time: 00-03:00:00
3933

40-
split_files:
41-
threads: 3
42-
mem: 3g
43-
time: 00-03:00:00
44-
45-
novoalign:
46-
mem: 50g
47-
time: 10-00:00:00
48-
49-
cleanup_conversion:
50-
threads: 5
51-
mem: 30g
52-
time: 00-3:00:00
53-
54-
merge_unmapped_splits:
55-
time: 01-00:00:00
56-
mem: 75g
57-
58-
create_bam_mm_unique:
59-
threads: 6
60-
gres: lscratch:256
61-
mem: 30g
34+
star:
6235
time: 04-00:00:00
63-
64-
merge_splits_unique_mm:
65-
mem: 512g
66-
time: 02-06:00:00
67-
partition: largemem
68-
69-
merge_mm_and_unique:
70-
threads: 2
71-
gres: lscratch:256
72-
mem: 5g
73-
time: 02-00:00:00
74-
75-
qc_alignment:
76-
mem: 10g
36+
gres: lscratch:800
37+
threads: 16
38+
mem: 120g
39+
40+
index_stats:
41+
threads: 8
42+
gres: lscratch:800
43+
mem: 200g
44+
time: 01-00:00:00
7745

7846
qc_troubleshoot:
79-
threads: 3
47+
threads: 4
8048
mem: 3g
8149

8250
dedup:
83-
threads: 2
84-
mem: 64g
51+
threads: 8
52+
mem: 200g
8553
gres: lscratch:256
86-
time: 01-00:00:00
54+
time: 02-00:00:00
8755

8856
create_beds_safs:
89-
mem: 350g
90-
gres: lscratch:256
57+
mem: 200g
58+
gres: lscratch:512
59+
threads: 8
60+
61+
bgzip_beds:
62+
mem: 100g
9163
threads: 4
92-
partition: largemem
64+
65+
feature_counts:
66+
threads: 8
67+
mem: 200g
9368

9469
project_annotations:
9570
threads: 2
9671
mem: 10g
9772
time: 00-01:00:00
9873

99-
peak_annotations:
100-
threads: 3
74+
peak_junctions:
75+
threads: 10
76+
gres: lscratch:128
77+
mem: 36g
78+
time: 04-00:00:00
79+
80+
peak_Transcripts:
81+
threads: 4
82+
gres: lscratch:128
83+
mem: 30g
84+
time: 04-00:00:00
85+
86+
peak_ExonIntron:
87+
threads: 4
10188
gres: lscratch:128
10289
mem: 30g
103-
time: 00-12:00:00
90+
time: 04-00:00:00
10491

92+
peak_RMSK:
93+
threads: 4
94+
gres: lscratch:128
95+
mem: 30g
96+
time: 04-00:00:00
97+
10598
annotation_report:
106-
mem: 10g
99+
threads: 4
100+
gres: lscratch:128
101+
mem: 30g
102+
time: 00-12:00:00
103+
104+
MANORM_beds:
105+
threads: 4
106+
mem: 30g
107107

108108
MANORM_analysis:
109109
threads: 4
110110
mem: 30g
111+
time: 04-00:00:00
111112

112113
MANORM_post_processing:
113114
threads: 2
114-
mem: 2g
115-
time: 00-01:00:00
115+
mem: 30g
116+
time: 00-12:00:00
116117

117118
MANORM_RMD:
118119
threads: 2
119-
mem: 3g
120-
time: 00-01:00:00
120+
mem: 30g
121+
time: 00-02:00:00
121122

122-
mapq_recalc:
123-
mem: 1TB
124-
gres: lscratch:256
125-
partition: largemem
126-
time: 00-06:00:00
123+
DIFFBIND_beds:
124+
threads: 4
125+
mem: 30g
126+
127+
DIFFBIND_preprocess:
128+
threads: 4
129+
mem: 30g
130+
131+
DIFFBIND_analysis:
132+
threads: 4
133+
mem: 30g
134+
135+
136+
DIFFBIND_report:
137+
threads: 4
138+
mem: 30g

.tests/multiplex_hg38_full.tsv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
file_name multiplex
2-
test_6.fastq.gz test_6
1+
file_name,multiplex
2+
test_6.fastq.gz,test_6

.tests/sample_hg38_full.tsv

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
multiplex sample group barcode adaptor
2-
test_6 Ro_Clip CLIP NNNNNCACTGTNNNN AGATCGGAAGAGCGTCGTG
3-
test_6 Control_Clip CNTRL NNNNNATTGGCNNNN AGATCGGAAGAGCGTCGTG
1+
multiplex,sample,group,barcode,adaptor
2+
test_6,Ro_Clip,CLIP,NNNNNCACTGTNNNN,AGATCGGAAGAGCGTCGTG
3+
test_6,Control_Clip,CNTRL,NNNNNATTGGCNNNN,AGATCGGAAGAGCGTCGTG

.tests/snakemake_config.yaml

Lines changed: 66 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,102 @@
1+
#########################################################################################
12
# Global configuration file for the pipeline
3+
#########################################################################################
4+
5+
#########################################################################################
6+
#Folders and Paths
7+
#########################################################################################
28
#path to snakemake file
39
sourceDir: ""
4-
510
#path to output directory
611
outputDir: "hg38_full/"
7-
12+
#path to fastq files
13+
fastqDir: ".tests/"
814
#path to manifest files
915
sampleManifest: ".tests/sample_hg38_full.tsv"
1016
multiplexManifest: ".tests/multiplex_hg38_full.tsv"
1117
contrastManifest: ".test/contrasts_example.tsv"
1218

13-
#path to fastq files
14-
fastqDir: ".tests/"
15-
19+
########################################################################################
1620
#user parameters
17-
filterlength: 20 #minimum read length to include in analysis [any int >20]
21+
#########################################################################################
1822
multiplexflag: "Y" #flag that samples are multiplexed ["Y","N"]
23+
umiSeparator: "rbc:" #required for nondemultiplexed samples to determine delimiter for deduplication [":", "_", "rbc:"]
1924
mismatch: 1 #number of bp mismatches allowed in demultiplexing [1,2,3]
25+
barcode_qc_flag: "PROCESS" #barcodes will undergo QC to ensure uniformity within samples; ["PROCESS", "IGNORE"]
26+
min_reads_mapped: 0.5 #minimum percent of reads that should be mapped; IE .5 for 50% of all reads must be mapped [0.5]
2027
reference: "hg38" #reference organism ["mm10", "hg38"]
21-
spliceaware: "N" #whether to run splice_aware part of the pipeline ['y', 'n']
28+
filterlength: 20 #minimum read length to include in analysis [any int >20]
29+
phredQuality: 20 #minimum quality score for 3’ end trimming
2230
includerRNA: "N" #include refseq rRNA's in annotations ["Y", "N"]
23-
spliceBPlength: 75 #length of splice index to use [50, 75, 150]
2431
splicejunction: "N" #include splice junctions in peak calls: "manorm"
25-
condenseexon: "N" #whether to collapse exons
32+
AnnoAnchor: "max_total" #whether annotations for spliced peaks will be based on either 5' most region or region with max reads ["max","5prime"]
2633
mincount: 3 #minimum number of matches to count as a peak [1,2,3]
2734
ntmerge: 50 #minimum distance of nucleotides to merge peaks [10,20,30,40,50,60]
2835
peakid: "ALL" #report peaks for unique peaks only or unique and fractional mm ["unique","all"]
2936
DEmethod: "none" #choose DE method ["manorm","none"]
37+
MANormWidth: 50 #Width of window to calculate read density. [any integer >1; default 50]
38+
MNormDistance: 25 #Summit-to-summit distance cutoff for common peaks. [ any integer >1; default MANormWidth/2]
3039
sampleoverlap: 1 #if DEmethod DIFFBIND, minimum number of samples a peak must be found in to be counted [>1]
3140
pval: 0.005 #if DEmethod, pval cutoff for significance
3241
fc: 1 #if DEmethod, fold change cut off for significance
42+
single_qc_threshold: 95 #maximum threshold for unmapped reads in any single sample
43+
project_qc_threshold: 50 #maximum threshold for unmapped reads across average of all project samples
44+
45+
#########################################################################################
46+
# STAR parameters
47+
#########################################################################################
48+
alignEndsType: "Local" #type of read ends alignment ["Local", "EndToEnd", "Extend5pOfRead1", "Extend5pOfReads12"]
49+
alignIntronMax: 50000 #maximum intron length
50+
alignSJDBoverhangMin: 3 # minimum overhang value for annotated spliced junctions
51+
alignSJoverhangMin: 5 # minimum overhang value for non-canonical spliced junctions
52+
alignTranscriptsPerReadNmax: 10000 #max number of different alignments per read to consider [int>0]
53+
alignWindowsPerReadNmax: 10000 #max number of windows per read [int>0]
54+
limitOutSJcollapsed: 1000000 # max number of collapsed junctions [int>0]
55+
outFilterMatchNmin: 15 # alignment will be output only if the number of matched bases is higher than or equal to this value.
56+
outFilterMatchNminOverLread: 0.9 #alignment will be output only if the number of matched bases is >= to value; normalized to sum of mates’ lengths for paired-end reads
57+
outFilterMismatchNmax: 999 #alignment will be output only if it has no more mismatches than this value.
58+
outFilterMismatchNoverReadLmax: 0.04 #alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value.
59+
outFilterMultimapNmax: 10000 #max number of multiple alignments allowed for a read: if exceeded, the read is considered unmapped
60+
outFilterMultimapScoreRange: 0 #the score range below the maximum score for multimapping alignments
61+
outFilterScoreMin: 0 #alignment will be output only if its score is higher than or equal to this value.
62+
outFilterType: "Normal" #type of filtering ["Normal", "BySJout"]
63+
outSAMattributes: "All" #a string of desired SAM attributes, in the order desired for the output SAM
64+
outSAMunmapped: "None" #output of unmapped reads in the SAM format ["None", "Within"]
65+
outSJfilterCountTotalMin: "3,1,1,1" #minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif
66+
outSJfilterOverhangMin: "30,12,12,12" #minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif
67+
outSJfilterReads: "All" #which reads to consider for collapsed splice junctions output ["All", "Unique"]
68+
seedMultimapNmax: 10000 #only pieces that map fewer than this value are utilized in the stitching procedure [int>0]
69+
seedNoneLociPerWindow: 20 #max number of one seed loci per window [int>0]
70+
seedPerReadNmax: 10000 #max number of seeds per read
71+
seedPerWindowNmax: 500 #max number of seeds per window
72+
sjdbScore: 2 #extra alignment score for alignments that cross database junctions
73+
winAnchorMultimapNmax: 500 #max number of loci anchors are allowed to map to
74+
3375

76+
#########################################################################################
77+
# modules, container parameters
78+
#########################################################################################
3479
#modules, container parameters
3580
containerDir: "/data/CCBR_Pipeliner/iCLIP/container"
3681
fastq_val: "/data/CCBR_Pipeliner/db/PipeDB/bin/fastQValidator"
82+
3783
bedtools: "bedtools/2.29.2"
3884
bowtie2: "bowtie/2-2.3.4"
3985
fastq_screen: "fastq_screen/0.14.0"
4086
fastqc: "fastqc/0.11.9"
41-
java: "java/12.0.1"
4287
manorm: "manorm/1.1.4"
4388
multiqc: "multiqc/1.9"
44-
novocraft: "novocraft/4.03.01"
4589
perl: "perl/5.24.3"
46-
python: "python/3.7"
47-
Qt: "Qt/5.13.2"
48-
singularity: "singularity"
90+
python: "python/3.8"
91+
R: "R/4.0"
4992
samtools: "samtools/1.11"
50-
umitools: "umitools/1.1.1"
93+
star: "STAR/2.7.8a"
5194
subread: "subread/2.0.1"
52-
R: "R/4.0"
95+
ultraplex: "ultraplex/1.2.5"
96+
umitools: "umitools/1.1.1"
97+
98+
#########################################################################################
99+
# dev
100+
#########################################################################################
101+
#testing parameter
102+
testing_option: "N"

config/snakemake_config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ seedPerReadNmax: 10000 #max number of seeds per read
7171
seedPerWindowNmax: 500 #max number of seeds per window
7272
sjdbScore: 2 #extra alignment score for alignments that cross database junctions
7373
winAnchorMultimapNmax: 500 #max number of loci anchors are allowed to map to
74+
quantmod: 'TranscriptomeSAM' #additional alignment on transcriptome
7475

7576
#########################################################################################
7677
# modules, container parameters

workflow/Snakefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ star_seed_read = config['seedPerReadNmax']
8989
star_seed_wind = config['seedPerWindowNmax']
9090
star_sj = config['sjdbScore']
9191
star_win_anchor = config['winAnchorMultimapNmax']
92+
star_quantmod = config['quantmod']
9293

9394
# modules, container
9495
cont_dir = config['containerDir']
@@ -771,6 +772,7 @@ rule star:
771772
s_wind = star_seed_wind,
772773
s_sj = star_sj,
773774
s_anchor = star_win_anchor,
775+
s_quantmod = star_quantmod,
774776
out_prefix = '{sp}_'
775777
envmodules:
776778
config['star'],
@@ -824,7 +826,8 @@ rule star:
824826
--seedPerReadNmax {params.s_read} \
825827
--seedPerWindowNmax {params.s_wind} \
826828
--sjdbScore {params.s_sj} \
827-
--winAnchorMultimapNmax {params.s_anchor}
829+
--winAnchorMultimapNmax {params.s_anchor} \
830+
--quantMode {params.s_quantmod}
828831
829832
# sort file
830833
samtools sort -m 80G -T $tmp_dir $tmp_dir/{params.out_prefix}Aligned.out.bam -o $tmp_dir/{params.out_prefix}Aligned.sortedByCoord.out.bam

workflow/scripts/02_barcode_qc.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ barcode_input = args$barcode_input
2929
output_dir = args$output_dir
3030
mismatch = as.integer(args$mismatch)
3131
mpid = args$mpid
32+
qc_dir = args$qc_dir
3233

3334
#test input
3435
testing="N"

0 commit comments

Comments
 (0)