Skip to content

Commit ad585e7

Browse files
pinin4fjords and claude authored
feat(rpbp/extractorfprofiles): add module (#11959)
* feat(rpbp/extractorfprofiles): add module [skip ci] * ci: trigger tests * test(rpbp/extractorfprofiles): fix test-data path and assert output content Point inputs at params.modules_testdata_base_path (nf-core/test-datasets, modules branch) instead of a deleted personal-fork branch that 404s and fails CI on every profile. Switch the real test to snapshot(process.out).match(). The output is deterministic across cpu counts and byte-identical between the conda and container toolchains, so the content snapshot guards against regressions and matches the convention across the rpbp series. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(tests): add sanitizeOutput to test snapshots [skip ci] Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix(tests): strip numeric snapshot keys stripped by sanitizeOutput [skip ci] Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix(rpbp/extractorfprofiles): move descriptor into default prefix per nf-core convention Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 617c552 commit ad585e7

5 files changed

Lines changed: 287 additions & 0 deletions

File tree

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Conda environment for the rpbp/extractorfprofiles module.
channels:
2+
- conda-forge
3+
- bioconda
4+
dependencies:
5+
# Version is pinned; the container tag used by the module process names the same 4.0.1 release.
- bioconda::rpbp=4.0.1
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Runs the extract-orf-profiles command on one BAM together with an ORF BED and an exons BED,
// writing a single gzipped matrix file named from the prefix. Read lengths and offsets passed
// to the command are pulled from the lengths_offsets table at run time.
process RPBP_EXTRACTORFPROFILES {
2+
tag "$meta.id"
3+
label 'process_medium'
4+
5+
conda "${moduleDir}/environment.yml"
6+
// Singularity and Apptainer get the https image URL unless task.ext.singularity_pull_docker_container is set; any other engine gets the registry image.
container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
7+
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/14/146c3f15abf184a5ec13531d2a040ba7b9235c1091723aa37c7a119817411367/data' :
8+
'community.wave.seqera.io/library/rpbp:4.0.1--71297b462026e13b' }"
9+
10+
input:
11+
// bai is staged alongside the BAM but is never named in the script below.
// NOTE(review): presumably the tool finds the index next to the BAM on its own - confirm.
tuple val(meta), path(bam), path(bai), path(lengths_offsets)
12+
tuple val(meta2), path(orfs_genomic_bed)
13+
tuple val(meta3), path(exons_bed)
14+
15+
output:
16+
// The file name depends on prefix, which is assigned in the script and stub sections.
tuple val(meta), path("${prefix}.mtx.gz"), emit: profiles
17+
// Version tuple: process name, the literal tool name, and the string printed by the eval command; also sent to the versions topic.
tuple val("${task.process}"), val('rpbp'), eval('python -c "import rpbp; print(rpbp.__version__)"'), emit: versions_rpbp, topic: versions
18+
19+
when:
20+
// Runs when task.ext.when is unset or evaluates to true.
task.ext.when == null || task.ext.when
21+
22+
script:
23+
// Extra command-line arguments are appended verbatim as the last line of the command.
def args = task.ext.args ?: ''
24+
// Assigned without def, unlike args above.
// NOTE(review): presumably so the output declaration can resolve the same name - confirm.
prefix = task.ext.prefix ?: "${meta.id}.profiles"
25+
// LENGTHS and OFFSETS are built in the shell: drop the first line of lengths_offsets,
// take field 1 and field 2 respectively, and turn newlines into spaces so each becomes
// a space-separated list for the --lengths and --offsets options.
"""
26+
LENGTHS=\$(tail -n +2 ${lengths_offsets} | cut -f1 | tr '\\n' ' ')
27+
OFFSETS=\$(tail -n +2 ${lengths_offsets} | cut -f2 | tr '\\n' ' ')
28+
29+
extract-orf-profiles \\
30+
${bam} \\
31+
${orfs_genomic_bed} \\
32+
${exons_bed} \\
33+
${prefix}.mtx.gz \\
34+
--lengths \$LENGTHS \\
35+
--offsets \$OFFSETS \\
36+
--num-cpus ${task.cpus} \\
37+
${args}
38+
"""
39+
40+
stub:
41+
// Same default prefix as the script section so the stub writes the file name the output block expects.
prefix = task.ext.prefix ?: "${meta.id}.profiles"
42+
// Writes a gzip stream of a single echoed empty line as a placeholder output; the tool itself is not run.
"""
43+
echo "" | gzip > ${prefix}.mtx.gz
44+
"""
45+
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
name: "rpbp_extractorfprofiles"
2+
description: |
3+
Build a per-ORF P-site count vector for every candidate open reading
4+
frame (ORF) in the catalogue. For each ORF, walks the spliced exons
5+
in 3-nucleotide codon steps and counts the P-site positions
6+
(read 5'-end coordinate plus the length-specific offset selected
7+
upstream) that fall in each codon. Counts are summed across all read
8+
lengths that passed the periodicity filter from
9+
`rpbp/getperiodiclengthsoffsets`.
10+
11+
The resulting per-ORF vectors are the input to Bayesian translation
12+
scoring in `rpbp/estimateorfbayesfactors`: a translated ORF should
13+
show P-site density concentrated at codon-start positions, while a
14+
non-translated region should look flat or noisy. Emitted as a sparse
15+
matrix (one row per ORF, columns indexed by codon position).
16+
keywords:
17+
- rpbp
18+
- orf
19+
- psite
20+
- profile
21+
- riboseq
22+
tools:
23+
- "rpbp":
24+
description: "Rp-Bp - Bayesian inference of ribosome profiling data for identifying translated open reading frames"
25+
homepage: "https://github.com/dieterich-lab/rp-bp"
26+
documentation: "https://rp-bp.readthedocs.io"
27+
tool_dev_url: "https://github.com/dieterich-lab/rp-bp"
28+
doi: "10.1093/nar/gkw1350"
29+
licence:
30+
- "MIT"
31+
identifier: ""
32+
input:
33+
- - meta:
34+
type: map
35+
description: |
36+
Groovy Map containing sample information, e.g. `[ id:'sample1' ]`.
37+
- bam:
38+
type: file
39+
description: Sorted Ribo-seq BAM.
40+
pattern: "*.bam"
41+
ontologies: []
42+
- bai:
43+
type: file
44+
description: BAM index.
45+
pattern: "*.bai"
46+
ontologies: []
47+
- lengths_offsets:
48+
type: file
49+
description: Per-read-length offsets TSV from `rpbp/getperiodiclengthsoffsets`.
50+
pattern: "*.periodic_lengths_offsets.tsv"
51+
ontologies: []
52+
- - meta2:
53+
type: map
54+
description: |
55+
Groovy Map identifying the reference (e.g. `[ id:'reference' ]`).
56+
- orfs_genomic_bed:
57+
type: file
58+
description: Per-ORF genomic BED from `rpbp/preparegenome`.
59+
pattern: "*.orfs-genomic.annotated.bed.gz"
60+
ontologies: []
61+
- - meta3:
62+
type: map
63+
description: |
64+
Groovy Map identifying the reference (e.g. `[ id:'reference' ]`).
65+
- exons_bed:
66+
type: file
67+
description: Per-ORF exons BED from `rpbp/preparegenome`.
68+
pattern: "*.orfs-exons.annotated.bed.gz"
69+
ontologies: []
70+
output:
71+
profiles:
72+
- - meta:
73+
type: map
74+
description: Groovy Map inherited from input meta.
75+
- "${prefix}.mtx.gz":
76+
type: file
77+
description: Per-ORF P-site profile sparse matrix.
78+
pattern: "*.mtx.gz"
79+
ontologies: []
80+
versions_rpbp:
81+
- - ${task.process}:
82+
type: string
83+
description: The name of the process
84+
- rpbp:
85+
type: string
86+
description: The name of the tool
87+
- python -c "import rpbp; print(rpbp.__version__)":
88+
type: eval
89+
description: The expression to obtain the version of the tool
90+
topics:
91+
versions:
92+
- - ${task.process}:
93+
type: string
94+
description: The name of the process
95+
- rpbp:
96+
type: string
97+
description: The name of the tool
98+
- python -c "import rpbp; print(rpbp.__version__)":
99+
type: eval
100+
description: The expression to obtain the version of the tool
101+
authors:
102+
- "@pinin4fjords"
103+
maintainers:
104+
- "@pinin4fjords"
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// nf-test spec for the RPBP_EXTRACTORFPROFILES process loaded from ../main.nf.
// Holds two tests that feed the same three inputs: one normal run and one run with the -stub option.
// Both compare the sanitized process output against a stored snapshot.
nextflow_process {
2+
3+
name "Test Process RPBP_EXTRACTORFPROFILES"
4+
script "../main.nf"
5+
process "RPBP_EXTRACTORFPROFILES"
6+
7+
tag "modules"
8+
tag "modules_nfcore"
9+
tag "rpbp"
10+
tag "rpbp/extractorfprofiles"
11+
12+
// Normal run. Every input file is resolved under params.modules_testdata_base_path with checkIfExists enabled.
test("homo_sapiens chr20 - extract orf profiles") {
13+
14+
when {
15+
process {
16+
// input[0] is the sample tuple (meta, BAM, BAM index, lengths and offsets TSV); input[1] and input[2] are the two reference BED tuples.
"""
17+
input[0] = Channel.of([
18+
[ id:'test', single_end:true, strandedness:'forward' ],
19+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true),
20+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", checkIfExists: true),
21+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/rpbp/SRX11780888_chr20.periodic_lengths_offsets.tsv", checkIfExists: true)
22+
])
23+
input[1] = [
24+
[ id:'reference' ],
25+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/rpbp/reference.orfs-genomic.annotated.bed.gz", checkIfExists: true)
26+
]
27+
input[2] = [
28+
[ id:'reference' ],
29+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/rpbp/reference.orfs-exons.annotated.bed.gz", checkIfExists: true)
30+
]
31+
"""
32+
}
33+
}
34+
35+
then {
36+
// assertAll evaluates both checks: the process succeeded, and the sanitized output matches the snapshot.
assertAll(
37+
{ assert process.success },
38+
{ assert snapshot(sanitizeOutput(process.out)).match() }
39+
)
40+
}
41+
}
42+
43+
// Stub run. Inputs are identical to the normal test; only the -stub option differs.
test("homo_sapiens chr20 - extract orf profiles - stub") {
44+
45+
options '-stub'
46+
47+
when {
48+
process {
49+
"""
50+
input[0] = Channel.of([
51+
[ id:'test', single_end:true, strandedness:'forward' ],
52+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true),
53+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", checkIfExists: true),
54+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/rpbp/SRX11780888_chr20.periodic_lengths_offsets.tsv", checkIfExists: true)
55+
])
56+
input[1] = [
57+
[ id:'reference' ],
58+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/rpbp/reference.orfs-genomic.annotated.bed.gz", checkIfExists: true)
59+
]
60+
input[2] = [
61+
[ id:'reference' ],
62+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/rpbp/reference.orfs-exons.annotated.bed.gz", checkIfExists: true)
63+
]
64+
"""
65+
}
66+
}
67+
68+
then {
69+
// Same two checks as the normal test, matched against this test's own snapshot entry.
assertAll(
70+
{ assert process.success },
71+
{ assert snapshot(sanitizeOutput(process.out)).match() }
72+
)
73+
}
74+
}
75+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
{
2+
"homo_sapiens chr20 - extract orf profiles - stub": {
3+
"content": [
4+
{
5+
"profiles": [
6+
[
7+
{
8+
"id": "test",
9+
"single_end": true,
10+
"strandedness": "forward"
11+
},
12+
"test.profiles.mtx.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
13+
]
14+
],
15+
"versions_rpbp": [
16+
[
17+
"RPBP_EXTRACTORFPROFILES",
18+
"rpbp",
19+
"4.0.1"
20+
]
21+
]
22+
}
23+
],
24+
"timestamp": "2026-06-10T16:44:43.898520726",
25+
"meta": {
26+
"nf-test": "0.9.5",
27+
"nextflow": "26.04.3"
28+
}
29+
},
30+
"homo_sapiens chr20 - extract orf profiles": {
31+
"content": [
32+
{
33+
"profiles": [
34+
[
35+
{
36+
"id": "test",
37+
"single_end": true,
38+
"strandedness": "forward"
39+
},
40+
"test.profiles.mtx.gz:md5,7e1ca4eecd50b189f5f4fc362a896d8b"
41+
]
42+
],
43+
"versions_rpbp": [
44+
[
45+
"RPBP_EXTRACTORFPROFILES",
46+
"rpbp",
47+
"4.0.1"
48+
]
49+
]
50+
}
51+
],
52+
"timestamp": "2026-06-10T16:44:38.531400548",
53+
"meta": {
54+
"nf-test": "0.9.5",
55+
"nextflow": "26.04.3"
56+
}
57+
}
58+
}

0 commit comments

Comments
 (0)