-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.sh
More file actions
executable file
·173 lines (132 loc) · 5.82 KB
/
run.sh
File metadata and controls
executable file
·173 lines (132 loc) · 5.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/bin/bash
# Run the transcriptome analysis pipeline on one pair of FASTQ files.
# Usage: ./run.sh <sample_name> <forward_fastq> <reverse_fastq> [setup_file]
#
# setup_file is sourced and is expected to define setup_analysis_structure.
# When it is omitted, src/setup.sh next to this script is used.

# The setup file is read from $4, so three or four arguments are accepted.
# (Requiring exactly three left SETUP_FILE empty and the script always failed.)
if [ "$#" -lt 3 ] || [ "$#" -gt 4 ]; then
  echo "Usage: $0 <sample_name> <forward_fastq> <reverse_fastq> [setup_file]" >&2
  exit 1
fi

# Absolute directory containing this script, exported to the environment.
ROOT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
export ROOT_PATH

# Assign arguments to variables
SAMPLE_NAME=$1
FORWARD_FASTQ=$2
REVERSE_FASTQ=$3
SETUP_FILE=${4:-${ROOT_PATH}/src/setup.sh}

# Abort early when the setup file is missing
if [ ! -f "${SETUP_FILE}" ]; then
  echo "Error: ./src/setup.sh not found at: ${SETUP_FILE}" >&2
  exit 1
fi

echo "Loading configuration from ${SETUP_FILE} for sample: ${SAMPLE_NAME}"
source "${SETUP_FILE}"

# Call the setup function directly with quoted arguments. Building a command
# string and eval-ing it would word-split paths containing spaces and
# re-evaluate any shell metacharacters in the user-supplied arguments.
setup_analysis_structure "${SAMPLE_NAME}" "${FORWARD_FASTQ}" "${REVERSE_FASTQ}" \
  || { echo "Failed to execute setup script" >&2; exit 1; }
# Load setup_docker.sh (Docker paths and variables) and then steps.sh.
# setup_docker.sh is resolved against ROOT_PATH (the directory of this script)
# rather than the caller's working directory, so the pipeline can be started
# from anywhere.
source "${ROOT_PATH}/src/setup_docker.sh"
# Quoted so a source directory containing spaces is not word-split.
source "${PATHS[SRC]}/steps.sh"

# Print the start time
echo "Starting pipeline for sample: ${SAMPLE_NAME} at $(date)"

# Pull the locations used below out of the PATHS associative array
SAMPLE_DIR=${PATHS[SAMPLE_DIR]}
R1=${PATHS[RAW_R1]}
R2=${PATHS[RAW_R2]}
QC_FASTQC_RAW=${PATHS[QC_FASTQC_RAW]}

# Create the raw FastQC output directory when it does not exist yet
if [ ! -d "$QC_FASTQC_RAW" ]; then
  echo "Output directory does not exist. Creating $QC_FASTQC_RAW..."
  mkdir -p "$QC_FASTQC_RAW"
fi
# ==========================================================
# Step 1: Quality Control using FastQC and Trimmomatic
# ==========================================================
# Rewrite the raw FASTQ paths and the FastQC output directory: the first
# occurrence of the input base directory becomes /raw and the first occurrence
# of the sample directory becomes /data.
# NOTE(review): /raw and /data are presumably the container mount points -
# confirm against setup_docker.sh.
# The prefix is quoted inside the expansion so it is matched literally (the
# previous sed call treated it as a regular expression and broke on '|').
# Syntax: ${var/pattern/replacement}; the replacement itself starts with '/'.
MAPPED_FORWARD_FASTQ=${R1/"${PATHS[BASE_DIR_INPUT]}"//raw}
MAPPED_REVERSE_FASTQ=${R2/"${PATHS[BASE_DIR_INPUT]}"//raw}
MAPPED_RAW_FASTQC_OUTPUT_DIR=${QC_FASTQC_RAW/"$SAMPLE_DIR"//data}

# >>> RUN: FastQC on the raw FASTQ files
# NOTE(review): the STEPS value stays unquoted to preserve the original
# argument count when the entry is empty.
source "${PATHS[SRC]}/fastq_quality.sh" \
  "${MAPPED_FORWARD_FASTQ}" \
  "${MAPPED_REVERSE_FASTQ}" \
  "${MAPPED_RAW_FASTQC_OUTPUT_DIR}" \
  ${STEPS["raw_qc"]}
# ==========================================================
# Prepare the output directory for the trimmed FASTQ files
TRIMMED_DIR=${PATHS[TRIMMED]}
if [ ! -d "$TRIMMED_DIR" ]; then
  echo "Output directory does not exist. Creating $TRIMMED_DIR..."
  mkdir -p "$TRIMMED_DIR"
fi

# Replace the first occurrence of the sample directory with /data. The prefix
# is quoted so it is matched literally instead of as a sed regular expression.
MAPPED_TRIMMED_OUTPUT_DIR=${TRIMMED_DIR/"$SAMPLE_DIR"//data}

# >>> RUN: Trimmomatic to trim the FASTQ files
# NOTE(review): the STEPS value stays unquoted to preserve the original
# argument count when the entry is empty.
source "${PATHS[SRC]}/trimming.sh" \
  "${MAPPED_FORWARD_FASTQ}" \
  "${MAPPED_REVERSE_FASTQ}" \
  "${MAPPED_TRIMMED_OUTPUT_DIR}" \
  ${STEPS["trim"]}
# ==========================================================
# Prepare the FastQC output directory for the trimmed FASTQ files
QC_FASTQC_TRIMMED=${PATHS[QC_FASTQC_TRIMMED]}
if [ ! -d "$QC_FASTQC_TRIMMED" ]; then
  echo "Output directory does not exist. Creating $QC_FASTQC_TRIMMED..."
  mkdir -p "$QC_FASTQC_TRIMMED"
fi

# Trimmed read files inside the mapped trimming output directory
# (plain assignments; no subshell is needed to build a string).
MAPPED_TRIMMED_FORWARD_FASTQ="${MAPPED_TRIMMED_OUTPUT_DIR}/trimmed_forward.fastq.gz"
MAPPED_TRIMMED_REVERSE_FASTQ="${MAPPED_TRIMMED_OUTPUT_DIR}/trimmed_reverse.fastq.gz"

# Replace the first occurrence of the sample directory with /data. The prefix
# is quoted so it is matched literally instead of as a sed regular expression.
MAPPED_TRIMMED_FASTQC_OUTPUT_DIR=${QC_FASTQC_TRIMMED/"$SAMPLE_DIR"//data}

# >>> RUN: FastQC on the trimmed FASTQ files
# NOTE(review): the STEPS value stays unquoted to preserve the original
# argument count when the entry is empty.
source "${PATHS[SRC]}/fastq_quality.sh" \
  "${MAPPED_TRIMMED_FORWARD_FASTQ}" \
  "${MAPPED_TRIMMED_REVERSE_FASTQ}" \
  "${MAPPED_TRIMMED_FASTQC_OUTPUT_DIR}" \
  ${STEPS["trim_qc"]}
# ==========================================================
# ==========================================================
# Step 2: De Novo Transcriptome Assembly
# ==========================================================
# Assembly output directories taken from the PATHS associative array
ASSEMBLY_DIR=${PATHS[ASSEMBLY]}
TRINITY_ASSEMBLY_DIR=${PATHS[TRINITY_ASSEMBLY_DIR]}

# Replace the first occurrence of the sample directory with /data. The prefix
# is quoted so it is matched literally instead of as a sed regular expression.
MAPPED_ASSEMBLY_OUTPUT_DIR=${ASSEMBLY_DIR/"$SAMPLE_DIR"//data}
MAPPED_TRINITY_ASSEMBLY_DIR=${TRINITY_ASSEMBLY_DIR/"$SAMPLE_DIR"//data}

## >>> RUN: Trinity assembly
# NOTE(review): the STEPS value stays unquoted to preserve the original
# argument count when the entry is empty.
source "${PATHS[SRC]}/assembly.sh" \
  "${MAPPED_TRIMMED_FORWARD_FASTQ}" \
  "${MAPPED_TRIMMED_REVERSE_FASTQ}" \
  "${MAPPED_ASSEMBLY_OUTPUT_DIR}" \
  "${MAPPED_TRINITY_ASSEMBLY_DIR}" \
  ${STEPS["assembly"]}

# Assembled FASTA location, consumed by the assembly QC steps below
TRINITY_FASTA=${PATHS[TRINITY_ASSEMBLY_FASTA]}
# ==========================================================
# ==========================================================
# Step 3: Assembly Quality Control
# ==========================================================
# Map the Trinity FASTA path: the first occurrence of the sample directory
# becomes /data. The prefix is quoted so it is matched literally instead of as
# a sed regular expression.
MAPPED_TRINITY_FASTA=${TRINITY_FASTA/"$SAMPLE_DIR"//data}

# Prepare the BUSCO output directory
ASSEMBLY_QC_BUSCO=${PATHS[ASSEMBLY_QC_BUSCO]}
if [ ! -d "$ASSEMBLY_QC_BUSCO" ]; then
  echo "Output directory does not exist. Creating $ASSEMBLY_QC_BUSCO..."
  mkdir -p "$ASSEMBLY_QC_BUSCO"
fi
# Unlike the other steps, the BUSCO paths are mapped to "." (a relative path)
# rather than /data.
MAPPED_BUSCO_OUTPUT_DIR=${ASSEMBLY_QC_BUSCO/"$SAMPLE_DIR"/.}

# Prepare the BUSCO plot directory
BUSCO_PLOT=${PATHS[ASSEMBLY_QC_BUSCO_PLOT]}
if [ ! -d "$BUSCO_PLOT" ]; then
  echo "Output directory does not exist. Creating $BUSCO_PLOT..."
  mkdir -p "$BUSCO_PLOT"
fi
MAPPED_BUSCO_PLOT=${BUSCO_PLOT/"$SAMPLE_DIR"/.}

## >>> RUN: BUSCO for assembly quality control
# The BUSCO step is currently disabled; the invocation is kept for reference.
#source ${PATHS["SRC"]}/assembly_qc_busco.sh \
#"${MAPPED_TRINITY_FASTA}" \
#"${MAPPED_BUSCO_OUTPUT_DIR}" \
#"${MAPPED_BUSCO_PLOT}" \
#${STEPS["assembly_qc_busco"]}
# ==========================================================
# >>> RUN: Transrate for assembly quality control
# The script path is quoted so a source directory containing spaces is not
# word-split.
# NOTE(review): the STEPS value stays unquoted to preserve the original
# argument count when the entry is empty.
source "${PATHS[SRC]}/assembly_qc_transrate.sh" ${STEPS["assembly_qc_transrate"]}
# ==========================================================
# >>> RUN: TransDecoder for coding region prediction
source "${PATHS[SRC]}/coding_regions_transdecoder.sh" ${STEPS["coding_region_prediction"]}
# ==========================================================