Skip to content

Commit 0464d64

Browse files
committed
A couple of changes related to filetypes
1 parent 625be66 commit 0464d64

2 files changed

Lines changed: 13 additions & 11 deletions

File tree

neat/bacterial_wrapper/runner.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -125,14 +125,16 @@ def bacterial_wrapper(reference_file, bacteria_name, ref_config_file, output_dir
125125

126126
# Stitching all outputs together - Keshav's script
127127

128-
def concat_fq(input_files: List[Path], dest: BgzfWriter) -> None:
128+
def concat_fq(input_files: List[Path], dest: Path) -> None:
129129

130130
if not input_files:
131+
# Nothing to do, and no error to throw
131132
return
132-
133-
for input_file in input_files:
134-
with bgzf.BgzfReader(input_file) as in_f:
135-
shutil.copyfileobj(in_f, dest)
133+
134+
with gzip.open(dest, 'wt') as out_f:
135+
for input_file in input_files:
136+
with gzip.open(input_file, 'rt') as in_f:
137+
shutil.copyfileobj(in_f, out_f)
136138

137139
def merge_bam(bams: List[Path], dest: Path, threads: int) -> None:
138140

@@ -172,19 +174,19 @@ def stitch_all_outputs(files: List[Path], output_dir) -> None:
172174
fq2_list.append(file)
173175
elif "r1.fastq" in file_name or ".fastq" in suffixes:
174176
fq1_list.append(file)
175-
elif ".vcf" in suffixes:
177+
elif ".vcf" in suffixes and ".tbi" not in suffixes:
176178
vcf_list.append(file)
177-
elif ".bam" in suffixes:
179+
elif ".bam" in suffixes and ".bai" not in suffixes:
178180
bam_list.append(file)
179181

180-
dest_fq1 = bgzf.BgzfWriter(f"{output_dir}/stitched_fq1.bgzf")
182+
dest_fq1 = Path(f"{output_dir}/stitched_fq1.gz")
181183
dest_bam = Path(f"{output_dir}/stitched.bam")
182184
dest_vcf = Path(f"{output_dir}/stitched.vcf")
183185

184186
concat_fq(fq1_list, dest_fq1)
185187

186188
if (fq2_list):
187-
dest_fq2 = bgzf.BgzfWriter(f"{output_dir}/stitched_fq2.bgzf")
189+
dest_fq2 = Path(f"{output_dir}/stitched_fq2.gz")
188190
concat_fq(fq2_list, dest_fq2)
189191

190192
merge_bam(bam_list, dest_bam, 2)

neat/read_simulator/utils/stitch_outputs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
def concat(files_to_join: List[Path], dest_file: BgzfWriter) -> None:
2121
if not files_to_join:
2222
# Nothing to do, and no error to throw
23-
_LOG.warn(f"Concat called but there are no files to join: {files_to_join}" )
23+
_LOG.warning(f"Concat called but there are no files to join: {files_to_join}" )
2424
return
2525

2626
for f in files_to_join:
@@ -38,7 +38,7 @@ def merge_vcfs(vcfs: List[Path], ofw: OutputFileWriter) -> None:
3838
def merge_bam(bam_files: List[Path], ofw: OutputFileWriter, threads: int):
3939
merged_file = ofw.tmp_dir / "temp_merged.bam"
4040
intermediate_files = []
41-
# Note 1000 is abritrary. May need to be a user parameter/adjustable/a function
41+
# Note: the batch size of 500 is arbitrary. May need to be a user parameter/adjustable/a function
4242
for i in range(0, len(bam_files), 500):
4343
temp_file = str(ofw.tmp_dir / f"temp_merged_{i}.bam")
4444
pysam.merge("--no-PG", "-f", temp_file, *map(str, bam_files[i:i+500]))

0 commit comments

Comments
 (0)