Skip to content

Commit ba0baf3

Browse files
joshfactorial and claude committed
Update tests to match v4.4.3 API changes; CI was failing on develop
Three sets of tests were exercising contracts that the v4.4.3 work changed:

1. tests/test_read_simulator/test_generate_reads.py (9 tests)
   generate_reads no longer returns a list of (Read, Read|None) pairs — it streams Read objects directly to the output_file_writer as part of the bounded-memory rework. Added a small `_CollectingOFW` stand-in that captures write_bam_record calls; the affected tests now enable produce_bam=True, pass a `_CollectingOFW`, and assert on ofw.bam_records. For paired-end tests, the pair-structure assertions were replaced with forward/reverse-count assertions, since pairs are no longer kept together in memory (mates are interleaved by position via a heap buffer).

2. tests/test_read_simulator/test_options.py (1 test)
   test_default_values expected parallel_block_size == 500000. The v4.4.3 default is 0 — a sentinel that triggers auto-tuning from genome length and thread count in runner.py. Test updated to assert the sentinel.

3. tests/test_read_simulator/test_stitch_outputs.py (11 tests)
   - concat() now takes a destination Path (not a gzip handle) and writes raw bytes — concatenated gzip streams are valid gzip per the spec. The concat tests now pass a path and read it back via gzip.open.
   - merge_bam now calls pysam.cat (not pysam.merge + pysam.sort). The four old merge/sort/temp-file/chunk-batching tests were replaced with three cat-based equivalents (call_count, output path, full-input ordering).
   - The main() tests that asserted against StringIO buffers were updated to read back from the actual destination paths, since byte-level concat writes through the path, not through the handle.

All 599 tests pass locally (`pytest tests/`); should unblock the GitHub Actions failures on develop.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent a1ebd4d commit ba0baf3

3 files changed

Lines changed: 139 additions & 92 deletions

File tree

tests/test_read_simulator/test_generate_reads.py

Lines changed: 89 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -321,13 +321,37 @@ def _make_options(paired=False, seed=0):
321321
opts.read_len = _READ_LEN
322322
opts.paired_ended = paired
323323
opts.coverage = 5
324+
# Enable BAM so generate_reads streams Read objects to the collecting OFW for
325+
# inspection. FASTQ stays off — we don't need its bytes for these tests.
324326
opts.produce_fastq = False
325-
opts.produce_bam = False
327+
opts.produce_bam = True
326328
opts.produce_vcf = False
327329
opts.overwrite_output = True
328330
return opts
329331

330332

333+
class _CollectingOFW:
334+
"""
335+
Minimal OutputFileWriter stand-in for generate_reads tests.
336+
337+
generate_reads now streams Read objects to its output_file_writer instead of
338+
returning a list. Tests that want to inspect the generated reads enable
339+
produce_bam=True and pass an instance of this class as the ofw. Every
340+
``write_bam_record`` call is captured into ``bam_records`` for assertion.
341+
"""
342+
def __init__(self):
343+
# generate_reads dereferences ofw.bam and ofw.files_to_write[ofw.bam].
344+
# Provide a stub handle so those lookups succeed; we don't use the handle.
345+
self.bam = "_collecting_bam"
346+
self.fq1 = None
347+
self.fq2 = None
348+
self.files_to_write = {self.bam: SimpleNamespace(write=lambda *a, **kw: None)}
349+
self.bam_records = []
350+
351+
def write_bam_record(self, read, contig_id, bam_handle, read_length):
352+
self.bam_records.append(read)
353+
354+
331355
def _make_models(read_len=_READ_LEN, frag_mean=300):
332356
error_model = SequencingErrorModel(read_length=read_len)
333357
qual_model = TraditionalQualityModel()
@@ -407,46 +431,55 @@ def test_generate_reads_single_ended_returns_read_none_pairs():
407431
err, qual, frag = _make_models()
408432
opts = _make_options(paired=False)
409433
cv = ContigVariants()
434+
ofw = _CollectingOFW()
410435

411-
results = generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
412-
_all_span_targeted(), _nothing_discarded(),
413-
opts, None, "chr1", 0, 0)
436+
generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
437+
_all_span_targeted(), _nothing_discarded(),
438+
opts, ofw, "chr1", 0, 0)
414439

415-
assert isinstance(results, list)
416-
assert len(results) > 0
417-
for read1, read2 in results:
418-
assert isinstance(read1, Read)
419-
assert read2 is None
440+
# In SE mode only read1 records are emitted — all forward strand.
441+
assert len(ofw.bam_records) > 0
442+
for read in ofw.bam_records:
443+
assert isinstance(read, Read)
444+
assert read.is_reverse is False
420445

421446

422447
def test_generate_reads_paired_ended_returns_read_read_pairs():
423448
ref = _make_reference()
424449
err, qual, frag = _make_models()
425450
opts = _make_options(paired=True)
426451
cv = ContigVariants()
452+
ofw = _CollectingOFW()
427453

428-
results = generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
429-
_all_span_targeted(), _nothing_discarded(),
430-
opts, None, "chr1", 0, 0)
454+
generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
455+
_all_span_targeted(), _nothing_discarded(),
456+
opts, ofw, "chr1", 0, 0)
431457

432-
assert len(results) > 0
433-
for read1, read2 in results:
434-
assert isinstance(read1, Read)
435-
assert isinstance(read2, Read)
458+
# PE mode emits both forward (r1) and reverse (r2) records, interleaved by
459+
# position via the heap-buffer in generate_reads. Verify both strands appear.
460+
assert len(ofw.bam_records) > 0
461+
forwards = [r for r in ofw.bam_records if not r.is_reverse]
462+
reverses = [r for r in ofw.bam_records if r.is_reverse]
463+
assert len(forwards) > 0
464+
assert len(reverses) > 0
465+
# Each fragment contributes one r1 and one r2.
466+
assert len(forwards) == len(reverses)
436467

437468

438469
def test_generate_reads_read_length_matches_options():
439470
ref = _make_reference()
440471
err, qual, frag = _make_models()
441472
opts = _make_options(paired=False)
442473
cv = ContigVariants()
474+
ofw = _CollectingOFW()
443475

444-
results = generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
445-
_all_span_targeted(), _nothing_discarded(),
446-
opts, None, "chr1", 0, 0)
476+
generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
477+
_all_span_targeted(), _nothing_discarded(),
478+
opts, ofw, "chr1", 0, 0)
447479

448-
for read1, _ in results:
449-
assert len(read1.read_sequence) == _READ_LEN
480+
assert len(ofw.bam_records) > 0
481+
for read in ofw.bam_records:
482+
assert len(read.read_sequence) == _READ_LEN
450483

451484

452485
# ---------------------------------------------------------------------------
@@ -459,13 +492,14 @@ def test_generate_reads_targeted_region_flag_false_filters_all():
459492
err, qual, frag = _make_models()
460493
opts = _make_options(paired=False)
461494
cv = ContigVariants()
495+
ofw = _CollectingOFW()
462496
no_target = [(0, _SPAN, False)]
463497

464-
results = generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
465-
no_target, _nothing_discarded(),
466-
opts, None, "chr1", 0, 0)
498+
generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
499+
no_target, _nothing_discarded(),
500+
opts, ofw, "chr1", 0, 0)
467501

468-
assert results == []
502+
assert ofw.bam_records == []
469503

470504

471505
def test_generate_reads_discard_region_removes_all():
@@ -474,13 +508,14 @@ def test_generate_reads_discard_region_removes_all():
474508
err, qual, frag = _make_models()
475509
opts = _make_options(paired=False)
476510
cv = ContigVariants()
511+
ofw = _CollectingOFW()
477512
discard_all = [(0, _SPAN, True)]
478513

479-
results = generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
480-
_all_span_targeted(), discard_all,
481-
opts, None, "chr1", 0, 0)
514+
generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
515+
_all_span_targeted(), discard_all,
516+
opts, ofw, "chr1", 0, 0)
482517

483-
assert results == []
518+
assert ofw.bam_records == []
484519

485520

486521
def test_generate_reads_discard_flag_false_keeps_reads():
@@ -489,12 +524,13 @@ def test_generate_reads_discard_flag_false_keeps_reads():
489524
err, qual, frag = _make_models()
490525
opts = _make_options(paired=False)
491526
cv = ContigVariants()
527+
ofw = _CollectingOFW()
492528

493-
results = generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
494-
_all_span_targeted(), _nothing_discarded(),
495-
opts, None, "chr1", 0, 0)
529+
generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
530+
_all_span_targeted(), _nothing_discarded(),
531+
opts, ofw, "chr1", 0, 0)
496532

497-
assert len(results) > 0
533+
assert len(ofw.bam_records) > 0
498534

499535

500536
# ---------------------------------------------------------------------------
@@ -506,6 +542,7 @@ def test_generate_reads_variants_populated_on_reads():
506542
ref = _make_reference()
507543
err, qual, frag = _make_models()
508544
opts = _make_options(paired=False)
545+
ofw = _CollectingOFW()
509546

510547
cv = ContigVariants()
511548
snv = SingleNucleotideVariant(
@@ -516,11 +553,11 @@ def test_generate_reads_variants_populated_on_reads():
516553
)
517554
cv.add_variant(snv)
518555

519-
results = generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
520-
_all_span_targeted(), _nothing_discarded(),
521-
opts, None, "chr1", 0, 0)
556+
generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
557+
_all_span_targeted(), _nothing_discarded(),
558+
opts, ofw, "chr1", 0, 0)
522559

523-
reads_with_mutations = [r1 for r1, _ in results if r1.mutations]
560+
reads_with_mutations = [r for r in ofw.bam_records if r.mutations]
524561
assert len(reads_with_mutations) > 0
525562

526563

@@ -540,30 +577,33 @@ def test_generate_reads_paired_discard_region_removes_all():
540577
err, qual, frag = _make_models()
541578
opts = _make_options(paired=True)
542579
cv = ContigVariants()
580+
ofw = _CollectingOFW()
543581
discard_all = [(0, _SPAN, True)]
544582

545-
results = generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
546-
_all_span_targeted(), discard_all,
547-
opts, None, "chr1", 0, 0)
583+
generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
584+
_all_span_targeted(), discard_all,
585+
opts, ofw, "chr1", 0, 0)
548586

549-
assert results == []
587+
assert ofw.bam_records == []
550588

551589

552590
def test_generate_reads_paired_no_discard_produces_read_pairs():
553-
"""Paired-end run without discard produces (Read, Read) pairs (regression guard)."""
591+
"""Paired-end run without discard produces both forward and reverse read records."""
554592
ref = _make_reference()
555593
err, qual, frag = _make_models()
556594
opts = _make_options(paired=True)
557595
cv = ContigVariants()
596+
ofw = _CollectingOFW()
558597

559-
results = generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
560-
_all_span_targeted(), _nothing_discarded(),
561-
opts, None, "chr1", 0, 0)
598+
generate_reads(0, ref, err, 3, qual, frag, get_uniform_gc_model(), cv,
599+
_all_span_targeted(), _nothing_discarded(),
600+
opts, ofw, "chr1", 0, 0)
562601

563-
assert len(results) > 0
564-
for read1, read2 in results:
565-
assert isinstance(read1, Read)
566-
assert isinstance(read2, Read)
602+
forwards = [r for r in ofw.bam_records if not r.is_reverse]
603+
reverses = [r for r in ofw.bam_records if r.is_reverse]
604+
assert len(forwards) > 0
605+
assert len(reverses) > 0
606+
assert len(forwards) == len(reverses)
567607

568608

569609
# ---------------------------------------------------------------------------

tests/test_read_simulator/test_options.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,9 @@ def test_default_values():
158158
assert opts.quality_offset == 33
159159
assert opts.threads == 1
160160
assert opts.parallel_mode == "contig"
161-
assert opts.parallel_block_size == 500000
161+
# Default is 0 (sentinel for auto-tune from genome length and thread count).
162+
# An explicit positive int in YAML overrides; see runner for the auto-tune logic.
163+
assert opts.parallel_block_size == 0
162164
assert opts.cleanup_splits is True
163165
assert opts.reuse_splits is False
164166
assert opts.overwrite_output is False

0 commit comments

Comments
 (0)