Skip to content

Commit cb4bde9

Browse files
Merge pull request #263 from ncsa/feature/claude-assisted-tests
Feature/claude assisted tests
2 parents 3d4a7c8 + 265d4e8 commit cb4bde9

26 files changed

Lines changed: 4447 additions & 546 deletions

.gitignore

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,31 @@
11
# Ignore filetypes
22
*.pyc
3+
*.pyo
4+
*.pyd
5+
__pycache__/
6+
7+
# Virtual environments
38
/python2env/
9+
/.venv/
10+
/venv/
11+
/env/
12+
13+
# IDEs
414
/.ipynb_checkpoints/
515
/.vscode/
6-
/.idea/
16+
/.idea/
17+
18+
# Test & coverage artifacts
19+
.coverage
20+
.coverage.*
21+
htmlcov/
22+
.pytest_cache/
23+
24+
# NEAT log files
25+
*.log
26+
27+
# Build / packaging
28+
dist/
29+
build/
30+
*.egg-info/
31+
*.egg

ChangeLog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# NEAT has a new home
22
NEAT is now a part of the NCSA GitHub and active development will continue here. Please direct issues, comments, and requests to the NCSA issue tracker. Submit pull requests here instead of the old repo.
33

4+
# NEAT v4.3.6
5+
- Multiple bug fixes and fixes to outputs. See the release notes for full details.
6+
47
# NEAT v4.3.5
58
- An improvement rather than a bug fix this time. We moved vcf processing into the threaded portion, as our speeds were better than single threaded, but very slow on the vcf writing portion. This sped things up considerably, so we tested and confirmed that it is working as desired and are updating to a new version with improved VCF production in parallel mode.
69

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# The NEAT Project v4.3.5
1+
# The NEAT Project v4.3.6
22

3-
Welcome to the NEAT project, the NExt-generation sequencing Analysis Toolkit, version 4.3.5. This release of NEAT 4.3.5 includes several fixes and a little bit of restructuring, including a parallel process for running `neat read-simulator`. Our tests show much improved performance. If the logs seem excessive, you might try using the `--log-level ERROR` to reduce the output from the logs. See the [ChangeLog](ChangeLog.md) for notes. NEAT 4.3.5 is the official release of NEAT 4.0. It represents a lot of hard work from several contributors at NCSA and beyond. With the addition of parallel processing, we feel that the code is ready for production, and future releases will focus on compatibility, bug fixes, and testing. Future releases for the time being will be enumerations of 4.3.X.
3+
Welcome to the NEAT project, the NExt-generation sequencing Analysis Toolkit, version 4.3.6. This release of NEAT 4.3.6 includes several fixes and a little bit of restructuring, including a parallel process for running `neat read-simulator`. Our tests show much improved performance. If the logs seem excessive, you might try using `--log-level ERROR` to reduce the output from the logs. See the [ChangeLog](ChangeLog.md) for notes. NEAT 4.3.5 is the official release of NEAT 4.0. It represents a lot of hard work from several contributors at NCSA and beyond. With the addition of parallel processing, we feel that the code is ready for production, and future releases will focus on compatibility, bug fixes, and testing. Future releases for the time being will be enumerations of 4.3.X.
44

55
## NEAT v4.3.5
66

@@ -22,7 +22,7 @@ To cite this work, please use:
2222
2323
## Table of Contents
2424

25-
* [The NEAT Project v4.3.5](#the-neat-project-v435)
25+
* [The NEAT Project v4.3.6](#the-neat-project-v436)
2626
* [NEAT v4.3.5](#neat-v435)
2727
* [Table of Contents](#table-of-contents)
2828
* [Prerequisites](#prerequisites)
@@ -77,7 +77,7 @@ To install NEAT, you must create a virtual environment using a tool such as `con
7777
First, clone the repository and move to the NEAT directory:
7878

7979
```bash
80-
$ git clone git@github.com:ncsa/NEAT.git
80+
$ git clone https://github.com/ncsa/NEAT.git
8181
$ cd NEAT
8282
```
8383

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
name: neat
22

33
channels:
4-
- bioconda
54
- conda-forge
5+
- bioconda
66

77
dependencies:
88
- python==3.11.*

neat/read_simulator/utils/vcf_func.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def parse_input_vcf(
161161
# Retrieve the GT from the first sample in the record
162162
genotype = retrieve_genotype(record)
163163

164-
elif "WP" in [x.split('=')[0] for x in record[7].split(';')]:
164+
elif "WP" in [x.split('=')[0] for x in record[7].split(';') if '=' in x]:
165165
"""
166166
"WP" is the legacy code NEAT used for genotype it added. It was found in the INFO field.
167167
We're just going to make a sample column in this version of NEAT
@@ -171,10 +171,13 @@ def parse_input_vcf(
171171
format_column = f"GT:{record[8]}"
172172
sample_field = record[9]
173173
for info_item in record[7].split(';'):
174-
if info_item.startswith('WP'):
174+
if info_item.startswith('WP') and '=' in info_item:
175175
genotype = info_item.split('=')[1].replace('/', '|').split('|')
176176
genotype = np.array([int(x) for x in genotype])
177177
normal_sample_field = f"{get_genotype_string(genotype)}:{sample_field}"
178+
elif info_item.startswith('WP'):
179+
_LOG.error(f'Malformed WP field in INFO (missing value): {record[7]}')
180+
sys.exit(1)
178181

179182
else:
180183
format_column = 'GT:' + record[8]
@@ -183,7 +186,7 @@ def parse_input_vcf(
183186
gt_field = get_genotype_string(genotype)
184187
normal_sample_field = f'{gt_field}:{record[9]}'
185188

186-
elif "WP" in [x.split('=')[0] for x in record[7].split(';')]:
189+
elif "WP" in [x.split('=')[0] for x in record[7].split(';') if '=' in x]:
187190
"""
188191
"WP" is the legacy code NEAT used for genotype it added. It was found in the INFO field.
189192
We're just going to make a sample column in this version of NEAT
@@ -192,10 +195,13 @@ def parse_input_vcf(
192195
"""
193196
format_column = "GT"
194197
for info_item in record[7].split(';'):
195-
if info_item.startswith('WP'):
198+
if info_item.startswith('WP') and '=' in info_item:
196199
genotype = info_item.split('=')[1].replace('/', '|').split('|')
197200
genotype = np.array([int(x) for x in genotype])
198201
normal_sample_field = get_genotype_string(genotype)
202+
elif info_item.startswith('WP'):
203+
_LOG.error(f'Malformed WP field in INFO (missing value): {record[7]}')
204+
sys.exit(1)
199205

200206
else:
201207
# If there was no format column, there's no sample column, so we'll generate one

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "neat"
3-
version = "4.3.5"
3+
version = "4.3.6"
44
description = "NGS Simulation toolkit"
55
authors = ["Joshua Allen <jallen17@illinois.edu>"]
66
license = "BSD 3-Clause License"

tests/conftest.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import logging
2+
import pytest
3+
4+
5+
@pytest.fixture(autouse=True)
6+
def _isolate_neat_logging():
7+
"""
8+
Close and remove any FileHandlers attached to NEAT loggers before each test.
9+
Prevents 'ValueError: I/O operation on closed file' errors when a FileHandler
10+
from a previous test is still attached after its underlying file is closed.
11+
Propagation is left intact so caplog can capture NEAT log output.
12+
"""
13+
def _close_file_handlers(logger):
14+
for h in list(logger.handlers):
15+
if isinstance(h, logging.FileHandler):
16+
logger.removeHandler(h)
17+
try:
18+
h.close()
19+
except Exception:
20+
pass
21+
22+
for name, logger in list(logging.Logger.manager.loggerDict.items()):
23+
if (name == "neat" or name.startswith("neat.")) and isinstance(logger, logging.Logger):
24+
_close_file_handlers(logger)
25+
26+
yield
27+
28+
for name, logger in list(logging.Logger.manager.loggerDict.items()):
29+
if (name == "neat" or name.startswith("neat.")) and isinstance(logger, logging.Logger):
30+
_close_file_handlers(logger)

tests/test_cli/test_basic_cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def test_basic_cli():
3636
stdout=subprocess.PIPE,
3737
stderr=subprocess.PIPE,
3838
text=True,
39+
cwd=str(td),
3940
)
4041
assert proc.returncode == 0, f"STDERR:\n{proc.stderr}"
4142
assert out.exists()

tests/test_models/test_error_and_mut_models.py

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -41,26 +41,8 @@ def test_mutation_model_generate_snv_trinuc():
4141
assert snv.alt in ["A", "C", "G", "T"]
4242

4343

44-
def test_sequencing_error_model_zero_error_returns_none_or_empty():
45-
"""
46-
avg_seq_error == 0 should yield no errors.
47-
"""
48-
rng = default_rng(4)
49-
sem = SequencingErrorModel(avg_seq_error=0.0)
50-
ref = SeqRecord(Seq("A" * 40), id="chr1")
51-
quals = np.array([40] * 40, dtype=int)
52-
result = sem.get_sequencing_errors(
53-
padding=20,
54-
reference_segment=ref,
55-
quality_scores=quals,
56-
rng=rng,
57-
)
58-
if isinstance(result, tuple):
59-
introduced, pad = result
60-
assert introduced == []
61-
assert pad >= 0
62-
else:
63-
assert result == []
44+
# test_sequencing_error_model_zero_error_returns_none_or_empty removed:
45+
# duplicate of test_error_models.py::test_sem_zero_error_rate_returns_empty
6446

6547

6648
def test_traditional_quality_model_shapes_and_range():
@@ -135,16 +117,8 @@ def test_mutation_model_snv_does_not_keep_reference_base():
135117
assert snv.alt != central
136118

137119

138-
def test_traditional_quality_model_reproducible_with_seed():
139-
"""Quality model should be deterministic given the same RNG state."""
140-
rng1 = default_rng(8)
141-
rng2 = default_rng(8)
142-
qm = TraditionalQualityModel(average_error=0.01)
143-
144-
qs1 = qm.get_quality_scores(model_read_length=151, length=100, rng=rng1)
145-
qs2 = qm.get_quality_scores(model_read_length=151, length=100, rng=rng2)
146-
147-
assert np.array_equal(qs1, qs2)
120+
# test_traditional_quality_model_reproducible_with_seed removed:
121+
# duplicate of test_error_models.py::test_tqm_get_quality_scores_reproducible
148122

149123

150124
def test_sequencing_error_model_reproducible_with_seed():

0 commit comments

Comments
 (0)