Skip to content

Commit c6cfef1

Browse files
committed
conda env pytest contig table prep fix
1 parent 9df099b commit c6cfef1

6 files changed

Lines changed: 50 additions & 7 deletions

File tree

.github/workflows/build.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ jobs:
7171
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
7272
- name: Test package installation and imports
7373
run: |
74+
# Full test suite (including BLAST/primer3 integration) runs in conda-build only.
7475
# Verify package can be imported
7576
python -c "import PROBESt; print('PROBESt imported successfully')"
7677
python -c "from PROBESt.args import arguments_parse; print('Args module imported successfully')"

app/templates/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
<div class="logo-title">
1515
<img src="{{ url_for('static', filename='ctlab_probest_white.png') }}" alt="PROBESt" style="width: 30vw;">
1616
<div>
17-
<h1>PROBESt v0.2.0</h1>
17+
<h1>PROBESt v0.2.2</h1>
1818
<p class="subtitle">ITMO probe generation and optimization tool</p>
1919
</div>
2020
</div>

extraction/environment.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ dependencies:
129129
- typing_extensions=4.15.0=pyhcf101f3_0
130130
- typing_utils=0.1.0=pyhd8ed1ab_1
131131
- uri-template=1.3.0=pyhd8ed1ab_1
132-
- wcwidth=0.2.14=pyhd8ed1ab_0
132+
- wcwidth=0.2.24=pyhd8ed1ab_0
133133
- webcolors=24.11.1=pyhd8ed1ab_0
134134
- webencodings=0.5.1=pyhd8ed1ab_3
135135
- websocket-client=1.8.0=pyhd8ed1ab_1
@@ -220,7 +220,7 @@ dependencies:
220220
- opencv-python-headless==4.11.0.86
221221
- openpyxl==3.1.5
222222
- outlines==1.2.5
223-
- outlines-core==0.2.11
223+
- outlines-core==0.2.21
224224
- pandas==2.3.3
225225
- pdfminer-six==20250327
226226
- pdfplumber==0.11.6

src/PROBESt/prepare_blast.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@
4242
FASTA_EXTENSIONS = {'.fa', '.fasta', '.fna', '.fa.gz', '.fasta.gz', '.fna.gz'}
4343

4444

45+
def _logical_fasta_key(basename: str) -> str:
46+
"""Plain and gzip of the same file share a key (e.g. a.fna and a.fna.gz -> a.fna)."""
47+
return basename[:-3] if basename.endswith(".gz") else basename
48+
49+
4550
def deduplicate_contig_table(contig_table_path: str, contig_id_col: int = 1) -> None:
4651
"""
4752
Deduplicate a contig table in-place by contig ID (second column by default).
def get_fasta_files(directory: str) -> List[str]:
    """
    Get all FASTA files from a directory.

    If both ``assembly.fna`` and ``assembly.fna.gz`` exist, keep only the ``.gz``
    path so directories may ship compressed assets (CI) while developers keep
    uncompressed copies locally.

    Args:
        directory: Path to the directory containing FASTA files. It is joined
            into the glob pattern as-is, so glob metacharacters in it
            (``*``, ``?``, ``[``) are interpreted as wildcards.

    Returns:
        Sorted list of paths to FASTA files, with at most one path per
        logical file (a plain file and its ``.gz`` twin count as one).
    """
    matched: List[str] = []
    for ext in FASTA_EXTENSIONS:
        matched.extend(glob.glob(os.path.join(directory, f"*{ext}")))

    by_key: Dict[str, str] = {}
    # Visit paths in ascending order so that, among candidates of equal
    # preference, the lexicographically smallest one is the one retained.
    for path in sorted(set(matched)):
        key = _logical_fasta_key(os.path.basename(path))
        prev = by_key.get(key)
        # Take the path when the key is new, or when it upgrades a plain file
        # to its gzip twin. In every other case the earlier entry stays.
        if prev is None or (path.endswith(".gz") and not prev.endswith(".gz")):
            by_key[key] = path

    return sorted(by_key.values())
126149

127150

128151
def prepare_blast_database(

tests/PROBESt/test_prepare_blast.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import sys
3232
import tempfile
3333
import shutil
34+
import gzip
3435
from pathlib import Path
3536

3637
# Add the project root directory to the Python path
@@ -182,6 +183,20 @@ def test_includes_all_supported_extensions(self, tmp_path):
182183
fasta_files = get_fasta_files(str(ext_dir))
183184
assert len(fasta_files) == 3
184185

186+
def test_prefers_gzip_when_plain_and_gzip_same_stem(self, tmp_path):
    """A plain/.gz pair of the same assembly is listed once, as the .gz path."""
    dup_dir = tmp_path / "dup"
    dup_dir.mkdir()

    # Write the same record twice: once uncompressed, once gzip-compressed.
    (dup_dir / "GCA_asm.fna").write_text(">ctg1\nATGC\n")
    with gzip.open(dup_dir / "GCA_asm.fna.gz", "wb") as handle:
        handle.write(b">ctg1\nATGC\n")

    found = get_fasta_files(str(dup_dir))
    assert len(found) == 1
    assert found[0].endswith(".fna.gz")
199+
185200

186201
class TestPrepareBlastDatabase:
187202
"""Tests for prepare_blast_database function."""

tests/test_pipeline_general_integration.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,11 @@ def _require_blast_tools():
4343

4444
@pytest.mark.integration
4545
def test_integration_fish(tmp_path):
46-
"""Same CLI as: …/test.fna, fasta_base true/false dirs, -N 3, --visualize True --AI True."""
46+
"""Same CLI as: …/test.fna, fasta_base true/false dirs, -N 3, --visualize True --AI True.
47+
48+
True-base genomes must be present in git as ``*.fna.gz`` under ``fasta_base/true_base``
49+
(plain ``*.fna`` is gitignored); false bases already use gz in the repo.
50+
"""
4751
_require_blast_tools()
4852

4953
output = tmp_path / "output"

0 commit comments

Comments
 (0)