Skip to content

Commit 6fad809

Browse files
test: cover auto-replace and intra-model paths
Add mock-based orchestration tests for auto_replace_tag and direct behavior tests for intra_model checks/noncanonical routing. Assisted-by: GitHub Copilot (GPT-5.4)
1 parent 6535eba commit 6fad809

2 files changed

Lines changed: 261 additions & 0 deletions

File tree

Lines changed: 139 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,139 @@
1+
import os
2+
import tempfile
3+
import unittest
4+
from unittest import mock
5+
6+
from gff3tool.lib.gff3_merge import auto_replace_tag
7+
8+
9+
class _DummyProc:
    """Stand-in for the process object returned by the patched ``Popen``.

    It remembers the command it was created with and always reports a
    successful exit, so code that waits on the process proceeds without any
    external tool being run.
    """

    def __init__(self, cmd):
        self.cmd = cmd
        # Like a real Popen object, the exit status is unknown until wait().
        self.returncode = None

    def wait(self):
        """Pretend the command finished successfully and return exit code 0."""
        self.returncode = 0
        return self.returncode
15+
16+
17+
class FakeGff:
    """Lightweight replacement for a parsed GFF3 object in these tests.

    Holds the feature ``lines`` handed to it and records where ``write`` was
    asked to save the file, so a test can assert on both afterwards.
    """

    def __init__(self, lines):
        self.lines = lines
        # Set by write(); stays None if the code under test never writes.
        self.written_path = None

    def write(self, path):
        """Remember *path* and create a file there holding only the GFF3 header."""
        self.written_path = path
        with open(path, "w", encoding="utf-8") as fh:
            fh.write("##gff-version 3\n")
26+
27+
28+
def _make_root_with_transcript(root_id, tx_id, has_cds=False):
    """Build a one-gene feature tree for use as ``FakeGff.lines``.

    Args:
        root_id: ``ID`` attribute of the gene (root) feature.
        tx_id: ``ID`` attribute of the single mRNA child.
        has_cds: When true the mRNA gets one ``CDS`` child, otherwise one
            ``exon`` child.

    Returns:
        A list containing the single root gene dict. Both the gene and its
        mRNA carry a ``replace`` attribute of ``["NA"]``.
    """
    transcript = {
        "line_type": "feature",
        "attributes": {"ID": tx_id, "Parent": [root_id], "replace": ["NA"]},
        "type": "mRNA",
        "children": [{"type": "CDS" if has_cds else "exon"}],
    }
    gene = {
        "line_type": "feature",
        "attributes": {"ID": root_id, "replace": ["NA"]},
        "type": "gene",
        "children": [transcript],
    }
    return [gene]
47+
48+
49+
class TestAutoReplaceTag(unittest.TestCase):
    """Orchestration tests for ``auto_replace_tag.main``.

    ``Gff3``, ``gff3_to_fasta.main`` and ``subprocess.Popen`` are all patched,
    so no GFF3 parsing, FASTA extraction or external tool actually runs.
    """

    # Index in the perl command line of the file each helper script is
    # expected to create; the fake Popen creates that file itself.
    _PERL_OUTPUT_ARG = {
        "create_annotation_summaries_nov21-7.pl": 4,
        "find_match.pl": 5,
        "gen_spreadsheet.pl": 5,
    }

    def _fake_popen_factory(self, tmpdir):
        """Return a ``Popen`` replacement that creates output files itself.

        ``tmpdir`` is accepted but not used by the fake.
        """
        def _fake_popen(cmd, stdout=None):
            # Commands that take "-out <file>" get that file created empty.
            if "-out" in cmd:
                with open(cmd[cmd.index("-out") + 1], "w") as fh:
                    fh.write("")

            if cmd and cmd[0] == "perl":
                out_index = self._PERL_OUTPUT_ARG.get(os.path.basename(cmd[1]))
                if out_index is not None:
                    with open(cmd[out_index], "w") as fh:
                        fh.write("")

            return _DummyProc(cmd)

        return _fake_popen

    @staticmethod
    def _write_placeholder_inputs(tmpdir):
        """Create the two GFF3 inputs and the FASTA input as one-newline files.

        Returns:
            ``(gff1_path, gff2_path, fasta_path)`` inside *tmpdir*.
        """
        paths = tuple(
            os.path.join(tmpdir, name)
            for name in ("new.gff3", "ref.gff3", "ref.fa")
        )
        for path in paths:
            with open(path, "w") as fh:
                fh.write("\n")
        return paths

    def _run_main(self, tmpdir, gff1, gff2, all_assign):
        """Run ``auto_replace_tag.main`` in *tmpdir* with collaborators patched.

        ``Gff3`` yields *gff1* then *gff2* on successive calls.

        Returns:
            ``(to_fasta_main, popen_mock)``, the mocks that replaced
            ``gff3_to_fasta.main`` and ``subprocess.Popen``.
        """
        gff1_path, gff2_path, fasta_path = self._write_placeholder_inputs(tmpdir)

        with mock.patch.object(auto_replace_tag, "Gff3", autospec=True, side_effect=[gff1, gff2]), \
                mock.patch.object(auto_replace_tag.gff3_to_fasta, "main", autospec=True) as to_fasta_main, \
                mock.patch.object(auto_replace_tag.subprocess, "Popen",
                                  side_effect=self._fake_popen_factory(tmpdir)) as popen_mock:
            auto_replace_tag.main(
                gff1=gff1_path,
                gff2=gff2_path,
                fasta=fasta_path,
                outdir=tmpdir,
                scode="TEMP",
                logger=mock.Mock(),
                all_assign=all_assign,
                user_defined1=None,
                user_defined2=None,
            )

        return to_fasta_main, popen_mock

    def test_main_default_path_runs_extract_and_alignment_pipeline(self):
        gff1 = FakeGff(_make_root_with_transcript("gene1", "tx1", has_cds=False))
        gff2 = FakeGff(_make_root_with_transcript("gene2", "tx2", has_cds=True))

        with tempfile.TemporaryDirectory() as tmpdir:
            to_fasta_main, popen_mock = self._run_main(tmpdir, gff1, gff2, all_assign=False)

            self.assertEqual(to_fasta_main.call_count, 6)
            commands = [" ".join(c.args[0]) for c in popen_mock.call_args_list]
            self.assertTrue(any("makeblastdb" in command for command in commands))
            self.assertTrue(any("blastn" in command for command in commands))
            # Must be checked before the temporary directory is removed.
            self.assertTrue(os.path.exists(os.path.join(tmpdir, "check1.txt")))

    def test_main_all_assign_rewrites_gff_and_drops_replace_attrs(self):
        gff1 = FakeGff(_make_root_with_transcript("gene1", "tx1", has_cds=False))
        gff2 = FakeGff(_make_root_with_transcript("gene2", "tx2", has_cds=True))

        with tempfile.TemporaryDirectory() as tmpdir:
            self._run_main(tmpdir, gff1, gff2, all_assign=True)

            self.assertIsNotNone(gff1.written_path)
            self.assertTrue(gff1.written_path.endswith(os.path.join("tmp", "gff1_mod.gff3")))
            for line in gff1.lines:
                self.assertNotIn("replace", line["attributes"])
136+
137+
138+
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
Lines changed: 122 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,122 @@
1+
import unittest
2+
from unittest import mock
3+
4+
from gff3tool.lib.intra_model import intra_model
5+
6+
7+
class DummyGff:
    """In-memory stand-in for a GFF3 object, used by the tests in this module.

    It exposes ``lines``, collects reported errors in ``line_errors`` and
    offers an ``overlap`` helper for comparing two features.
    """

    def __init__(self, lines=None):
        # Test against None, not truthiness: a caller-supplied empty list must
        # be kept as-is so later changes to it stay visible through the caller's
        # own reference.
        self.lines = [] if lines is None else lines
        self.line_errors = []

    def add_line_error(self, line, error, log_level=None):
        """Record a reported error as a ``(line, error, log_level)`` tuple."""
        self.line_errors.append((line, error, log_level))

    @staticmethod
    def overlap(a, b):
        """Return True when the ``start``..``end`` ranges of *a* and *b* intersect.

        Both ends are inclusive, so ranges that merely touch count as overlapping.
        """
        return not (a["end"] < b["start"] or b["end"] < a["start"])
18+
19+
20+
def _cds(line_index, start, end):
    """Return a childless CDS feature dict spanning ``start``..``end``.

    The feature's ``ID`` attribute is derived from *line_index* as
    ``cds<line_index>``.
    """
    return {
        "line_type": "feature",
        "line_index": line_index,
        "type": "CDS",
        "start": start,
        "end": end,
        "attributes": {"ID": f"cds{line_index}"},
        "children": [],
    }
30+
31+
32+
class TestIntraModelEngine(unittest.TestCase):
    """Tests for individual ``intra_model`` checks and for ``intra_model.main``.

    The two ``check_*`` tests call the real check functions against a
    ``DummyGff``; the two ``main`` tests patch the checks and only observe
    which ones are invoked and how their results are collected.
    """

    @staticmethod
    def _gene_root(children, **extra):
        """Build the ``gene1`` root feature (line index 0) used by every test.

        Args:
            children: List stored under the root's ``children`` key.
            **extra: Additional keys (e.g. ``start``/``end``) merged into the root.
        """
        root = {
            "line_type": "feature",
            "line_index": 0,
            "type": "gene",
            "attributes": {"ID": "gene1"},
            "children": children,
        }
        root.update(extra)
        return root

    def test_check_incomplete_flags_gene_without_mrna(self):
        # The gene's only child is an exon; there is no mRNA level in between.
        root = self._gene_root([{"type": "exon", "children": []}])
        gff = DummyGff()

        result = intra_model.check_incomplete(gff, root)

        self.assertIsNotNone(result)
        self.assertEqual(result[0]["eCode"], "Ema0004")
        self.assertEqual(len(gff.line_errors), 1)

    def test_check_merged_gene_parent_flags_non_overlapping_isoforms(self):
        # Two transcripts whose CDS ranges (1-5 and 20-30) do not overlap.
        tx1 = {
            "line_index": 1,
            "attributes": {"ID": "tx1"},
            "children": [_cds(2, 1, 5)],
        }
        tx2 = {
            "line_index": 3,
            "attributes": {"ID": "tx2"},
            "children": [_cds(4, 20, 30)],
        }
        root = self._gene_root([tx1, tx2])
        gff = DummyGff()

        result = intra_model.check_merged_gene_parent(gff, root)

        self.assertIsNotNone(result)
        self.assertEqual(result[0]["eCode"], "Ema0009")
        self.assertEqual(len(gff.line_errors), 1)

    def test_main_noncanonical_skips_internal_stop_and_isoform_checks(self):
        root = self._gene_root([], start=1, end=100)
        gff = DummyGff(lines=[root])

        with mock.patch.object(intra_model.function4gff, "FIX_MISSING_ATTR", autospec=True), \
                mock.patch.object(intra_model, "check_internal_stop", autospec=True) as internal_stop, \
                mock.patch.object(intra_model, "check_distinct_isoform", autospec=True) as distinct_isoform, \
                mock.patch.object(intra_model, "check_merged_gene_parent", autospec=True) as merged_parent:
            intra_model.main(gff=gff, logger=mock.Mock(), noncanonical_gene=True)

        internal_stop.assert_not_called()
        distinct_isoform.assert_not_called()
        merged_parent.assert_not_called()

    def test_main_canonical_collects_reported_errors(self):
        root = self._gene_root([], start=1, end=100)
        gff = DummyGff(lines=[root])

        # Every check is replaced by a stub returning one distinct error code.
        with mock.patch.object(intra_model.function4gff, "FIX_MISSING_ATTR", autospec=True), \
                mock.patch.object(intra_model, "check_pseudo_child_type", autospec=True,
                                  return_value=[{"eCode": "Ema0005"}]), \
                mock.patch.object(intra_model, "check_redundant_length", autospec=True,
                                  return_value=[{"eCode": "Ema0001"}]), \
                mock.patch.object(intra_model, "check_incomplete", autospec=True,
                                  return_value=[{"eCode": "Ema0004"}]), \
                mock.patch.object(intra_model, "check_internal_stop", autospec=True,
                                  return_value=[{"eCode": "Ema0002"}]), \
                mock.patch.object(intra_model, "check_distinct_isoform", autospec=True,
                                  return_value=[{"eCode": "Ema0008"}]), \
                mock.patch.object(intra_model, "check_merged_gene_parent", autospec=True,
                                  return_value=[{"eCode": "Ema0009"}]):
            result = intra_model.main(gff=gff, logger=mock.Mock(), noncanonical_gene=False)

        self.assertEqual(
            [r["eCode"] for r in result],
            ["Ema0005", "Ema0001", "Ema0004", "Ema0002", "Ema0008", "Ema0009"],
        )
119+
120+
121+
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)