Skip to content

Commit acbf1a1

Browse files
committed
fix: fix AI errors
1 parent b45fb8c commit acbf1a1

4 files changed

Lines changed: 185 additions & 57 deletions

File tree

evaluation_function/correction/correction.py

Lines changed: 23 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,12 @@
1212
from evaluation_function.schemas.params import Params
1313

1414
# Schema imports
15-
from ..schemas import FSA, ValidationError, ErrorCode
15+
from ..schemas import FSA, ValidationError, ErrorCode, ValidationResult
1616
from ..schemas.result import Result, FSAFeedback, StructuralInfo, LanguageComparison
1717

1818
# Validation imports
1919
from ..validation.validation import (
20+
are_isomorphic,
2021
is_valid_fsa,
2122
is_deterministic,
2223
is_complete,
@@ -125,7 +126,7 @@ def analyze_fsa_correction(
125126
# Step 1: Validate student FSA structure
126127
# -------------------------------------------------------------------------
127128
student_result = is_valid_fsa(student_fsa)
128-
if not student_result.ok():
129+
if not student_result.ok:
129130
summary = (
130131
"Your FSA has a structural problem that needs to be fixed first."
131132
if len(student_result.errors) == 1
@@ -147,7 +148,7 @@ def analyze_fsa_correction(
147148
# Step 2: Validate expected FSA (should never fail)
148149
# -------------------------------------------------------------------------
149150
expected_result = is_valid_fsa(expected_fsa)
150-
if not expected_result.ok():
151+
if not expected_result.ok:
151152
return Result(
152153
is_correct=False,
153154
feedback="Oops! There's an issue with the expected answer. Please contact your instructor."
@@ -158,7 +159,7 @@ def analyze_fsa_correction(
158159
# -------------------------------------------------------------------------
159160
if params.expected_type == "DFA":
160161
det_result = is_deterministic(student_fsa)
161-
if not det_result.ok():
162+
if not det_result.ok:
162163
summary = "Your automaton must be deterministic (a DFA)."
163164
return Result(
164165
is_correct=False,
@@ -177,15 +178,17 @@ def analyze_fsa_correction(
177178
# -------------------------------------------------------------------------
178179
if params.check_completeness:
179180
comp_result = is_complete(student_fsa)
180-
if not comp_result.ok():
181+
if not comp_result.ok:
181182
validation_errors.extend(comp_result.errors)
182183

183184
# -------------------------------------------------------------------------
184185
# Step 5: Optional minimality check
185186
# -------------------------------------------------------------------------
187+
validation_result = None
186188
if params.check_minimality:
187-
min_errors = is_minimal(student_fsa)
188-
validation_errors.extend(min_errors)
189+
validation_result = is_minimal(student_fsa)
190+
if not validation_result.ok:
191+
validation_errors.extend(validation_result.errors)
189192

190193
# -------------------------------------------------------------------------
191194
# Step 6: Structural analysis (for feedback only)
@@ -198,20 +201,26 @@ def analyze_fsa_correction(
198201
equivalence_result = fsas_accept_same_language(
199202
student_fsa, expected_fsa
200203
)
201-
equivalence_errors = equivalence_result.errors
204+
equivalence_errors.extend(equivalence_result.errors)
202205

203206
# -------------------------------------------------------------------------
204-
# Step 8: Decide correctness based on evaluation mode
207+
# Step 8: Isomorphism
208+
# -------------------------------------------------------------------------
209+
iso_result = are_isomorphic(student_fsa, expected_fsa)
210+
equivalence_errors.extend(iso_result.errors)
211+
212+
# -------------------------------------------------------------------------
213+
# Step 9: Decide correctness based on evaluation mode
205214
# -------------------------------------------------------------------------
206215
if params.evaluation_mode == "strict":
207-
is_correct = not validation_errors and equivalence_result.ok()
216+
is_correct = validation_result is not None and validation_result.ok and equivalence_result.ok and iso_result.ok
208217
elif params.evaluation_mode == "lenient":
209-
is_correct = equivalence_result.ok()
218+
is_correct = validation_result is not None and validation_result.ok and equivalence_result.ok
210219
else: # partial # NOTE: the intended semantics of "partial" mode are unclear; for now it is always marked as incorrect.
211220
is_correct = False
212221

213222
# -------------------------------------------------------------------------
214-
# Step 9: Build summary
223+
# Step 10: Build summary
215224
# -------------------------------------------------------------------------
216225
if is_correct:
217226
feedback = (
@@ -226,9 +235,10 @@ def analyze_fsa_correction(
226235
else "Your FSA has some issues to address."
227236
)
228237
feedback = summary
238+
print(equivalence_errors)
229239

230240
# -------------------------------------------------------------------------
231-
# Step 10: Return result
241+
# Step 11: Return result
232242
# -------------------------------------------------------------------------
233243
return Result(
234244
is_correct=is_correct,

evaluation_function/test/test_correction.py

Lines changed: 57 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,12 @@
88
from evaluation_function.schemas import ValidationError, ErrorCode
99
from evaluation_function.schemas.utils import make_fsa
1010
from evaluation_function.schemas.result import Result, FSAFeedback
11+
from evaluation_function.schemas.params import Params
1112
from evaluation_function.correction import analyze_fsa_correction
1213

1314

1415
# =============================================================================
15-
# Fixtures
16+
# Fixtures - DFAs
1617
# =============================================================================
1718

1819
@pytest.fixture
@@ -72,36 +73,54 @@ def equivalent_dfa():
7273
)
7374

7475

76+
# =============================================================================
77+
# Helper: Default Params
78+
# =============================================================================
79+
80+
@pytest.fixture
81+
def default_params():
82+
"""Default Params object for analyze_fsa_correction."""
83+
return Params(
84+
expected_type="DFA",
85+
check_completeness=True,
86+
check_minimality=True,
87+
evaluation_mode="strict",
88+
highlight_errors=True,
89+
feedback_verbosity="detailed"
90+
)
91+
92+
7593
# =============================================================================
7694
# Test Main Pipeline - Returns Result
7795
# =============================================================================
7896

7997
class TestAnalyzeFsaCorrection:
8098
"""Test the main analysis pipeline returns Result."""
8199

82-
def test_equivalent_fsas_correct(self, dfa_accepts_a, equivalent_dfa):
83-
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa)
100+
def test_equivalent_fsas_correct(self, dfa_accepts_a, equivalent_dfa, default_params):
101+
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa, default_params)
102+
print(result)
84103
assert isinstance(result, Result)
85104
assert result.is_correct is True
86105
assert "Correct" in result.feedback
87106

88-
def test_different_fsas_incorrect(self, dfa_accepts_a, dfa_accepts_a_or_b):
89-
result = analyze_fsa_correction(dfa_accepts_a, dfa_accepts_a_or_b)
107+
def test_different_fsas_incorrect(self, dfa_accepts_a, dfa_accepts_a_or_b, default_params):
108+
result = analyze_fsa_correction(dfa_accepts_a, dfa_accepts_a_or_b, default_params)
90109
assert isinstance(result, Result)
91110
assert result.is_correct is False
92111

93-
def test_result_has_fsa_feedback(self, dfa_accepts_a, equivalent_dfa):
94-
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa)
112+
def test_result_has_fsa_feedback(self, dfa_accepts_a, equivalent_dfa, default_params):
113+
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa, default_params)
95114
assert result.fsa_feedback is not None
96115
assert isinstance(result.fsa_feedback, FSAFeedback)
97116

98-
def test_fsa_feedback_has_structural_info(self, dfa_accepts_a, equivalent_dfa):
99-
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa)
117+
def test_fsa_feedback_has_structural_info(self, dfa_accepts_a, equivalent_dfa, default_params):
118+
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa, default_params)
100119
assert result.fsa_feedback.structural is not None
101120
assert result.fsa_feedback.structural.num_states == 3
102121

103-
def test_different_fsas_have_errors(self, dfa_accepts_a, dfa_accepts_a_or_b):
104-
result = analyze_fsa_correction(dfa_accepts_a, dfa_accepts_a_or_b)
122+
def test_different_fsas_have_errors(self, dfa_accepts_a, dfa_accepts_a_or_b, default_params):
123+
result = analyze_fsa_correction(dfa_accepts_a, dfa_accepts_a_or_b, default_params)
105124
assert result.fsa_feedback is not None
106125
assert len(result.fsa_feedback.errors) > 0
107126

@@ -113,36 +132,48 @@ def test_different_fsas_have_errors(self, dfa_accepts_a, dfa_accepts_a_or_b):
113132
class TestInvalidFsas:
114133
"""Test handling of invalid FSAs."""
115134

116-
def test_invalid_initial_state(self):
135+
def test_invalid_initial_state(self, default_params):
117136
invalid = make_fsa(
118137
states=["q0"],
119138
alphabet=["a"],
120139
transitions=[],
121140
initial="invalid",
122141
accept=[]
123142
)
124-
result = analyze_fsa_correction(invalid, invalid)
143+
result = analyze_fsa_correction(invalid, invalid, default_params)
125144
assert result.is_correct is False
126145
assert result.fsa_feedback is not None
127146
assert len(result.fsa_feedback.errors) > 0
128147

129-
def test_invalid_accept_state(self):
148+
def test_invalid_accept_state(self, default_params):
130149
invalid = make_fsa(
131150
states=["q0"],
132151
alphabet=["a"],
133152
transitions=[],
134153
initial="q0",
135154
accept=["invalid"]
136155
)
137-
result = analyze_fsa_correction(invalid, invalid)
156+
result = analyze_fsa_correction(invalid, invalid, default_params)
138157
assert result.is_correct is False
139158

140159

160+
# =============================================================================
161+
# Test Minimality
162+
# =============================================================================
163+
141164
class TestAnalyzeFsaCorrectionMinimality:
142165
"""Test analyze_fsa_correction with minimality checking."""
143166

144167
def test_minimal_fsa_passes(self, dfa_accepts_a, equivalent_dfa):
145-
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa, require_minimal=True)
168+
params = Params(
169+
expected_type="DFA",
170+
check_completeness=True,
171+
check_minimality=True,
172+
evaluation_mode="strict",
173+
highlight_errors=True,
174+
feedback_verbosity="detailed"
175+
)
176+
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa, params)
146177
assert result.is_correct is True
147178

148179
def test_non_minimal_fsa_fails_when_required(self, equivalent_dfa):
@@ -162,9 +193,18 @@ def test_non_minimal_fsa_fails_when_required(self, equivalent_dfa):
162193
initial="q0",
163194
accept=["q1"]
164195
)
165-
result = analyze_fsa_correction(non_minimal, equivalent_dfa, require_minimal=True)
196+
params = Params(
197+
expected_type="DFA",
198+
check_completeness=True,
199+
check_minimality=True,
200+
evaluation_mode="strict",
201+
highlight_errors=True,
202+
feedback_verbosity="detailed"
203+
)
204+
result = analyze_fsa_correction(non_minimal, equivalent_dfa, params)
166205
# Should have minimality error
167206
assert result.fsa_feedback is not None
207+
assert any(e.code == ErrorCode.NOT_MINIMAL for e in result.fsa_feedback.errors)
168208

169209

170210
if __name__ == "__main__":

0 commit comments

Comments
 (0)