Skip to content

Commit 0f9d2d7

Browse files
authored
Merge pull request #52 from lambda-feedback/payload
fix: let the output be structured even if the input is invalid
2 parents 2c430db + ea83798 commit 0f9d2d7

2 files changed

Lines changed: 14 additions & 19 deletions

File tree

evaluation_function/evaluation.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
from evaluation_function.schemas.params import Params
55
from .schemas import FSA, FSAFrontend
6-
from .schemas.result import Result
6+
from .schemas.result import FSAFeedback, Result
77
from .correction import analyze_fsa_correction
88
import json
99

@@ -52,12 +52,18 @@ def evaluation_function(
5252
)
5353

5454
except Exception as e:
55-
# Always return LFResult with raw payload for debugging
55+
result: Result = Result(
56+
is_correct=False,
57+
feedback=f"Error during evaluation: {str(e)}",
58+
fsa_feedback=FSAFeedback(
59+
summary=f"Error during evaluation: {str(e)}",
60+
errors=[]
61+
)
62+
)
5663
return LFResult(
5764
is_correct=False,
5865
feedback_items=[(
5966
"error",
60-
f"Invalid FSA format: {str(e)}\n\n"
61-
f"response: {response}\nanswer: {answer}\nparams: {params}"
67+
result.fsa_feedback.model_dump_json()
6268
)]
6369
)

evaluation_function/schemas/result.py

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -212,11 +212,13 @@ class LanguageComparison(BaseModel):
212212
description="True if student FSA accepts the same language as expected"
213213
)
214214

215+
# not used, for extension
215216
counterexample: Optional[str] = Field(
216217
default=None,
217218
description="A string where student FSA differs from expected (if languages not equivalent)"
218219
)
219220

221+
# not used, for extension
220222
counterexample_type: Optional[Literal["should_accept", "should_reject"]] = Field(
221223
default=None,
222224
description="Whether the counterexample should be accepted or rejected"
@@ -255,6 +257,7 @@ class FSAFeedback(BaseModel):
255257
description="Language equivalence comparison with counterexample if applicable"
256258
)
257259

260+
# not used
258261
test_results: List[TestResult] = Field(
259262
default_factory=list,
260263
description="Results of individual test cases"
@@ -277,7 +280,6 @@ class Result(BaseModel):
277280
{
278281
"is_correct": false,
279282
"feedback": "Your FSA rejects 'ab' but it should accept it.",
280-
"score": 0.75,
281283
"fsa_feedback": {
282284
"summary": "Language mismatch - incorrect behavior on some inputs",
283285
"errors": [
@@ -324,20 +326,7 @@ class Result(BaseModel):
324326
description="Human-readable feedback message for the student"
325327
)
326328

327-
score: Optional[float] = Field(
328-
default=None,
329-
ge=0.0,
330-
le=1.0,
331-
description="Normalized score (0.0-1.0) for partial credit, null if not using partial credit"
332-
)
333-
334329
fsa_feedback: Optional[FSAFeedback] = Field(
335330
default=None,
336331
description="Detailed structured feedback with element highlighting for UI"
337-
)
338-
339-
# this is dev only
340-
input_data: Optional[FSA] = Field(
341-
default=None,
342-
description="The parsed FSA input data (for development/debugging purposes only)"
343-
)
332+
)

0 commit comments

Comments
 (0)