Skip to content

Commit e9f2d9a

Browse files
authored
Merge pull request #28 from lambda-feedback/file_recover
revert: recover the file revert
2 parents 2531bb4 + 78c2f0b commit e9f2d9a

6 files changed

Lines changed: 780 additions & 106 deletions

File tree

Dockerfile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ ENV FUNCTION_COMMAND="python"
3131
# Args to start the evaluation function with
3232
ENV FUNCTION_ARGS="-m,evaluation_function.main"
3333

34-
# The transport to use for the RPC server
35-
ENV FUNCTION_RPC_TRANSPORT="ipc"
34+
# Use file-based communication interface instead of RPC
35+
# This handles larger payloads better (shimmy writes input to file, reads output from file)
36+
# shimmy will append input/output file paths as the last two arguments
37+
ENV FUNCTION_INTERFACE="file"
3638

3739
ENV LOG_LEVEL="debug"

evaluation_function/correction/correction.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,12 @@ def _check_minimality(fsa: FSA) -> Tuple[bool, Optional[ValidationError]]:
3333
try:
3434
minimized = hopcroft_minimization(fsa)
3535
if len(minimized.states) < len(fsa.states):
36+
diff = len(fsa.states) - len(minimized.states)
3637
return False, ValidationError(
37-
message=f"FSA is not minimal: has {len(fsa.states)} states but can be reduced to {len(minimized.states)}",
38+
message=f"Your FSA works correctly, but it's not minimal! You have {len(fsa.states)} states, but only {len(minimized.states)} are needed. You could remove {diff} state(s).",
3839
code=ErrorCode.NOT_MINIMAL,
3940
severity="error",
40-
suggestion="Minimize your FSA by merging equivalent states"
41+
suggestion="Look for states that behave identically (same transitions and acceptance) - these can be merged into one"
4142
)
4243
return True, None
4344
except Exception:
@@ -69,9 +70,11 @@ def _build_feedback(
6970
hints = [e.suggestion for e in all_errors if e.suggestion]
7071
if structural_info:
7172
if structural_info.unreachable_states:
72-
hints.append("Consider removing unreachable states")
73+
unreachable = ", ".join(structural_info.unreachable_states)
74+
hints.append(f"Tip: States {{{unreachable}}} can't be reached from your start state - you might want to remove them or add transitions to them")
7375
if structural_info.dead_states:
74-
hints.append("Dead states can never lead to acceptance")
76+
dead = ", ".join(structural_info.dead_states)
77+
hints.append(f"Tip: States {{{dead}}} can never lead to acceptance - this might be intentional (trap states) or a bug")
7578

7679
# Build language comparison
7780
language = LanguageComparison(are_equivalent=len(equivalence_errors) == 0)
@@ -92,17 +95,20 @@ def _summarize_errors(errors: List[ValidationError]) -> str:
9295
for error in errors:
9396
msg = error.message.lower()
9497
if "alphabet" in msg:
95-
error_types.add("alphabet mismatch")
96-
elif "state" in msg and "count" in msg:
97-
error_types.add("state count mismatch")
98-
elif "accepting" in msg or "incorrectly marked" in msg:
99-
error_types.add("acceptance error")
100-
elif "transition" in msg:
101-
error_types.add("transition error")
98+
error_types.add("alphabet issue")
99+
elif "states" in msg and ("many" in msg or "few" in msg or "needed" in msg):
100+
error_types.add("incorrect number of states")
101+
elif "accepting" in msg or "accept" in msg:
102+
error_types.add("accepting states issue")
103+
elif "transition" in msg or "reading" in msg:
104+
error_types.add("transition issue")
102105

103-
if error_types:
104-
return f"Languages differ: {', '.join(error_types)}"
105-
return f"Languages differ: {len(errors)} issue(s)"
106+
if len(error_types) == 1:
107+
issue = list(error_types)[0]
108+
return f"Almost there! Your FSA has an {issue}. Check the details below."
109+
elif error_types:
110+
return f"Your FSA doesn't quite match the expected language. Issues found: {', '.join(error_types)}"
111+
return f"Your FSA doesn't accept the correct language. Found {len(errors)} issue(s) to fix."
106112

107113

108114
# =============================================================================
@@ -134,7 +140,11 @@ def analyze_fsa_correction(
134140
# Step 1: Validate student FSA structure
135141
student_errors = is_valid_fsa(student_fsa)
136142
if student_errors:
137-
summary = "FSA has structural errors"
143+
num_errors = len(student_errors)
144+
if num_errors == 1:
145+
summary = "Your FSA has a structural problem that needs to be fixed first. See the details below."
146+
else:
147+
summary = f"Your FSA has {num_errors} structural problems that need to be fixed first. See the details below."
138148
return Result(
139149
is_correct=False,
140150
feedback=summary,
@@ -146,7 +156,7 @@ def analyze_fsa_correction(
146156
if expected_errors:
147157
return Result(
148158
is_correct=False,
149-
feedback="Internal error: expected FSA is invalid"
159+
feedback="Oops! There's an issue with the expected answer. Please contact your instructor."
150160
)
151161

152162
# Step 3: Check minimality if required
@@ -162,15 +172,18 @@ def analyze_fsa_correction(
162172
equivalence_errors = fsas_accept_same_language(student_fsa, expected_fsa)
163173

164174
if not equivalence_errors and not validation_errors:
175+
# Success message with some stats
176+
state_count = len(student_fsa.states)
177+
feedback = f"Correct! Your FSA with {state_count} state(s) accepts exactly the right language. Well done!"
165178
return Result(
166179
is_correct=True,
167-
feedback="Correct! FSA accepts the expected language.",
168-
fsa_feedback=_build_feedback("FSA is correct", [], [], structural_info)
180+
feedback=feedback,
181+
fsa_feedback=_build_feedback("Your FSA is correct!", [], [], structural_info)
169182
)
170183

171184
# Build result with errors
172185
is_correct = len(equivalence_errors) == 0 and len(validation_errors) == 0
173-
summary = _summarize_errors(equivalence_errors) if equivalence_errors else "FSA has issues"
186+
summary = _summarize_errors(equivalence_errors) if equivalence_errors else "Your FSA has some issues to address."
174187

175188
return Result(
176189
is_correct=is_correct,

evaluation_function/main.py

Lines changed: 112 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,127 @@
1+
"""
2+
Main entry point for the FSA evaluation function.
3+
4+
Supports two communication modes with shimmy:
5+
1. File-based (recommended for large payloads): shimmy passes input/output file paths as args
6+
2. RPC/IPC (default): Uses lf_toolkit's server for stdio/IPC communication
7+
"""
8+
9+
import sys
10+
import json
11+
from typing import Any, Dict
112

213
from lf_toolkit import create_server, run
14+
from lf_toolkit.evaluation import Params, Result as LFResult
315

416
from .evaluation import evaluation_function
517
from .preview import preview_function
618

19+
20+
def handle_file_based_communication(input_path: str, output_path: str) -> None:
    """
    Handle one file-based request from shimmy.

    Reads the request JSON from input_path, dispatches it to the evaluation
    or preview function, and writes the response JSON to output_path.
    This is used when shimmy is configured with --interface file.

    A response file is written even when the request cannot be read or
    parsed, or when the handler raises: in those cases the ``result`` entry
    is ``{'is_correct': False, 'feedback': 'Error processing request: ...'}``.

    Args:
        input_path: Path to the input JSON file
        output_path: Path to write the output JSON file

    Raises:
        OSError: If the output file itself cannot be written.
    """
    response_data: Dict[str, Any] = {}

    try:
        with open(input_path, 'r', encoding='utf-8') as f:
            input_data = json.load(f)
        if not isinstance(input_data, dict):
            raise ValueError('request payload must be a JSON object')
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses. Nothing could be read from the request, so report the
        # same default command that is assumed when the field is absent.
        response_data['command'] = 'eval'
        response_data['result'] = _error_result(e)
    else:
        command = input_data.get('command', 'eval')
        request_id = input_data.get('$id')

        # Echo the request id (when present) and the command back
        if request_id is not None:
            response_data['$id'] = request_id
        response_data['command'] = command

        try:
            response_data['result'] = _run_command(command, input_data)
        except Exception as e:
            # Top-level boundary: any handler failure becomes feedback
            # instead of leaving the caller without an output file.
            response_data['result'] = _error_result(e)

    # Serialize fully in memory before touching the output file, so that a
    # non-serializable result can never leave a truncated JSON file behind.
    try:
        payload = json.dumps(response_data, ensure_ascii=False)
    except (TypeError, ValueError) as e:
        response_data['result'] = _error_result(e)
        payload = json.dumps(response_data, ensure_ascii=False)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(payload)


def _error_result(error: Exception) -> Dict[str, Any]:
    """Build the failure result reported when a request cannot be processed."""
    return {
        'is_correct': False,
        'feedback': f'Error processing request: {str(error)}'
    }


def _result_to_dict(result: Any) -> Any:
    """Return result as a dict via to_dict() or as-is; None if neither applies."""
    if hasattr(result, 'to_dict'):
        return result.to_dict()
    if isinstance(result, dict):
        return result
    return None


def _run_command(command: Any, input_data: Dict[str, Any]) -> Dict[str, Any]:
    """Dispatch an 'eval' or 'preview' request and return its result dict."""
    if command != 'eval' and command != 'preview':
        return {
            'is_correct': False,
            'feedback': f'Unknown command: {command}'
        }

    # Inputs shared by both commands
    response = input_data.get('response')
    params_dict = input_data.get('params', {})
    params = Params(**params_dict) if params_dict else Params()

    if command == 'eval':
        answer = input_data.get('answer')
        result = evaluation_function(response, answer, params)
    else:
        result = preview_function(response, params)

    converted = _result_to_dict(result)
    if converted is not None:
        return converted

    # Neither an object with to_dict() nor a dict: fall back to its string form
    if command == 'eval':
        return {'is_correct': False, 'feedback': str(result)}
    return {'preview': str(result)}
98+
99+
7100
def main():
    """
    Entry point: serve one file-based request, or start the RPC/IPC server.

    Mode selection is driven by the command line:
    - When sys.argv has at least three entries and neither of the last two
      starts with '-', they are taken as the input and output file paths and
      a single file-based request is handled.
    - In every other case the lf_toolkit server is created, the evaluation
      and preview functions are registered on it, and it is run.
    """
    argv = sys.argv

    # shimmy passes input and output file paths as the last two arguments
    if len(argv) >= 3:
        *_, input_path, output_path = argv

        # Basic sanity check: option-style arguments are not file paths
        looks_like_option = input_path.startswith('-') or output_path.startswith('-')
        if not looks_like_option:
            handle_file_based_communication(input_path, output_path)
            return

    # Fall back to RPC/IPC server mode
    server = create_server()
    server.eval(evaluation_function)
    server.preview(preview_function)
    run(server)
16124

125+
17126
if __name__ == "__main__":
18127
main()

0 commit comments

Comments
 (0)