# bloom-learn-app/test_difficulty_manual.py (AnmolTomer/bloom-learn-app, main branch)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""
Manual test script for difficulty levels feature.
Run this after starting the backend server to verify functionality.

Usage:
    python test_difficulty_manual.py
"""
import asyncio
import uuid
from app.tutor.engine import TutorEngine, is_off_topic_response
from app.memory.state_store import RedisStateStore
from app.llm.config import get_llm_config
from app.prompts.service import PromptService

async def test_off_topic_detection():
    """Check ``is_off_topic_response`` against a table of known cases.

    Each case is a ``(message, topic, expected_off_topic)`` tuple. One
    ``[PASS]``/``[FAIL]`` line is printed per case so the full table is
    visible in a single run.

    Raises:
        AssertionError: If the result for any case differs from its
            expected value.
    """
    print("\n=== Testing Off-Topic Detection ===")

    test_cases = [
        ("A function is defined using def", "Python Functions", False),
        ("I like pizza", "Python Functions", True),
        ("ok", "Python Functions", True),
        ("def foo(): pass", "Python Functions", False),
        ("[1, 2, 3]", "Python Lists", False),
    ]

    failures = []
    for message, topic, expected_off_topic in test_cases:
        result = is_off_topic_response(message, topic)
        passed = result == expected_off_topic
        status = "[PASS]" if passed else "[FAIL]"
        print(f"{status} '{message[:30]}...' -> Off-topic: {result} (expected: {expected_off_topic})")
        if not passed:
            failures.append(message)

    # Report every case first, then fail: previously a mismatch was only
    # printed, so the caller went on to announce that all tests had passed.
    if failures:
        raise AssertionError(
            f"Off-topic detection gave unexpected results for: {failures}"
        )

async def test_difficulty_levels():
    """Verify how ``TutorState`` stores its ``difficulty_level``.

    Checks that a state built without an explicit difficulty reports
    ``"MEDIUM"``, and that each of ``"EASY"``, ``"MEDIUM"`` and ``"HARD"``
    is kept as given when passed to the constructor.

    Raises:
        AssertionError: If any of the checks above does not hold.
    """
    print("\n=== Testing Difficulty Levels ===")

    from app.tutor.models import TutorState

    # Test default difficulty.
    # Assert before printing so "[PASS]" is only shown for a check that
    # actually passed.
    state1 = TutorState(session_id=uuid.uuid4(), topic="Python Lists")
    assert state1.difficulty_level == "MEDIUM", (
        f"expected default difficulty 'MEDIUM', got {state1.difficulty_level!r}"
    )
    print(f"[PASS] Default difficulty: {state1.difficulty_level}")

    # Test custom difficulty
    for diff in ["EASY", "MEDIUM", "HARD"]:
        state = TutorState(session_id=uuid.uuid4(), topic="Python Lists", difficulty_level=diff)
        assert state.difficulty_level == diff, (
            f"expected difficulty {diff!r}, got {state.difficulty_level!r}"
        )
        print(f"[PASS] Custom difficulty: {state.difficulty_level}")

async def test_judge_dependencies():
    """Verify how ``JudgeDependencies`` stores its ``difficulty_level``.

    Checks that the default difficulty is ``"MEDIUM"``, that each of
    ``"EASY"``, ``"MEDIUM"`` and ``"HARD"`` is kept as given, and that
    ``DIFFICULTY_INSTRUCTIONS`` is not empty.

    Raises:
        AssertionError: If any of the checks above does not hold.
    """
    print("\n=== Testing Judge Dependencies ===")

    from app.llm.judge_agent import JudgeDependencies, DIFFICULTY_INSTRUCTIONS
    from app.bloom.models import BloomLevel

    # Test default difficulty.
    # Assert before printing so "[PASS]" is only shown for a check that
    # actually passed.
    deps1 = JudgeDependencies(
        bloom_level=BloomLevel.REMEMBER,
        tutor_question="What is a list?",
        user_response="A collection",
        context=""
    )
    assert deps1.difficulty_level == "MEDIUM", (
        f"expected default difficulty 'MEDIUM', got {deps1.difficulty_level!r}"
    )
    print(f"[PASS] Default difficulty: {deps1.difficulty_level}")

    # Test custom difficulties
    for diff in ["EASY", "MEDIUM", "HARD"]:
        deps = JudgeDependencies(
            bloom_level=BloomLevel.REMEMBER,
            tutor_question="What is a list?",
            user_response="A collection",
            context="",
            difficulty_level=diff
        )
        assert deps.difficulty_level == diff, (
            f"expected difficulty {diff!r}, got {deps.difficulty_level!r}"
        )
        print(f"[PASS] Custom difficulty: {deps.difficulty_level}")

    # Test instructions exist: an empty collection would mean nothing was
    # loaded, so check it rather than printing "[PASS]" unconditionally.
    assert len(DIFFICULTY_INSTRUCTIONS) > 0, "DIFFICULTY_INSTRUCTIONS is empty"
    print(f"[PASS] Difficulty instructions loaded: {len(DIFFICULTY_INSTRUCTIONS)} levels")

async def main():
    """Run all tests and report the overall outcome.

    Runs the three test coroutines in order. On success a summary and the
    manual follow-up steps are printed; on the first exception the error
    and its traceback are printed instead.

    Returns:
        int: ``0`` when every test completed without raising, ``1``
        otherwise. The value is intended for use as the process exit status.
    """
    print("=" * 60)
    print("Difficulty Levels Feature - Manual Test Suite")
    print("=" * 60)

    try:
        await test_off_topic_detection()
        await test_difficulty_levels()
        await test_judge_dependencies()
    except Exception as e:
        # Top-level boundary of the script: report the failure in full and
        # signal it through the return value instead of re-raising.
        print(f"\n[FAILED] Test failed: {e}")
        import traceback
        traceback.print_exc()
        return 1
    else:
        print("\n" + "=" * 60)
        print("[SUCCESS] All tests passed!")
        print("=" * 60)
        print("\nNext steps:")
        print("1. Start the backend: uvicorn app.main:app --reload")
        print("2. Start the frontend: cd frontend && npm run dev")
        print("3. Test the UI with different difficulty levels")
        print("4. Try sending off-topic responses to verify detection")
        return 0

if __name__ == "__main__":
    # Propagate the result as the exit status so a failed run is visible to
    # the shell; previously the script always exited with status 0.
    raise SystemExit(asyncio.run(main()))