-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathroad_crosser.py
More file actions
125 lines (103 loc) · 4.29 KB
/
road_crosser.py
File metadata and controls
125 lines (103 loc) · 4.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import numpy as np
import random
# States the agent can occupy. 'safe' and 'hit' are the terminal states: the
# episode loops in this file stop as soon as either one is reached.
states = ['start', 'look_left', 'look_right', 'look_left_again', 'cross', 'safe', 'hit']

# Actions the agent can choose from. 'look_left_again' has to be listed: the
# look_right -> look_left_again step (and its reward below) is keyed on that
# action, so without it the correct crossing sequence could never be chosen.
# It is appended last so the column indices of the other actions are unchanged.
actions = ['look_left', 'look_right', 'cross', 'look_left_again']

# Q-table: one row per state, one column per action, initialised to zeros.
q_table = np.zeros((len(states), len(actions)))

# Learning parameters
alpha = 0.1  # Learning rate
gamma = 0.6  # Discount factor
epsilon = 0.1  # Exploration rate

# Reward system.
# The first four entries are keyed by (state, action). The last two are keyed
# by ('cross', outcome state) instead, since 'safe' and 'hit' are states, not
# actions.
rewards = {
    ('start', 'look_left'): 1,
    ('look_left', 'look_right'): 1,
    ('look_right', 'look_left_again'): 1,
    ('look_left_again', 'cross'): 10,  # Big reward for correct sequence
    ('cross', 'safe'): 50,  # Successfully crossed
    ('cross', 'hit'): -100,  # Got hit by car
}
# Training function
def train(num_episodes):
    """Run tabular Q-learning and update the module-level ``q_table`` in place.

    Every episode starts in ``'start'`` and runs until the state becomes
    ``'safe'`` or ``'hit'``. Actions are picked epsilon-greedily using the
    module-level ``epsilon``; updates use ``alpha`` and ``gamma``.

    Transition rules:
      * The three "looking" steps of the routine move to a fixed next state
        and earn the matching ``rewards`` entry.
      * ``cross`` from ``'look_left_again'`` ends ``'safe'`` with probability
        0.9, otherwise ``'hit'``. Its reward is the ``(state, action)`` entry
        plus the ``('cross', outcome)`` entry, so being hit is penalised
        instead of earning the same reward as a safe crossing.
      * Any other (state, action) pair costs a flat -10 and leads to ``'hit'``
        with probability 0.7, otherwise to a uniformly random state.

    Args:
        num_episodes: Number of episodes to simulate.

    Returns:
        None. Progress is printed every 100th episode.
    """
    # Resolve names to Q-table indices once, not with list.index() each step.
    state_index = {name: i for i, name in enumerate(states)}
    action_index = {name: i for i, name in enumerate(actions)}

    # Steps of the routine whose next state is fixed.
    # NOTE: a step can only be taken if its action is present in `actions`.
    ordered_steps = {
        ('start', 'look_left'): 'look_left',
        ('look_left', 'look_right'): 'look_right',
        ('look_right', 'look_left_again'): 'look_left_again',
    }

    for episode in range(num_episodes):
        state = 'start'
        sequence = []
        while state not in ('safe', 'hit'):
            state_idx = state_index[state]

            # Epsilon-greedy action selection
            if random.uniform(0, 1) < epsilon:
                action = random.choice(actions)  # Explore
            else:
                action = actions[np.argmax(q_table[state_idx])]  # Exploit
            sequence.append((state, action))

            # Determine next state and reward
            step = (state, action)
            if step in ordered_steps:
                next_state = ordered_steps[step]
                reward = rewards.get(step, 0)
            elif step == ('look_left_again', 'cross'):
                # 90% chance of crossing safely if following the correct sequence
                next_state = 'safe' if random.random() < 0.9 else 'hit'
                # Add the outcome reward; without it a hit would still earn
                # the full "correct sequence" reward.
                reward = rewards.get(step, 0) + rewards.get(('cross', next_state), 0)
            else:
                # Wrong action sequence - likely to get hit
                if random.random() < 0.7:
                    next_state = 'hit'
                else:
                    next_state = random.choice(states)
                reward = -10

            # Q-learning update. Rows of terminal states are never written,
            # so they contribute a next_max of 0.
            action_idx = action_index[action]
            old_value = q_table[state_idx, action_idx]
            next_max = np.max(q_table[state_index[next_state]])
            q_table[state_idx, action_idx] = (
                (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
            )
            state = next_state

        # Print progress
        if episode % 100 == 0:
            print(f"Episode: {episode}, Final state: {state}, Sequence: {sequence}")
# Test the trained model
def test():
    """Replay the greedy policy from ``q_table`` once and print what happened.

    Starting in ``'start'``, the highest-valued action of the current state is
    taken each step (no exploration). A step that matches the expected routine
    advances to the next state; the final ``cross`` is assumed to succeed.
    Any other step ends the run in ``'hit'``. The visited (state, action)
    pairs and the final state are printed.
    """
    # Expected (state, action) steps and the state each one leads to.
    routine = {
        ('start', 'look_left'): 'look_left',
        ('look_left', 'look_right'): 'look_right',
        ('look_right', 'look_left_again'): 'look_left_again',
        ('look_left_again', 'cross'): 'safe',  # Assume success in test
    }
    terminal_states = ('safe', 'hit')

    current = 'start'
    history = []
    print("Testing the trained model:")
    while current not in terminal_states:
        row = q_table[states.index(current)]
        chosen = actions[np.argmax(row)]
        history.append((current, chosen))
        # Anything outside the routine counts as getting hit.
        current = routine.get((current, chosen), 'hit')

    print("Sequence executed:")
    for visited, taken in history:
        print(f"- {visited}: {taken}")
    print(f"Final result: {current}")
# Run 1000 training episodes, then replay the greedy policy once.
train(1000)
test()
# Dump the learned action values under a heading, preceded by a blank line.
print("\nLearned Q-table:", q_table, sep="\n")