-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRandomAgent.py
More file actions
34 lines (27 loc) · 1.32 KB
/
RandomAgent.py
File metadata and controls
34 lines (27 loc) · 1.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import random
import pickle
class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy action policy.

    Q-values live in ``q_table`` as a nested mapping
    ``state_key -> {action: value}``, where ``state_key`` is the tuple
    built by :meth:`_state_to_key`.
    """

    def __init__(self, actions, learning_rate=0.1, discount_factor=0.95, exploration_rate=0.1):
        """Create an agent with an empty Q-table.

        Args:
            actions: Sequence of hashable actions the agent can pick from.
            learning_rate: Step size applied to each Q-value update.
            discount_factor: Weight given to the best successor Q-value.
            exploration_rate: Probability of picking a random action in
                :meth:`choose_action`.
        """
        self.q_table = {}  # state_key -> {action: value}
        self.actions = actions
        self.lr = learning_rate
        self.df = discount_factor
        self.epsilon = exploration_rate

    def choose_action(self, state):
        """Pick an action for ``state`` using an epsilon-greedy rule.

        A random action is returned with probability ``epsilon`` and
        whenever the state has no row in the Q-table yet; otherwise the
        action with the highest stored Q-value is returned (ties resolve
        to the first such action in the row's iteration order).

        Args:
            state: Mapping with ``"distance"``, ``"food"`` and ``"health"``
                entries.

        Returns:
            One element of ``self.actions``.
        """
        state_key = self._state_to_key(state)
        if random.random() < self.epsilon or state_key not in self.q_table:
            return random.choice(self.actions)
        action_values = self.q_table[state_key]
        return max(action_values, key=action_values.get)

    def learn(self, state, action, reward, next_state):
        """Apply one Q-learning update for an observed transition.

        The row for ``state`` is created (all zeros) on first use. The row
        for ``next_state`` is only *read*: a successor that has never been
        updated contributes a future value of 0.0 and is not inserted into
        the table, so :meth:`choose_action` still treats it as unseen and
        explores randomly there.

        Args:
            state: State the action was taken in.
            action: The action taken; must be one of ``self.actions``.
            reward: Numeric reward received for the transition.
            next_state: State reached after taking the action.

        Raises:
            KeyError: If ``action`` has no entry in the row for ``state``.
        """
        state_key = self._state_to_key(state)
        next_key = self._state_to_key(next_state)

        if state_key not in self.q_table:
            self.q_table[state_key] = {a: 0.0 for a in self.actions}
        action_values = self.q_table[state_key]

        # Look the successor up without materialising a zero row for it;
        # an all-zero row would make the greedy branch of choose_action
        # deterministically return the first action for that state.
        next_values = self.q_table.get(next_key)
        max_future_q = max(next_values.values()) if next_values else 0.0

        current_q = action_values[action]
        action_values[action] = current_q + self.lr * (
            reward + self.df * max_future_q - current_q
        )

    def _state_to_key(self, state):
        """Return the hashable Q-table key for ``state``.

        The key is the tuple ``(state["distance"], state["food"],
        state["health"])``.
        """
        return (state["distance"], state["food"], state["health"])