Son

You might also like

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1of 2

import numpy as np

class SONAgent:
    """Tabular Q-learning agent for Self-Organizing Network (SON) cell control.

    Holds a Q-table of shape (num_cells, num_actions); each entry estimates
    the discounted return of taking an action from a given cell.
    """

    def __init__(self, num_cells, num_actions):
        self.num_cells = num_cells
        self.num_actions = num_actions
        # All-zero initial Q-values.
        self.q_table = np.zeros((num_cells, num_actions))

    def choose_action(self, cell_idx, epsilon=0.0):
        """Return an action index for *cell_idx*.

        With probability *epsilon* a uniformly random action is explored;
        otherwise the greedy (highest-Q) action is returned. The default
        epsilon=0.0 reproduces the original purely greedy behavior. Note:
        np.argmax breaks ties toward the lowest index, so a fresh all-zero
        table always yields action 0 when epsilon is 0.
        """
        if epsilon > 0.0 and np.random.rand() < epsilon:
            return np.random.randint(self.num_actions)
        return np.argmax(self.q_table[cell_idx])

    def update_q_table(self, cell_idx, action, reward, next_cell_idx,
                       learning_rate, discount_factor):
        """Apply one off-policy TD (Q-learning) update:

        Q(s, a) += lr * (r + gamma * max_a' Q(s', a') - Q(s, a))
        """
        current_q = self.q_table[cell_idx, action]
        max_next_q = np.max(self.q_table[next_cell_idx])
        new_q = current_q + learning_rate * (
            reward + discount_factor * max_next_q - current_q
        )
        self.q_table[cell_idx, action] = new_q

class Testbed:
    """Minimal simulated environment: the agent occupies one of *num_cells*
    cells and moves by adding its chosen action to the current position."""

    def __init__(self, num_cells):
        self.num_cells = num_cells
        # Start in a uniformly random cell.
        self.current_cell = np.random.randint(num_cells)

    def take_action(self, action):
        """Apply *action* and return (reward, next_cell).

        The reward is a random placeholder; the position update is
        current_cell + action clamped into [0, num_cells - 1].
        NOTE(review): a non-negative action can only stay or move right —
        confirm whether left moves were meant to be encodable.
        """
        reward = np.random.rand()  # placeholder for a real reward signal
        proposed = self.current_cell + action
        lo, hi = 0, self.num_cells - 1
        self.current_cell = min(hi, max(lo, proposed))  # keep in bounds
        return reward, self.current_cell

# ---- Hyper-parameters ----
num_cells = 10         # cells in the simulated network
num_actions = 3        # actions available per cell
learning_rate = 0.1    # Q-learning step size (alpha)
discount_factor = 0.9  # future-reward discount (gamma)
num_episodes = 1000    # training episodes

# Build the learning agent and the simulated environment it trains against.
agent = SONAgent(num_cells, num_actions)
testbed = Testbed(num_cells)

# Training loop: each episode is capped at 100 steps from a random start cell.
for episode in range(num_episodes):
    total_reward = 0
    # Re-randomize the starting position so episodes cover the whole range.
    testbed.current_cell = np.random.randint(num_cells)

    for _ in range(100):  # step cap avoids unbounded episodes
        state = testbed.current_cell
        action = agent.choose_action(state)
        reward, next_state = testbed.take_action(action)
        agent.update_q_table(state, action, reward, next_state,
                             learning_rate, discount_factor)
        total_reward += reward

    # Periodic progress report.
    if episode % 100 == 0:
        print(f"Episode {episode}, Total Reward: {total_reward}")

You might also like