# Download as txt, pdf, or txt
# Download as txt, pdf, or txt
# You are on page 1 of 1

##################

## Example 11.2: A-split example

##################

import numpy as np

# Construct state transition probability matrix and reward matrix.

# states: [U, B, A, C, L] = [0, 1, 2, 3, 4]
# U (index 0) and L (index 4) are absorbing: each transitions to itself with
# probability 1 and carries no reward.

n1 = 3  # number of non-absorbing states (B, A, C)
n2 = n1 + 2  # total number of states, including the absorbing U and L

# tran_prob_mat[i, j]: probability of moving from state i to state j.
tran_prob_mat = np.zeros((n2, n2))

# reward_mat[i, j]: reward received on the transition i -> j.
reward_mat = np.zeros((n2, n2))

tran_prob_mat[0, 0] = 1.0  # U -> U
tran_prob_mat[4, 4] = 1.0  # L -> L
tran_prob_mat[2, 1] = 0.5  # A -> B
tran_prob_mat[2, 3] = 0.5  # A -> C
tran_prob_mat[1, 0] = 1.0  # B -> U
tran_prob_mat[3, 4] = 1.0  # C -> L

reward_mat[1, 0] = 1.0  # the only non-zero reward: B -> U

# Find state-value function by solving Bellman equation (3.14).
#
# Written as a linear system A v = b:
#   A = discount * P - I
#   b[i] = -sum_j P[i, j] * R[i, j]   (negated expected one-step reward)

discount = 1

# The product allocates a new array, so tran_prob_mat itself is not modified.
A = discount * tran_prob_mat

# Force the diagonal to -1. For B, A and C (no self-transition) this is exactly
# discount * P - I. For the absorbing states it replaces discount * 1 - 1,
# which would be 0 at discount = 1 and make A singular; together with their
# zero entry in b it pins their value to 0.
np.fill_diagonal(A, -1.0)

# Row-wise expected immediate reward, negated to form the right-hand side.
b = -(tran_prob_mat * reward_mat).sum(axis=1)

v = np.linalg.solve(A, b)

state_value_bellman = v

# Notebook-style echo of the result; as a bare expression it prints nothing
# when the file is run as a script.
state_value_bellman

##################

# You might also like