# Professional Documents
# Culture Documents
# Py Code Example 11 2 Bellman
# Py Code Example 11 2 Bellman
##################
import numpy as np
# Solve the Bellman equations of a small Markov chain as one linear system.
# States 0 and 4 loop back to themselves with probability 1 (absorbing);
# states 1..3 sit between them.
n1 = 3  # number of non-absorbing states
n2 = n1 + 2  # total number of states, i.e. the dimension of every matrix below

# The matrices have to exist before single entries can be assigned.
# Every transition probability / reward not set explicitly stays at zero.
tran_prob_mat = np.zeros((n2, n2))
reward_mat = np.zeros((n2, n2))

# tran_prob_mat[i, j] is the probability of moving from state i to state j.
tran_prob_mat[0, 0] = 1.0
tran_prob_mat[4, 4] = 1.0
tran_prob_mat[2, 1] = 0.5
tran_prob_mat[2, 3] = 0.5
tran_prob_mat[1, 0] = 1.0
tran_prob_mat[3, 4] = 1.0

# reward_mat[i, j] is the reward collected on the transition i -> j;
# only the step 1 -> 0 pays out.
reward_mat[1, 0] = 1.0

discount = 1

# Bellman equation for state i:
#     v[i] = sum_j P[i, j] * (R[i, j] + discount * v[j])
# rearranged into the linear system A @ v = b.
A = discount * tran_prob_mat  # the product is a new array; tran_prob_mat is not modified

# The diagonal is overwritten with -1 rather than decremented by 1: for the
# self-looping states 0 and 4 this leaves the row as -v[i] = b[i] = 0, which
# pins their value to zero and keeps A non-singular even with discount = 1.
np.fill_diagonal(A, -1)

# b[i] is the negated expected one-step reward from state i.
b = -(tran_prob_mat * reward_mat).sum(axis=1)

v = np.linalg.solve(A, b)
state_value_bellman = v
# Bare expression: shows the result when run as a notebook/REPL cell,
# and has no effect when the file is run as a script.
state_value_bellman
##################