
env = gym.make(env_name, render_mode="rgb_array")
vid = VideoRecorder(env, path=f"video/{env_name}_pretraining.mp4")
observation = env.reset()[0]

total_reward = 0
done = False
while not done:
    frame = env.render()
    vid.capture_frame()
    action, states = model.predict(observation, deterministic=True)
    observation, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    total_reward += reward
vid.close()
env.close()
print(f"\nTotal reward: {total_reward}")

# show video
html = render_mp4(f"video/{env_name}_pretraining.mp4")
HTML(html)
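The cell above (and the one that follows) embeds the recorded mp4 through a render_mp4 helper. If that helper is not defined earlier in the notebook, a minimal sketch of one (the function body below is an assumption, not taken from the original) could look like this:

import base64

def render_mp4(videopath: str) -> str:
    # Hypothetical helper: return an HTML5 video tag with the mp4 embedded as base64 data
    mp4 = open(videopath, "rb").read()
    base64_encoded_mp4 = base64.b64encode(mp4).decode()
    return (
        f'<video width=400 controls><source src="data:video/mp4;'
        f'base64,{base64_encoded_mp4}" type="video/mp4"></video>'
    )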

env = gym.make(env_name, render_mode="rgb_array")
vid = VideoRecorder(env, path=f"video/{env_name}_learned.mp4")
observation = env.reset()[0]

total_reward = 0
done = False
while not done:
    frame = env.render()
    vid.capture_frame()
    action, states = model.predict(observation, deterministic=True)
    observation, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    total_reward += reward
vid.close()
env.close()
print(f"\nTotal reward: {total_reward}")

# show video
html = render_mp4(f"video/{env_name}_learned.mp4")
HTML(html)

# Taken from
# https://stable-baselines3.readthedocs.io/en/master/guide/custom_env.html
class CustomEnv(gym.Env):
    """Custom Environment that follows gym interface"""
    metadata = {'render.modes': ['human']}

    def __init__(self, arg1, arg2):
        super(CustomEnv, self).__init__()
        # Define action and observation space
        # They must be gym.spaces objects
        # Example when using discrete actions:
        self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        # Example for using image as input (channel-first; channel-last also works):
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(N_CHANNELS, HEIGHT, WIDTH),
                                            dtype=np.uint8)

    def step(self, action):
        ...
        return observation, reward, done, info

    def reset(self):
        ...
        return observation  # reward, done, info can't be included

    def render(self, mode='human'):
        ...

    def close(self):
        ...
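Before training on a custom environment built from this template, it is worth running Stable-Baselines3's environment checker, which validates that the spaces and the reset/step signatures follow the Gym interface. A minimal usage sketch (arg1 and arg2 are the template's placeholder constructor arguments):

from stable_baselines3.common.env_checker import check_env

env = CustomEnv(arg1, arg2)  # placeholder arguments from the template above
check_env(env)  # raises an error or prints warnings if the env deviates from the Gym API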

def step(self, actions):
    ...
    reward = 0
    shaping = (
        -100 * np.sqrt(state[0] * state[0] + state[1] * state[1])
        - 100 * np.sqrt(state[2] * state[2] + state[3] * state[3])
        - 100 * abs(state[4])
        + 10 * state[6]
        + 10 * state[7]
    )  # And ten points for legs contact, the idea is if you
    #  lose contact again after landing, you get negative reward
    if self.prev_shaping is not None:
        reward = shaping - self.prev_shaping
    self.prev_shaping = shaping

    reward -= (
        m_power * 0.30
    )  # less fuel spent is better, about -30 for heuristic landing.
    #  You should modify these values.
    reward -= s_power * 0.03

    done = False
    if self.game_over or abs(state[0]) >= 1.0:
        done = True
        reward = -100
    if not self.lander.awake:
        done = True
        reward = +100
    return np.array(state, dtype=np.float32), reward, done, {}
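The shaping term acts like a potential function: the per-step reward is the change in shaping between consecutive states, so moving toward the pad, slowing down, or straightening up raises the potential and earns a positive reward, while drifting away earns a negative one. As a small illustration with made-up numbers (not values from the environment):

prev_shaping = -120.0              # potential at the previous state (hypothetical)
shaping = -110.0                   # potential after moving closer to the pad (hypothetical)
reward = shaping - prev_shaping    # +10.0 for the progress made this step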

def step(self, actions):
    ...
    state = [  # Remove one component at a time to investigate the effect on performance!
        (pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
        (pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),
        vel.x * (VIEWPORT_W / SCALE / 2) / FPS,
        vel.y * (VIEWPORT_H / SCALE / 2) / FPS,
        self.lander.angle,
        20.0 * self.lander.angularVelocity / FPS,
        1.0 if self.legs[0].ground_contact else 0.0,
        1.0 if self.legs[1].ground_contact else 0.0,
    ]
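One way to run the ablation suggested in that comment, without editing the environment source, is an observation wrapper that drops a chosen component and shrinks the observation space to match. The sketch below is an assumption rather than part of the original: the wrapper name, the drop_index parameter, and the explicit import of Gymnasium as gym are all illustrative.

import numpy as np
import gymnasium as gym

class DropStateComponent(gym.ObservationWrapper):
    # Hypothetical helper: remove one component of the 8-dimensional LunarLander state
    def __init__(self, env, drop_index):
        super().__init__(env)
        self.drop_index = drop_index
        low = np.delete(env.observation_space.low, drop_index)
        high = np.delete(env.observation_space.high, drop_index)
        self.observation_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)

    def observation(self, observation):
        return np.delete(observation, self.drop_index).astype(np.float32)

# For example, train on observations without the angular velocity (index 5):
# env = DropStateComponent(gym.make(env_name), drop_index=5)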
