# LunarLander tutorial (scraped page — site navigation breadcrumbs removed)
# Record one rollout of the (pre-training) policy to an MP4 and display it.
# NOTE(review): assumes `env_name`, `model`, `gym`, `VideoRecorder`,
# `render_mp4`, and `HTML` are defined/imported earlier in the file.
env = gym.make(env_name, render_mode="rgb_array")  # result was discarded in the original
vid = VideoRecorder(env, path=f"video/{env_name}_pretraining.mp4")
observation = env.reset()[0]  # gymnasium reset() returns (obs, info)
total_reward = 0
done = False
while not done:
    env.render()
    vid.capture_frame()
    action, _states = model.predict(observation, deterministic=True)
    # gymnasium step() returns (obs, reward, terminated, truncated, info).
    # The original unpacked it as (obs, reward, done, info, _), which
    # mislabeled `truncated` as `info` and never ended the episode on
    # truncation (possible infinite loop on time-limited environments).
    observation, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    total_reward += reward
vid.close()
env.close()
print(f"\nTotal reward: {total_reward}")
# show video
html = render_mp4(f"video/{env_name}_pretraining.mp4")
HTML(html)
# Record one rollout of the learned policy and display it.
# NOTE(review): this chunk reuses `env`, `vid`, `model`, and `observation`;
# the previous section closed the env and recorder, so upstream code (not
# visible in this chunk) must re-create them and reset the observation.
total_reward = 0
done = False
while not done:
    env.render()
    vid.capture_frame()
    action, _states = model.predict(observation, deterministic=True)
    # gymnasium step() returns (obs, reward, terminated, truncated, info).
    # The original 5-tuple unpacking mislabeled `truncated` as `info` and
    # never ended the episode on truncation.
    observation, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    total_reward += reward
vid.close()
env.close()
print(f"\nTotal reward: {total_reward}")
# show video
html = render_mp4(f"video/{env_name}_learned.mp4")
HTML(html)
# Taken from
# https://stable-baselines3.readthedocs.io/en/master/guide/custom_env.html
class CustomEnv(gym.Env):
    """Custom Environment that follows gym interface"""

    metadata = {'render.modes': ['human']}

    def __init__(self, arg1, arg2):
        super().__init__()
        # Define the action and observation spaces; both must be
        # gym.spaces objects.
        # Example when using discrete actions:
        self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        # Example for using an image as input (channel-first shown;
        # channel-last also works):
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(N_CHANNELS, HEIGHT, WIDTH),
            dtype=np.uint8,
        )
# NOTE(review): fragment of LunarLander's step() — the enclosing `def` is
# outside this chunk, so only comments/indentation were restored here.
# Fuel penalties: less fuel spent is better, about -30 for a heuristic
# landing. You should modify these values.
reward -= (
    m_power * 0.30
)
reward -= s_power * 0.03
done = False
if self.game_over or abs(state[0]) >= 1.0:
    # Crash, or lander left the horizontal bounds: episode over, big penalty.
    done = True
    reward = -100
if not self.lander.awake:
    # Lander body asleep (presumably settled/landed) — success bonus.
    done = True
    reward = +100
return np.array(state, dtype=np.float32), reward, done, {}