Source code for openrl.envs.wrappers.monitor
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""""""
import time
from typing import List, Union
import numpy as np
from openrl.envs.wrappers.base_wrapper import BaseWrapper
class Monitor(BaseWrapper):
    """
    A monitor wrapper for Gym environments, it is used to know the episode
    reward, length, time and other data.

    Rewards are accumulated from one ``reset`` call to the next. When a step
    reports the episode as finished, a summary dict is stored under
    ``info["episode"]`` and appended to the per-episode history lists.

    :param env: The environment
    """

    def __init__(self, env):
        super().__init__(env=env)
        # Reference point for the "t" value reported with each finished episode.
        self.t_start = time.time()
        # Rewards received since the last call to reset().
        self.rewards = []
        self.episode_returns: List[float] = []
        self.episode_lengths: List[int] = []
        self.episode_times: List[float] = []
        # Number of successful step() calls over the wrapper's whole lifetime.
        self.total_steps = 0

    def reset(self, **kwargs):
        """
        Clear the running reward buffer and reset the wrapped environment.

        :param kwargs: keyword arguments forwarded unchanged to the wrapped
            environment's ``reset``
        :return: whatever the wrapped environment's ``reset`` returns
        """
        self.rewards = []
        return self.env.reset(**kwargs)

    def step(self, action: Union[np.ndarray, int]):
        """
        Step the environment with the given action

        :param action: the action
        :return: observation, reward, done, information or observation, reward,
            terminal, truncated, information. The last element is the info
            object of the wrapped environment, with an ``"episode"`` entry
            added on the step that finishes an episode.
        :raises ValueError: if the wrapped environment returns a tuple whose
            length is neither 4 nor 5
        """
        returns = self.env.step(action)
        if len(returns) == 4:
            done = returns[2]
        elif len(returns) == 5:
            # Element-wise OR: the flags may be arrays or lists (np.all below
            # already anticipates that). Python's `or` would raise on
            # multi-element arrays and silently drop `truncated` for lists.
            done = np.logical_or(returns[2], returns[3])
        else:
            raise ValueError(
                f"returns should have length 4 or 5, got length {len(returns)}"
            )

        self.rewards.append(returns[1])
        info = returns[-1]
        if np.all(done):
            ep_rew = np.sum(self.rewards)
            ep_len = len(self.rewards)
            # Sample the clock once so info["episode"]["t"] and episode_times
            # describe the same instant.
            elapsed = time.time() - self.t_start
            ep_info = {
                "r": round(ep_rew, 6),
                "l": ep_len,
                "t": round(elapsed, 6),
            }
            self.episode_returns.append(ep_rew)
            self.episode_lengths.append(ep_len)
            self.episode_times.append(elapsed)
            info["episode"] = ep_info
            # NOTE(review): self.rewards is only cleared in reset(); this
            # assumes the caller resets through this wrapper after every
            # finished episode - confirm against auto-reset usage.
        self.total_steps += 1
        return (*returns[:-1], info)

    def get_total_steps(self) -> int:
        """
        Returns the total number of timesteps

        :return: number of completed ``step`` calls since construction
        """
        return self.total_steps

    def get_episode_rewards(self) -> List[float]:
        """
        Returns the rewards of all the episodes

        :return: the internal list of summed rewards, one entry per finished
            episode (not a copy)
        """
        return self.episode_returns

    def get_episode_lengths(self) -> List[int]:
        """
        Returns the number of timesteps of all the episodes

        :return: the internal list of step counts, one entry per finished
            episode (not a copy)
        """
        return self.episode_lengths

    def get_episode_times(self) -> List[float]:
        """
        Returns the time in seconds at which each episode finished

        :return: the internal list of elapsed seconds between construction of
            this wrapper and the end of each finished episode (not a copy)
        """
        return self.episode_times