Shortcuts

Source code for openrl.envs.wrappers.monitor

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""""""
import time
from typing import List, Union

import numpy as np

from openrl.envs.wrappers.base_wrapper import BaseWrapper


class Monitor(BaseWrapper):
    """
    A monitor wrapper for Gym environments. It records the return, length and
    elapsed wall-clock time of every finished episode, plus the total number of
    steps taken through the wrapper.

    :param env: The environment
    """

    def __init__(self, env):
        super().__init__(env=env)
        # Reference point for all reported times; it is set once here and is
        # never reset, so reported times are measured from wrapper creation.
        self.t_start = time.time()
        # Rewards collected since the last call to ``reset``.
        self.rewards = []
        self.episode_returns: List[float] = []
        self.episode_lengths: List[int] = []
        self.episode_times: List[float] = []
        self.total_steps = 0

    def reset(self, **kwargs):
        """
        Clear the rewards of the current episode and reset the wrapped
        environment.

        :param kwargs: Keyword arguments forwarded unchanged to ``env.reset``
        :return: whatever the wrapped environment's ``reset`` returns
        """
        self.rewards = []
        return self.env.reset(**kwargs)

    def step(self, action: Union[np.ndarray, int]):
        """
        Step the environment with the given action and record episode
        statistics when the episode ends.

        When the episode is finished, an ``"episode"`` entry with keys ``"r"``
        (summed reward), ``"l"`` (number of steps) and ``"t"`` (seconds since
        the wrapper was created) is written into the returned info object.

        :param action: the action
        :return: observation, reward, done, information
            or observation, reward, terminal, truncated, information
        :raises ValueError: if the wrapped ``step`` does not return 4 or 5 items
        """
        returns = self.env.step(action)
        if len(returns) == 4:
            done = returns[2]
        elif len(returns) == 5:
            # Combine the two flags element-wise: a plain ``or`` raises on
            # multi-element arrays and ignores the second operand whenever the
            # first one is a non-empty list.
            done = np.logical_or(returns[2], returns[3])
        else:
            raise ValueError(
                "returns should have length 4 or 5, got length {}".format(len(returns))
            )

        self.rewards.append(returns[1])
        # NOTE(review): info is assumed to support item assignment (e.g. a
        # dict); it is modified in place below.
        info = returns[-1]

        # The episode counts as finished only when every done flag is set.
        if np.all(done):
            ep_rew = np.sum(self.rewards)
            ep_len = len(self.rewards)
            # Sample the clock once so the info entry and the stored history
            # describe the same instant.
            elapsed = time.time() - self.t_start
            ep_info = {
                "r": round(ep_rew, 6),
                "l": ep_len,
                "t": round(elapsed, 6),
            }
            self.episode_returns.append(ep_rew)
            self.episode_lengths.append(ep_len)
            self.episode_times.append(elapsed)
            info["episode"] = ep_info

        self.total_steps += 1
        return (*returns[:-1], info)

    def get_total_steps(self) -> int:
        """
        Returns the total number of timesteps

        :return: number of ``step`` calls that completed without raising
        """
        return self.total_steps

    def get_episode_rewards(self) -> List[float]:
        """
        Returns the rewards of all the episodes

        :return: the internal list of summed rewards, one entry per finished
            episode (not a copy)
        """
        return self.episode_returns

    def get_episode_lengths(self) -> List[int]:
        """
        Returns the number of timesteps of all the episodes

        :return: the internal list of episode lengths (not a copy)
        """
        return self.episode_lengths

    def get_episode_times(self) -> List[float]:
        """
        Returns the runtime in seconds of all the episodes

        :return: the internal list of elapsed seconds, measured from wrapper
            creation to the end of each episode (not a copy)
        """
        return self.episode_times