Source code for openrl.envs.wrappers.base_wrapper
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base classes for environment wrappers.

Defines ``BaseWrapper`` (a ``gym.Wrapper`` that stores ``cfg`` and
``reward_class``) together with ``BaseObservationWrapper`` and
``BaseRewardWrapper``, which route observations / rewards through an
overridable hook.
"""
from typing import Any, Dict, Optional, SupportsFloat, Tuple, TypeVar, Union
import gymnasium as gym
from gymnasium.core import ActType, ObsType, WrapperObsType
# Type variable used to annotate ``BaseRewardWrapper.reward``: the hook is
# declared to return the same type it receives.
ArrayType = TypeVar("ArrayType")
class BaseWrapper(gym.Wrapper):
    """Common base for environment wrappers in this module.

    Keeps the optional ``cfg`` and ``reward_class`` objects on the instance
    and exposes a handful of properties that are read from the wrapped
    environment whenever it provides them.
    """

    def __init__(self, env, cfg=None, reward_class=None) -> None:
        """Wrap ``env`` and remember the optional extras.

        Args:
            env: The environment to wrap.
            cfg: Optional configuration object, stored as ``self.cfg``.
            reward_class: Optional object, stored as ``self.reward_class``.
        """
        super().__init__(env)
        self.reward_class = reward_class
        self.cfg = cfg

    @property
    def env_name(self):
        """Name of the wrapped environment.

        Returns:
            The wrapped environment's ``env_name`` when it has that
            attribute; otherwise the ``spec.id`` of the unwrapped
            environment.
        """
        inner = self.env
        if not hasattr(inner, "env_name"):
            return inner.unwrapped.spec.id
        return inner.env_name

    @property
    def agent_num(self):
        """Number of agents as reported by the wrapped environment.

        Raises:
            NotImplementedError: If the wrapped environment has no
                ``agent_num`` attribute.
        """
        inner = self.env
        if not hasattr(inner, "agent_num"):
            raise NotImplementedError("Not support agent_num")
        return inner.agent_num

    @property
    def use_monitor(self):
        """Monitor flag; this base implementation always returns ``False``."""
        return False

    @property
    def has_auto_reset(self):
        """Auto-reset flag taken from the wrapped environment.

        Returns:
            The wrapped environment's ``has_auto_reset`` when it has that
            attribute, ``False`` otherwise.
        """
        inner = self.env
        return inner.has_auto_reset if hasattr(inner, "has_auto_reset") else False

    def set_render_mode(self, render_mode: Union[None, str]):
        """Forward ``render_mode`` to the wrapped environment.

        Nothing happens when the wrapped environment does not provide a
        ``set_render_mode`` method.

        Args:
            render_mode: The render mode to pass on, or ``None``.
        """
        inner = self.env
        if not hasattr(inner, "set_render_mode"):
            return
        inner.set_render_mode(render_mode)
class BaseObservationWrapper(BaseWrapper):
    """Wrapper base that passes every observation through :meth:`observation`."""

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[Dict[str, Any]] = None,
    ) -> Tuple[WrapperObsType, Dict[str, Any]]:
        """Reset the wrapped environment and transform its first observation.

        Args:
            seed: Forwarded to the wrapped environment's ``reset``.
            options: Forwarded to the wrapped environment's ``reset``.

        Returns:
            A ``(observation, info)`` pair where the observation has been
            processed by :meth:`observation` and ``info`` is unchanged.
        """
        raw_obs, info = self.env.reset(seed=seed, options=options)
        transformed = self.observation(raw_obs)
        return transformed, info

    def step(
        self, action: ActType
    ) -> Tuple[WrapperObsType, SupportsFloat, bool, bool, Dict[str, Any]]:
        """Step the wrapped environment and transform the returned observation.

        Args:
            action: The action forwarded to the wrapped environment.

        Returns:
            The wrapped environment's step result with its first element
            replaced by :meth:`observation` applied to it; every remaining
            element is passed through as-is.
        """
        step_out = self.env.step(action)
        return (self.observation(step_out[0]), *step_out[1:])

    def observation(self, observation: ObsType) -> WrapperObsType:
        """Transform a single observation.

        This base implementation raises; subclasses are expected to
        override it.

        Args:
            observation: An observation produced by the wrapped environment.

        Returns:
            The transformed observation.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
class BaseRewardWrapper(BaseWrapper):
    """Wrapper base that passes every step reward through :meth:`reward`."""

    def __init__(self, env, cfg=None):
        """Wrap ``env``, forwarding only ``cfg`` to the parent initializer.

        Args:
            env: The environment to wrap.
            cfg: Optional configuration object passed on to the parent.
        """
        super().__init__(env, cfg)

    def step(
        self, action: ActType
    ) -> Tuple[ObsType, SupportsFloat, bool, bool, Dict[str, Any]]:
        """Step the wrapped environment and transform the returned reward.

        Args:
            action: The action forwarded to the wrapped environment.

        Returns:
            The wrapped environment's step result with its second element
            replaced by :meth:`reward` applied to it; the first element and
            everything after the second are passed through as-is.
        """
        step_out = self.env.step(action)
        obs = step_out[0]
        new_reward = self.reward(step_out[1])
        return (obs, new_reward, *step_out[2:])

    def reward(self, reward: ArrayType) -> ArrayType:
        """Transform a single step reward.

        This base implementation raises; subclasses are expected to
        override it.

        Args:
            reward: The reward element of the wrapped environment's step
                result.

        Returns:
            The transformed reward.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError