Shortcuts

openrl.runners.common package

Submodules

openrl.runners.common.a2c_agent module

class openrl.runners.common.a2c_agent.A2CAgent(net: Optional[Union[torch.nn.modules.module.Module, openrl.modules.common.base_net.BaseNet]] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False, project_name: str = 'GAILAgent')[source]

Bases: openrl.runners.common.ppo_agent.PPOAgent

train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.a2c.A2CAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None, driver_class: Type[openrl.drivers.base_driver.BaseDriver] = <class 'openrl.drivers.onpolicy_driver.OnPolicyDriver'>) -> None[source]

openrl.runners.common.base_agent module

class openrl.runners.common.base_agent.BaseAgent[source]

Bases: abc.ABC

get_env() -> Optional[openrl.envs.vec_env.base_venv.BaseVecEnv][source]

Returns the current environment (can be None if not defined).

Returns

The current environment, or None if no environment has been defined.

abstract load(path: Union[str, pathlib.Path, io.BufferedIOBase])[source]
property logger: openrl.utils.logger.Logger

Getter for the logger object.

abstract save(path: Union[str, pathlib.Path, io.BufferedIOBase]) -> None[source]

openrl.runners.common.bc_agent module

class openrl.runners.common.bc_agent.BCAgent(net: Optional[Union[torch.nn.modules.module.Module, openrl.modules.common.base_net.BaseNet]] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False, project_name: str = 'GAILAgent')[source]

Bases: openrl.runners.common.ppo_agent.PPOAgent

train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.behavior_cloning.BCAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None, driver_class: Type[openrl.drivers.base_driver.BaseDriver] = <class 'openrl.drivers.offline_driver.OfflineDriver'>) -> None[source]

openrl.runners.common.chat_agent module

class openrl.runners.common.chat_agent.Chat6BAgent(model, tokenizer, device=None)[source]

Bases: openrl.runners.common.chat_agent.ChatAgent

chat(input: str, history: List[str])[source]
classmethod load(agent_path: Union[str, pathlib.Path, io.BufferedIOBase], device='cuda:0') -> openrl.runners.common.base_agent.SelfAgent[source]
class openrl.runners.common.chat_agent.ChatAgent(model, tokenizer, device=None)[source]

Bases: openrl.runners.common.base_agent.BaseAgent

chat(input: str, history: List[str])[source]
classmethod load(agent_path: Union[str, pathlib.Path, io.BufferedIOBase], tokenizer: Optional[Union[str, pathlib.Path, io.BufferedIOBase]] = None, disable_cuda: Optional[bool] = True) -> openrl.runners.common.base_agent.SelfAgent[source]
save(path: Union[str, pathlib.Path, io.BufferedIOBase]) -> None[source]

openrl.runners.common.ddpg_agent module

class openrl.runners.common.ddpg_agent.DDPGAgent(net: Optional[torch.nn.modules.module.Module] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False)[source]

Bases: openrl.runners.common.rl_agent.RLAgent

act(observation: Union[numpy.ndarray, Dict[str, numpy.ndarray]], deterministic: bool) -> Tuple[numpy.ndarray, Optional[Tuple[numpy.ndarray, ...]]][source]
train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.ddpg.DDPGAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None) -> None[source]

openrl.runners.common.dqn_agent module

class openrl.runners.common.dqn_agent.DQNAgent(net: Optional[torch.nn.modules.module.Module] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False)[source]

Bases: openrl.runners.common.rl_agent.RLAgent

act(observation: Union[numpy.ndarray, Dict[str, numpy.ndarray]], deterministic: bool = True) -> Tuple[numpy.ndarray, Optional[Tuple[numpy.ndarray, ...]]][source]
train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.dqn.DQNAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None) -> None[source]

openrl.runners.common.gail_agent module

class openrl.runners.common.gail_agent.GAILAgent(net: Optional[Union[torch.nn.modules.module.Module, openrl.modules.common.base_net.BaseNet]] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False, project_name: str = 'GAILAgent')[source]

Bases: openrl.runners.common.ppo_agent.PPOAgent

train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.gail.GAILAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None) -> None[source]

openrl.runners.common.mat_agent module

class openrl.runners.common.mat_agent.MATAgent(net: Optional[Union[torch.nn.modules.module.Module, openrl.modules.common.base_net.BaseNet]] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False, project_name: str = 'PPOAgent')[source]

Bases: openrl.runners.common.ppo_agent.PPOAgent

train(total_time_steps: int, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.mat.MATAlgorithm'>) -> None[source]

openrl.runners.common.ppo_agent module

class openrl.runners.common.ppo_agent.PPOAgent(net: Optional[Union[torch.nn.modules.module.Module, openrl.modules.common.base_net.BaseNet]] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False, project_name: str = 'PPOAgent')[source]

Bases: openrl.runners.common.rl_agent.RLAgent

act(observation: Union[numpy.ndarray, Dict[str, numpy.ndarray]], info: Optional[List[Dict[str, Any]]] = None, deterministic: bool = True, episode_starts: Optional[numpy.ndarray] = None) -> Tuple[numpy.ndarray, Optional[Tuple[numpy.ndarray, ...]]][source]
train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.ppo.PPOAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None, driver_class: Type[openrl.drivers.base_driver.BaseDriver] = <class 'openrl.drivers.onpolicy_driver.OnPolicyDriver'>) -> None[source]

openrl.runners.common.rl_agent module

class openrl.runners.common.rl_agent.RLAgent(net: Optional[Union[torch.nn.modules.module.Module, openrl.modules.common.base_net.BaseNet]] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False, project_name: str = 'RLAgent')[source]

Bases: openrl.runners.common.base_agent.BaseAgent

abstract act(**kwargs) -> None[source]
load(path: Union[str, pathlib.Path, io.BufferedIOBase]) -> None[source]
load_policy(path: Union[str, pathlib.Path, io.BufferedIOBase]) -> None[source]
reset()[source]
save(path: Union[str, pathlib.Path, io.BufferedIOBase]) -> None[source]
set_env(env: Union[gym.core.Env, str])[source]
abstract train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None) -> None[source]

openrl.runners.common.sac_agent module

class openrl.runners.common.sac_agent.SACAgent(net: Optional[torch.nn.modules.module.Module] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False, project_name: str = 'SACAgent')[source]

Bases: openrl.runners.common.rl_agent.RLAgent

act(observation: Union[numpy.ndarray, Dict[str, numpy.ndarray]], deterministic=True) -> Tuple[numpy.ndarray, Optional[Tuple[numpy.ndarray, ...]]][source]
train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.sac.SACAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None, driver_class: Type[openrl.drivers.base_driver.BaseDriver] = <class 'openrl.drivers.offpolicy_driver.OffPolicyDriver'>) -> None[source]

openrl.runners.common.vdn_agent module

class openrl.runners.common.vdn_agent.VDNAgent(net: Optional[torch.nn.modules.module.Module] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False)[source]

Bases: openrl.runners.common.rl_agent.RLAgent

act(observation: Union[numpy.ndarray, Dict[str, numpy.ndarray]], deterministic=None) -> Tuple[numpy.ndarray, Optional[Tuple[numpy.ndarray, ...]]][source]
train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.vdn.VDNAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None) -> None[source]

Module contents

class openrl.runners.common.A2CAgent(net: Optional[Union[torch.nn.modules.module.Module, openrl.modules.common.base_net.BaseNet]] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False, project_name: str = 'GAILAgent')[source]

Bases: openrl.runners.common.ppo_agent.PPOAgent

train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.a2c.A2CAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None, driver_class: Type[openrl.drivers.base_driver.BaseDriver] = <class 'openrl.drivers.onpolicy_driver.OnPolicyDriver'>) -> None[source]
class openrl.runners.common.BCAgent(net: Optional[Union[torch.nn.modules.module.Module, openrl.modules.common.base_net.BaseNet]] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False, project_name: str = 'GAILAgent')[source]

Bases: openrl.runners.common.ppo_agent.PPOAgent

train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.behavior_cloning.BCAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None, driver_class: Type[openrl.drivers.base_driver.BaseDriver] = <class 'openrl.drivers.offline_driver.OfflineDriver'>) -> None[source]
class openrl.runners.common.Chat6BAgent(model, tokenizer, device=None)[source]

Bases: openrl.runners.common.chat_agent.ChatAgent

chat(input: str, history: List[str])[source]
classmethod load(agent_path: Union[str, pathlib.Path, io.BufferedIOBase], device='cuda:0') -> openrl.runners.common.base_agent.SelfAgent[source]
class openrl.runners.common.ChatAgent(model, tokenizer, device=None)[source]

Bases: openrl.runners.common.base_agent.BaseAgent

chat(input: str, history: List[str])[source]
classmethod load(agent_path: Union[str, pathlib.Path, io.BufferedIOBase], tokenizer: Optional[Union[str, pathlib.Path, io.BufferedIOBase]] = None, disable_cuda: Optional[bool] = True) -> openrl.runners.common.base_agent.SelfAgent[source]
save(path: Union[str, pathlib.Path, io.BufferedIOBase]) -> None[source]
class openrl.runners.common.DDPGAgent(net: Optional[torch.nn.modules.module.Module] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False)[source]

Bases: openrl.runners.common.rl_agent.RLAgent

act(observation: Union[numpy.ndarray, Dict[str, numpy.ndarray]], deterministic: bool) -> Tuple[numpy.ndarray, Optional[Tuple[numpy.ndarray, ...]]][source]
train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.ddpg.DDPGAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None) -> None[source]
class openrl.runners.common.DQNAgent(net: Optional[torch.nn.modules.module.Module] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False)[source]

Bases: openrl.runners.common.rl_agent.RLAgent

act(observation: Union[numpy.ndarray, Dict[str, numpy.ndarray]], deterministic: bool = True) -> Tuple[numpy.ndarray, Optional[Tuple[numpy.ndarray, ...]]][source]
train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.dqn.DQNAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None) -> None[source]
class openrl.runners.common.GAILAgent(net: Optional[Union[torch.nn.modules.module.Module, openrl.modules.common.base_net.BaseNet]] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False, project_name: str = 'GAILAgent')[source]

Bases: openrl.runners.common.ppo_agent.PPOAgent

train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.gail.GAILAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None) -> None[source]
class openrl.runners.common.MATAgent(net: Optional[Union[torch.nn.modules.module.Module, openrl.modules.common.base_net.BaseNet]] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False, project_name: str = 'PPOAgent')[source]

Bases: openrl.runners.common.ppo_agent.PPOAgent

train(total_time_steps: int, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.mat.MATAlgorithm'>) -> None[source]
class openrl.runners.common.PPOAgent(net: Optional[Union[torch.nn.modules.module.Module, openrl.modules.common.base_net.BaseNet]] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False, project_name: str = 'PPOAgent')[source]

Bases: openrl.runners.common.rl_agent.RLAgent

act(observation: Union[numpy.ndarray, Dict[str, numpy.ndarray]], info: Optional[List[Dict[str, Any]]] = None, deterministic: bool = True, episode_starts: Optional[numpy.ndarray] = None) -> Tuple[numpy.ndarray, Optional[Tuple[numpy.ndarray, ...]]][source]
train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.ppo.PPOAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None, driver_class: Type[openrl.drivers.base_driver.BaseDriver] = <class 'openrl.drivers.onpolicy_driver.OnPolicyDriver'>) -> None[source]
class openrl.runners.common.SACAgent(net: Optional[torch.nn.modules.module.Module] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False, project_name: str = 'SACAgent')[source]

Bases: openrl.runners.common.rl_agent.RLAgent

act(observation: Union[numpy.ndarray, Dict[str, numpy.ndarray]], deterministic=True) -> Tuple[numpy.ndarray, Optional[Tuple[numpy.ndarray, ...]]][source]
train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.sac.SACAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None, driver_class: Type[openrl.drivers.base_driver.BaseDriver] = <class 'openrl.drivers.offpolicy_driver.OffPolicyDriver'>) -> None[source]
class openrl.runners.common.VDNAgent(net: Optional[torch.nn.modules.module.Module] = None, env: Union[gym.core.Env, str] = None, run_dir: Optional[str] = None, env_num: Optional[int] = None, rank: int = 0, world_size: int = 1, use_wandb: bool = False, use_tensorboard: bool = False)[source]

Bases: openrl.runners.common.rl_agent.RLAgent

act(observation: Union[numpy.ndarray, Dict[str, numpy.ndarray]], deterministic=None) -> Tuple[numpy.ndarray, Optional[Tuple[numpy.ndarray, ...]]][source]
train(total_time_steps: int, callback: Union[None, Callable, List[openrl.utils.callbacks.callbacks.BaseCallback], openrl.utils.callbacks.callbacks.BaseCallback] = None, train_algo_class: Type[openrl.algorithms.base_algorithm.BaseAlgorithm] = <class 'openrl.algorithms.vdn.VDNAlgorithm'>, logger: Optional[openrl.utils.logger.Logger] = None) -> None[source]