Shortcuts

openrl.modules package

Subpackages

Submodules

openrl.modules.base_module module

class openrl.modules.base_module.BaseModule(cfg)[source]

Bases: abc.ABC

convert_distributed_model() -> None[source]
abstract lr_decay(episode: int, episodes: int) -> None[source]
abstract restore(model_dir: str) -> None[source]
abstract save(save_dir: str) -> None[source]

openrl.modules.bc_module module

class openrl.modules.bc_module.BCModule(cfg, policy_input_space: gym.spaces.box.Box, critic_input_space: gym.spaces.box.Box, act_space: gym.spaces.box.Box, share_model: bool = False, device: Union[str, torch.device] = 'cpu', rank: Optional[int] = None, world_size: Optional[int] = None, model_dict: Optional[Dict[str, Any]] = None)[source]

Bases: openrl.modules.rl_module.RLModule

act(obs, rnn_states_actor, masks, action_masks=None, deterministic=False)[source]
evaluate_actions(critic_obs, obs, rnn_states_actor, rnn_states_critic, action, masks, action_masks=None, active_masks=None, critic_masks_batch=None)[source]
get_actions(critic_obs, obs, rnn_states_actor, rnn_states_critic, masks, action_masks=None, deterministic=False)[source]
get_critic_value_normalizer()[source]
get_model_configs(cfg) -> Dict[str, Any][source]
get_values(critic_obs, rnn_states_critic, masks)[source]
static init_rnn_states(rollout_num: int, agent_num: int, rnn_layers: int, hidden_size: int)[source]
lr_decay(episode, episodes)[source]

openrl.modules.ddpg_module module

class openrl.modules.ddpg_module.DDPGModule(cfg, input_space: gym.spaces.box.Box, act_space: gym.spaces.box.Box, device: Union[str, torch.device] = 'cpu', rank: Optional[int] = None, world_size: Optional[int] = None, model_dict: Optional[Dict[str, Any]] = None)[source]

Bases: openrl.modules.rl_module.RLModule

act(obs, deterministic: bool)[source]
evaluate_actions(obs_batch, next_obs_batch, rnn_states_batch, rewards_batch, actions_batch, masks, action_masks=None, masks_batch=None)[source]
evaluate_actor_loss(obs_batch, next_obs_batch, rnn_states_batch, rewards_batch, actions_batch, masks, action_masks=None, masks_batch=None)[source]
evaluate_critic_loss(obs_batch, next_obs_batch, rnn_states_batch, rewards_batch, actions_batch, masks, next_masks_batch, action_masks=None, masks_batch=None)[source]
get_actions(obs)[source]
get_critic_value_normalizer()[source]
get_values(obs, action, rnn_states_critic, masks)[source]
static init_rnn_states(rollout_num: int, agent_num: int, rnn_layers: int, hidden_size: int)[source]
lr_decay(episode, episodes)[source]

openrl.modules.dqn_module module

class openrl.modules.dqn_module.DQNModule(cfg, input_space: gym.spaces.box.Box, act_space: gym.spaces.box.Box, device: Union[str, torch.device] = 'cpu', rank: Optional[int] = None, world_size: Optional[int] = None, model_dict: Optional[Dict[str, Any]] = None)[source]

Bases: openrl.modules.rl_module.RLModule

act(obs, rnn_states_actor, masks, action_masks=None)[source]
evaluate_actions(obs_batch, next_obs_batch, rnn_states_batch, rewards_batch, actions_batch, masks, next_masks, action_masks=None, masks_batch=None, critic_masks_batch=None)[source]
get_actions(obs, rnn_states, masks, action_masks=None)[source]
get_critic_value_normalizer()[source]
get_values(obs, rnn_states_critic, masks)[source]
static init_rnn_states(rollout_num: int, agent_num: int, rnn_layers: int, hidden_size: int)[source]
lr_decay(episode, episodes)[source]

openrl.modules.gail_module module

class openrl.modules.gail_module.GAILModule(cfg, policy_input_space: gym.spaces.box.Box, critic_input_space: gym.spaces.box.Box, act_space: gym.spaces.box.Box, share_model: bool = False, device: Union[str, torch.device] = 'cpu', rank: Optional[int] = None, world_size: Optional[int] = None, model_dict: Optional[Dict[str, Any]] = None)[source]

Bases: openrl.modules.ppo_module.PPOModule

get_model_configs(cfg) -> Dict[str, Any][source]

openrl.modules.model_config module

class openrl.modules.model_config.ModelConfig(*args, **kwargs)[source]

Bases: dict

class openrl.modules.model_config.ModelTrainConfig(model: torch.nn.modules.module.Module, input_space: gym.spaces.box.Box, lr: Optional[float] = None, *args, **kwargs)[source]

Bases: openrl.modules.model_config.ModelConfig

openrl.modules.ppo_module module

class openrl.modules.ppo_module.PPOModule(cfg, policy_input_space: gym.spaces.box.Box, critic_input_space: gym.spaces.box.Box, act_space: gym.spaces.box.Box, share_model: bool = False, device: Union[str, torch.device] = 'cpu', rank: Optional[int] = None, world_size: Optional[int] = None, model_dict: Optional[Dict[str, Any]] = None)[source]

Bases: openrl.modules.rl_module.RLModule

act(obs, rnn_states_actor, masks, action_masks=None, deterministic=False)[source]
evaluate_actions(critic_obs, obs, rnn_states_actor, rnn_states_critic, action, masks, action_masks=None, active_masks=None, critic_masks_batch=None)[source]
get_actions(critic_obs, obs, rnn_states_actor, rnn_states_critic, masks, action_masks=None, deterministic=False)[source]
get_critic_value_normalizer()[source]
get_model_configs(cfg) -> Dict[str, Any][source]
get_values(critic_obs, rnn_states_critic, masks)[source]
static init_rnn_states(rollout_num: int, agent_num: int, rnn_layers: int, hidden_size: int)[source]
lr_decay(episode, episodes)[source]

openrl.modules.rl_module module

class openrl.modules.rl_module.RLModule(cfg, act_space: gym.spaces.box.Box, rank: int = 0, world_size: int = 1, device: Union[str, torch.device] = 'cpu', model_configs: Optional[Dict[str, openrl.modules.model_config.ModelTrainConfig]] = None)[source]

Bases: openrl.modules.base_module.BaseModule

abstract act()[source]
abstract evaluate_actions()[source]
abstract get_actions()[source]
abstract get_critic_value_normalizer()[source]
get_model_configs(cfg) -> Dict[str, Any][source]
abstract get_values()[source]
load_policy(model_path: str) -> None[source]
restore(model_dir: str) -> None[source]
save(save_dir: str) -> None[source]

openrl.modules.sac_module module

class openrl.modules.sac_module.SACModule(cfg, input_space: gym.spaces.box.Box, act_space: gym.spaces.box.Box, device: Union[str, torch.device] = 'cpu', rank: Optional[int] = None, world_size: Optional[int] = None, model_dict: Optional[Dict[str, Any]] = None)[source]

Bases: openrl.modules.rl_module.RLModule

act(obs, deterministic=True)[source]
evaluate_actions()[source]
evaluate_actor_loss(obs_batch, next_obs_batch, rnn_states_batch, rewards_batch, actions_batch, masks, action_masks=None, masks_batch=None)[source]
get_actions(obs, deterministic=True)[source]
get_critic_value_normalizer()[source]
get_q_values(obs_batch, next_obs_batch, rnn_states_batch, rewards_batch, actions_batch, masks, action_masks=None, masks_batch=None)[source]
get_values(obs, action, rnn_states_critic, masks)[source]
static init_rnn_states(rollout_num: int, agent_num: int, rnn_layers: int, hidden_size: int)[source]
lr_decay(episode, episodes)[source]

openrl.modules.vdn_module module

class openrl.modules.vdn_module.VDNModule(cfg, input_space: gym.spaces.box.Box, act_space: gym.spaces.box.Box, device: Union[str, torch.device] = 'cpu', rank: Optional[int] = None, world_size: Optional[int] = None, model_dict: Optional[Dict[str, Any]] = None)[source]

Bases: openrl.modules.rl_module.RLModule

act(obs, rnn_states_actor, masks, action_masks=None)[source]
evaluate_actions(obs_batch, next_obs_batch, rnn_states_batch, rewards_batch, actions_batch, masks, action_masks=None, masks_batch=None, critic_masks_batch=None)[source]
get_actions(obs, rnn_states, masks, action_masks=None)[source]
get_critic_value_normalizer()[source]
get_values(obs, rnn_states_critic, masks)[source]
static init_rnn_states(rollout_num: int, agent_num: int, rnn_layers: int, hidden_size: int)[source]
lr_decay(episode, episodes)[source]

Module contents