openrl.algorithms package¶

Submodules¶

openrl.algorithms.a2c module¶

class openrl.algorithms.a2c.A2CAlgorithm(cfg, init_module, agent_num: int = 1, device: Union[str, torch.device] = 'cpu')[源代码]¶

基类：openrl.algorithms.ppo.PPOAlgorithm

prepare_loss(critic_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch, masks_batch, action_masks_batch, old_action_log_probs_batch, adv_targ, value_preds_batch, return_batch, active_masks_batch, turn_on)[源代码]¶

train(buffer, turn_on: bool = True)[源代码]¶

openrl.algorithms.base_algorithm module¶

class openrl.algorithms.base_algorithm.BaseAlgorithm(cfg, init_module, agent_num: int, device=torch.device('cpu'))[源代码]¶

基类：abc.ABC

prep_rollout()[源代码]¶

prep_training()[源代码]¶

abstract train(buffer, turn_on=True)[源代码]¶

openrl.algorithms.behavior_cloning module¶

class openrl.algorithms.behavior_cloning.BCAlgorithm(cfg, init_module, agent_num: int = 1, device: Union[str, torch.device] = 'cpu')[源代码]¶

基类：openrl.algorithms.base_algorithm.BaseAlgorithm

bc_update(sample, turn_on=True)[源代码]¶

construct_loss_list(policy_loss, dist_entropy, value_loss, turn_on)[源代码]¶

get_data_generator(buffer)[源代码]¶

prepare_loss(critic_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch, masks_batch, action_masks_batch, old_action_log_probs_batch, adv_targ, value_preds_batch, return_batch, active_masks_batch, turn_on)[源代码]¶

to_single_np(input)[源代码]¶

train(buffer, turn_on=True)[源代码]¶

train_bc(buffer, turn_on)[源代码]¶

openrl.algorithms.ddpg module¶

class openrl.algorithms.ddpg.DDPGAlgorithm(cfg, init_module, agent_num: int = 1, device: Union[str, torch.device] = 'cpu')[源代码]¶

基类：openrl.algorithms.base_algorithm.BaseAlgorithm

cal_value_loss(value_normalizer, values, value_preds_batch, return_batch, active_masks_batch)[源代码]¶

ddpg_update(sample, turn_on=True)[源代码]¶

prepare_actor_loss(obs_batch, next_obs_batch, rnn_states_batch, actions_batch, masks_batch, action_masks_batch, value_preds_batch, rewards_batch, active_masks_batch, turn_on)[源代码]¶

prepare_critic_loss(obs_batch, next_obs_batch, rnn_states_batch, actions_batch, masks_batch, next_masks_batch, action_masks_batch, value_preds_batch, rewards_batch, active_masks_batch, turn_on)[源代码]¶

to_single_np(input)[源代码]¶

train(buffer, turn_on=True)[源代码]¶

openrl.algorithms.dqn module¶

class openrl.algorithms.dqn.DQNAlgorithm(cfg, init_module, agent_num: int = 1, device: Union[str, torch.device] = 'cpu')[源代码]¶

基类：openrl.algorithms.base_algorithm.BaseAlgorithm

dqn_update(sample, turn_on=True)[源代码]¶

prepare_loss(obs_batch, next_obs_batch, rnn_states_batch, actions_batch, masks_batch, next_masks_batch, action_masks_batch, value_preds_batch, rewards_batch, active_masks_batch, turn_on)[源代码]¶

to_single_np(input)[源代码]¶

train(buffer, turn_on=True)[源代码]¶

openrl.algorithms.gail module¶

class openrl.algorithms.gail.GAILAlgorithm(cfg, init_module, agent_num: int = 1, device: Union[str, torch.device] = 'cpu')[源代码]¶

基类：openrl.algorithms.ppo.PPOAlgorithm

train_gail(buffer, turn_on)[源代码]¶

openrl.algorithms.mat module¶

class openrl.algorithms.mat.MATAlgorithm(cfg, init_module, agent_num: int = 1, device: Union[str, torch.device] = 'cpu')[源代码]¶

基类：openrl.algorithms.ppo.PPOAlgorithm

construct_loss_list(policy_loss, dist_entropy, value_loss, turn_on)[源代码]¶

get_data_generator(buffer, advantages)[源代码]¶

openrl.algorithms.ppo module¶

class openrl.algorithms.ppo.PPOAlgorithm(cfg, init_module, agent_num: int = 1, device: Union[str, torch.device] = 'cpu')[源代码]¶

基类：openrl.algorithms.base_algorithm.BaseAlgorithm

cal_value_loss(value_normalizer, values, value_preds_batch, return_batch, active_masks_batch)[源代码]¶

construct_loss_list(policy_loss, dist_entropy, value_loss, turn_on)[源代码]¶

get_data_generator(buffer, advantages)[源代码]¶

ppo_update(sample, turn_on=True)[源代码]¶

prepare_loss(critic_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch, masks_batch, action_masks_batch, old_action_log_probs_batch, adv_targ, value_preds_batch, return_batch, active_masks_batch, turn_on)[源代码]¶

to_single_np(input)[源代码]¶

train(buffer, turn_on=True)[源代码]¶

train_ppo(buffer, turn_on)[源代码]¶

openrl.algorithms.sac module¶

class openrl.algorithms.sac.SACAlgorithm(cfg, init_module, agent_num: int = 1, device: Union[str, torch.device] = 'cpu')[源代码]¶

基类：openrl.algorithms.base_algorithm.BaseAlgorithm

cal_value_loss(value_normalizer, values, value_preds_batch, return_batch, active_masks_batch)[源代码]¶

prepare_actor_loss(obs_batch, next_obs_batch, rnn_states_batch, actions_batch, masks_batch, action_masks_batch, value_preds_batch, rewards_batch, active_masks_batch, turn_on)[源代码]¶

prepare_alpha_loss(log_prob)[源代码]¶

prepare_critic_loss(obs_batch, next_obs_batch, rnn_states_batch, actions_batch, masks_batch, next_masks_batch, action_masks_batch, value_preds_batch, rewards_batch, active_masks_batch, turn_on)[源代码]¶

sac_update(sample, turn_on=True)[源代码]¶

to_single_np(input)[源代码]¶

train(buffer, turn_on=True)[源代码]¶

openrl.algorithms.vdn module¶

class openrl.algorithms.vdn.VDNAlgorithm(cfg, init_module, agent_num: int = 1, device: Union[str, torch.device] = 'cpu')[源代码]¶

基类：openrl.algorithms.base_algorithm.BaseAlgorithm

cal_value_loss(value_normalizer, values, value_preds_batch, return_batch, active_masks_batch)[源代码]¶

prepare_loss(obs_batch, next_obs_batch, rnn_states_batch, actions_batch, masks_batch, next_masks_batch, action_masks_batch, value_preds_batch, rewards_batch, active_masks_batch, turn_on)[源代码]¶

to_single_np(input)[源代码]¶

train(buffer, turn_on=True)[源代码]¶

vdn_update(sample, turn_on=True)[源代码]¶

openrl.algorithms package¶

Submodules¶

openrl.algorithms.a2c module¶

openrl.algorithms.base_algorithm module¶

openrl.algorithms.behavior_cloning module¶

openrl.algorithms.ddpg module¶

openrl.algorithms.dqn module¶

openrl.algorithms.gail module¶

openrl.algorithms.mat module¶

openrl.algorithms.ppo module¶

openrl.algorithms.sac module¶

openrl.algorithms.vdn module¶

Module contents¶