Shortcuts

openrl.buffers package

Subpackages

Submodules

openrl.buffers.normal_buffer module

class openrl.buffers.normal_buffer.NormalReplayBuffer(cfg, num_agents, obs_space, act_space, data_client, episode_length=None)[source]

Bases: object

after_update()[source]
compute_returns(next_value, value_normalizer=None)[source]
feed_forward_critic_obs_generator(advantages, num_mini_batch=None, mini_batch_size=None, critic_obs_process_func=None)[source]
feed_forward_generator(advantages, num_mini_batch=None, mini_batch_size=None, critic_obs_process_func=None)[source]
init_buffer(raw_obs, action_masks=None)[source]
insert(raw_obs, rnn_states, rnn_states_critic, actions, action_log_probs, value_preds, rewards, masks, bad_masks=None, active_masks=None, action_masks=None)[source]
naive_recurrent_generator(advantages, num_mini_batch)[source]
recurrent_generator(advantages, num_mini_batch, data_chunk_length)[source]

openrl.buffers.offpolicy_buffer module

class openrl.buffers.offpolicy_buffer.OffPolicyReplayBuffer(cfg, num_agents, obs_space, act_space, data_client, episode_length=None)[source]

Bases: openrl.buffers.normal_buffer.NormalReplayBuffer

get_buffer_size()[source]
insert(raw_obs, rnn_states, rnn_states_critic, actions, action_log_probs, value_preds, rewards, masks, bad_masks=None, active_masks=None, action_masks=None)[source]

openrl.buffers.offpolicy_replay_data module

class openrl.buffers.offpolicy_replay_data.OffPolicyReplayData(cfg, num_agents, obs_space, act_space, data_client=None, episode_length=None)[source]

Bases: openrl.buffers.replay_data.ReplayData

after_update()[source]
compute_returns(next_value, value_normalizer=None)[source]
feed_forward_generator(advantages, num_mini_batch=None, mini_batch_size=None, critic_obs_process_func=None)[source]
init_buffer(raw_obs, action_masks=None)[source]
insert(raw_obs, rnn_states, rnn_states_critic, actions, action_log_probs, value_preds, rewards, masks, bad_masks=None, active_masks=None, action_masks=None)[source]

openrl.buffers.replay_data module

class openrl.buffers.replay_data.ReplayData(cfg, num_agents, obs_space, act_space, data_client=None, episode_length=None)[source]

Bases: object

after_update()[source]
compute_returns(next_value, value_normalizer=None)[source]
feed_forward_critic_obs_generator(advantages, num_mini_batch=None, mini_batch_size=None, critic_obs_process_func=None)[source]
feed_forward_generator(advantages, num_mini_batch=None, mini_batch_size=None, critic_obs_process_func=None)[source]
feed_forward_generator_transformer(advantages, num_mini_batch=None, mini_batch_size=None)[source]

Yield training data for MLP policies.

Parameters:
- advantages (np.ndarray) – advantage estimates.
- num_mini_batch (int) – number of minibatches to split the batch into.
- mini_batch_size (int) – number of samples in each minibatch.

get_batch_data(data_name: str, step: int)[source]
init_buffer(raw_obs, action_masks=None)[source]
insert(raw_obs, rnn_states, rnn_states_critic, actions, action_log_probs, value_preds, rewards, masks, bad_masks=None, active_masks=None, action_masks=None)[source]
naive_recurrent_generator(advantages, num_mini_batch)[source]
recurrent_generator(advantages, num_mini_batch, data_chunk_length)[source]
recurrent_generator_v3(advantages, num_mini_batch, data_chunk_length)[source]

Module contents

class openrl.buffers.NormalReplayBuffer(cfg, num_agents, obs_space, act_space, data_client, episode_length=None)[source]

Bases: object

after_update()[source]
compute_returns(next_value, value_normalizer=None)[source]
feed_forward_critic_obs_generator(advantages, num_mini_batch=None, mini_batch_size=None, critic_obs_process_func=None)[source]
feed_forward_generator(advantages, num_mini_batch=None, mini_batch_size=None, critic_obs_process_func=None)[source]
init_buffer(raw_obs, action_masks=None)[source]
insert(raw_obs, rnn_states, rnn_states_critic, actions, action_log_probs, value_preds, rewards, masks, bad_masks=None, active_masks=None, action_masks=None)[source]
naive_recurrent_generator(advantages, num_mini_batch)[source]
recurrent_generator(advantages, num_mini_batch, data_chunk_length)[source]
class openrl.buffers.OffPolicyReplayBuffer(cfg, num_agents, obs_space, act_space, data_client, episode_length=None)[source]

Bases: openrl.buffers.normal_buffer.NormalReplayBuffer

get_buffer_size()[source]
insert(raw_obs, rnn_states, rnn_states_critic, actions, action_log_probs, value_preds, rewards, masks, bad_masks=None, active_masks=None, action_masks=None)[source]