# Source code for openrl.configs.config
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""""""
from typing import List
from jsonargparse import ActionConfigFile, ArgumentParser
from openrl.configs.utils import ProcessYamlAction
def create_config_parser():
    """
    Build the command-line / YAML configuration parser.

    Creates a ``jsonargparse.ArgumentParser`` (description ``"openrl"``),
    registers every configuration option below with its default value, and
    returns the parser without parsing anything.

    Notes:
        * ``--config`` is handled by ``ProcessYamlAction``.
        * Options declared with ``action="store_false"`` default to ``True``;
          passing the flag on the command line turns the feature *off*.
        * Dotted option names (e.g. ``--selfplay_api.host``) are declared as
          such; presumably jsonargparse exposes them as nested namespaces
          (verify against the jsonargparse documentation).

    Returns:
        The configured ``ArgumentParser`` instance.
    """
    parser = ArgumentParser(
        description="openrl",
    )
    parser.add_argument("--config", action=ProcessYamlAction)
    parser.add_argument("--seed", type=int, default=0, help="Random seed.")
    # For Transformers
    parser.add_argument("--encode_state", action="store_true", default=False)
    parser.add_argument("--n_block", type=int, default=1)
    parser.add_argument("--n_embd", type=int, default=64)
    parser.add_argument("--n_head", type=int, default=1)
    parser.add_argument("--dec_actor", action="store_true", default=False)
    parser.add_argument("--share_actor", action="store_true", default=False)
    parser.add_argument("--callbacks", type=List[dict])
    # For Stable-baselines3
    parser.add_argument(
        "--sb3_model_path",
        type=str,
        default=None,
        help="stable-baselines3 model path",
    )
    parser.add_argument(
        "--sb3_algo",
        type=str,
        default=None,
        help="stable-baselines3 algorithm",
    )
    # For Hierarchical RL
    parser.add_argument(
        "--step_difference",
        type=int,
        default=1,
        help="Frequency difference between Controller's step and Executor's step",
    )
    # For GAIL
    parser.add_argument(
        "--gail",
        action="store_true",
        default=False,
        help="do imitation learning with gail",
    )
    parser.add_argument(
        "--expert_data",
        type=str,
        default=None,
        help="directory that contains expert demonstrations for gail",
    )
    parser.add_argument(
        "--gail_batch_size",
        type=int,
        default=128,
        help="gail batch size (default: 128)",
    )
    parser.add_argument(
        "--dis_input_len", type=int, default=None, help="gail input length"
    )
    parser.add_argument(
        "--gail_loss_target",
        type=float,
        default=None,
        help="gail loss target at warm up",
    )
    parser.add_argument(
        "--gail_epoch", type=int, default=5, help="gail epochs (default: 5)"
    )
    parser.add_argument(
        "--gail_use_action",
        type=bool,
        default=True,
        help="whether to use action as the input of the gail discriminator",
    )
    parser.add_argument(
        "--gail_hidden_size",
        type=int,
        default=256,
        help="gail hidden state size (default: 256)",
    )
    parser.add_argument(
        "--gail_layer_num",
        type=int,
        default=3,
        help="gail hidden layer number (default: 3)",
    )
    parser.add_argument(
        "--gail_lr", type=float, default=5e-4, help="learning rate (default: 5e-4)"
    )
    # For Data Collector
    parser.add_argument(
        "--data_dir", type=str, default=None, help="data save directory."
    )
    parser.add_argument(
        "--force_rewrite",
        action="store_true",
        default=False,
        help="by default False, will delete the data save directory if it exists.",
    )
    parser.add_argument(
        "--collector_num", type=int, default=1, help="number of collectors"
    )
    # For convert
    parser.add_argument(
        "--input_data_dir", type=str, default=None, help="input save directory."
    )
    parser.add_argument(
        "--output_data_dir", type=str, default=None, help="output data directory."
    )
    parser.add_argument("--worker_num", type=int, default=1, help="number of workers")
    parser.add_argument(
        "--sample_interval", type=int, default=1, help="data sample interval"
    )
    # For Self-Play
    parser.add_argument(
        "--selfplay_api.host",
        default="127.0.0.1",
        type=str,
        help="host for selfplay api",
    )
    parser.add_argument(
        "--selfplay_api.port",
        default=10086,
        type=int,
        help="port for selfplay api",
    )
    parser.add_argument(
        "--lazy_load_opponent",
        default=True,
        type=bool,
        help=(
            "if true, when the opponents are the same opponent_type, will only load the"
            " weight. Otherwise, will load the pythoon script."
        ),
    )
    parser.add_argument(
        "--self_play",
        action="store_true",
        default=False,
        help="whether to use selfplay",
    )
    parser.add_argument(
        "--selfplay_algo",
        type=str,
        default="WeightExistEnemy",
        help="choose selfplay algorithm",
    )
    parser.add_argument(
        "--max_play_num",
        type=int,
        default=2000,
        help="upper bound of each enemy's play list length",
    )
    parser.add_argument(
        "--max_enemy_num",
        type=int,
        default=-1,
        help="upper bound of enemy model's number exclusive of existing enemies",
    )
    parser.add_argument(
        "--exist_enemy_num", type=int, default=0, help="exist enemy num"
    )
    parser.add_argument(
        "--random_pos",
        type=int,
        default=-1,
        help="random enemy model's position in enemy pool",
    )
    parser.add_argument(
        "--build_in_pos",
        type=int,
        default=-1,
        help="build-in enemy model's position in enemy pool",
    )
    # For AMP
    parser.add_argument(
        "--use_amp",
        type=bool,
        default=False,
        help="use mixed precision training",
    )
    # For Optimizer
    parser.add_argument(
        "--load_optimizer",
        action="store_true",
        default=False,
        help="whether to restore optimizer",
    )
    # For JRPO
    parser.add_argument(
        "--use_joint_action_loss",
        type=bool,
        default=False,
        help="whether to use joint action loss",
    )
    # For Game Wrapper
    parser.add_argument(
        "--frameskip",
        type=int,
        default=None,
        help="whether to use frameskip, default is None",
    )
    # For Evaluation
    parser.add_argument(
        "--eval_render",
        default=False,
        action="store_true",
        help="whether to render during evaluation",
    )
    # For JiDi evaluation
    # parser.add_argument("--switch_two_side", default=False, action="store_true",help="whether to evaluate twice to switch two side")
    # For Distributed Training
    parser.add_argument(
        "--terminal",
        default="current_terminal",
        choices=[
            "local",
            "current_terminal",
            "tmux_session",
            "ssh_tmux_session",
            "k8s",
            "k8s_single",
        ],
        help="which terminal to use",
    )
    parser.add_argument(
        "--distributed_type",
        type=str,
        default="sync",
        help="distributed type to use actors.",
        choices=["sync", "async"],
    )
    parser.add_argument(
        "--program_type",
        type=str,
        default="local",
        help="running type of current program.",
        choices=[
            "local",
            "whole",
            "actor",
            "learner",
            "server",
            "server_learner",
            "local_evaluator",
            "remote_evaluator",
        ],
    )
    parser.add_argument(
        "--share_temp_dir", default=None, help="temp directory to store job.pkl"
    )
    parser.add_argument(
        "--share_entry_script_path",
        default=None,
        help="common path for the process_entry.py file",
    )
    parser.add_argument("--learner_num", type=int, default=1, help="number of learners")
    parser.add_argument(
        "--fetch_num",
        type=int,
        default=1,
        help="number of actors' data to train for a learner",
    )
    parser.add_argument(
        "--tmux_prefix",
        default=None,
        type=str,
        help="prefix which will be added to tmux session",
    )
    parser.add_argument(
        "--kill_all",
        default=False,
        action="store_true",
        help="kill all the tmux session",
    )
    parser.add_argument(
        "--namespace", default="default", type=str, help="namespace of pods"
    )
    # For k8s
    parser.add_argument(
        "--mount_path", default=None, type=str, help="Volume mount path"
    )
    parser.add_argument(
        "--mount_name", default=None, type=str, help="Volume mount name"
    )
    parser.add_argument(
        "--persistent_volume_claim_name",
        default=None,
        type=str,
        help="Persistent volume claim name",
    )
    # For Debug
    parser.add_argument(
        "--disable_training",
        action="store_true",
        default=False,
        help="disable training",
    )
    # For Actor
    parser.add_argument(
        "--use_half_actor",
        action="store_true",
        default=False,
        help="whether to use half float for actors",
    )
    parser.add_argument(
        "--algorithm_name",
        type=str,
        default="ppo",
        help="The algorithm name.",
    )
    parser.add_argument(
        "--experiment_name",
        type=str,
        default="",
        help="an identifier to distinguish different experiment.",
    )
    parser.add_argument(
        "--gpu_usage_type",
        type=str,
        default="auto",
        choices=["auto", "single"],
        help=(
            "by default auto, will determine the GPU automatically. If using single,"
            " use only use single GPU."
        ),
    )
    parser.add_argument(
        "--disable_cuda",
        action="store_true",
        default=False,
        help="by default False, will use GPU to train; or else will use CPU;",
    )
    parser.add_argument(
        "--cuda_deterministic",
        action="store_false",
        default=True,
        help=(
            "by default, make sure random seed effective. if set, bypass such function."
        ),
    )
    parser.add_argument(
        "--pytorch_threads",
        type=int,
        default=1,
        help="Number of torch threads for training",
    )
    parser.add_argument(
        "--n_rollout_threads",
        type=int,
        default=32,
        help="Number of parallel envs for training rollout",
    )
    parser.add_argument(
        "--n_eval_rollout_threads",
        type=int,
        default=1,
        help="Number of parallel envs for evaluating rollout",
    )
    parser.add_argument(
        "--n_render_rollout_threads",
        type=int,
        default=1,
        help="Number of parallel envs for rendering rollout",
    )
    parser.add_argument(
        "--num_env_steps",
        type=int,
        default=int(10e6),
        help="Number of environment steps to train (default: 10e6)",
    )
    parser.add_argument(
        "--user_name",
        type=str,
        default="openrl",
        help="user name for the running process",
    )
    parser.add_argument(
        "--wandb_entity",
        type=str,
        default=None,
        help=(
            "[for wandb usage], to specify entity for simply collecting training data."
        ),
    )
    parser.add_argument(
        "--disable_wandb",
        action="store_true",
        default=False,
        help=(
            "[for wandb usage], by default False, will log date to wandb server. or"
            " else will use tensorboard to log data."
        ),
    )
    # env parameters
    parser.add_argument(
        "--env_name",
        type=str,
        default="StarCraft2",
        help="specify the name of environment",
    )
    parser.add_argument(
        "--scenario_name",
        type=str,
        default="default",
        help="specify the name of scenario",
    )
    parser.add_argument("--num_agents", type=int, default=1, help="number of players")
    parser.add_argument("--num_enemies", type=int, default=1, help="number of enemies")
    parser.add_argument(
        "--use_obs_instead_of_state",
        action="store_true",
        default=False,
        help="Whether to use global state or concatenated obs",
    )
    # replay buffer parameters
    parser.add_argument(
        "--episode_length", type=int, default=200, help="episode length for training"
    )
    parser.add_argument(
        "--eval_episode_length",
        type=int,
        default=200,
        help="episode length for evaluation",
    )
    parser.add_argument(
        "--max_episode_length",
        type=int,
        default=None,
        help="Max length for any episode",
    )
    # network parameters
    parser.add_argument(
        "--separate_policy",
        action="store_true",
        default=False,
        help="Whether agent separate the policy",
    )
    parser.add_argument(
        "--use_conv1d", action="store_true", default=False, help="Whether to use conv1d"
    )
    parser.add_argument(
        "--stacked_frames",
        type=int,
        default=1,
        help="Dimension of hidden layers for actor/critic networks",
    )
    parser.add_argument(
        "--use_stacked_frames",
        action="store_true",
        default=False,
        help="Whether to use stacked_frames",
    )
    parser.add_argument(
        "--hidden_size",
        type=int,
        default=64,
        help="Dimension of hidden layers for actor/critic networks",
    )  # different network may need different size
    parser.add_argument(
        "--layer_N",
        type=int,
        default=1,
        help="Number of layers for actor/critic networks",
    )
    parser.add_argument(
        "--activation_id",
        type=int,
        default=1,
        help="choose 0 to use tanh, 1 to use relu, 2 to use leaky relu, 3 to use selu",
    )
    parser.add_argument(
        "--use_popart",
        default=False,
        type=bool,
        help="by default False, use PopArt to normalize rewards.",
    )
    parser.add_argument(
        "--dual_clip_ppo",
        default=False,
        type=bool,
        help="by default False, use dual-clip ppo.",
    )
    parser.add_argument(
        "--dual_clip_coeff",
        type=float,
        default=3,
        help="by default 3, use PopArt to normalize rewards.",
    )
    parser.add_argument(
        "--use_valuenorm",
        type=bool,
        default=True,
        # Help text states the actual default (True).
        help="by default True, use running mean and std to normalize rewards.",
    )
    parser.add_argument(
        "--use_feature_normalization",
        type=bool,
        default=False,
        help="Whether to apply layernorm to the inputs",
    )
    parser.add_argument(
        "--use_orthogonal",
        action="store_false",
        default=True,
        help=(
            "Whether to use Orthogonal initialization for weights and 0 initialization"
            " for biases"
        ),
    )
    parser.add_argument(
        "--gain", type=float, default=0.01, help="The gain # of last action layer"
    )
    parser.add_argument(
        "--cnn_layers_params",
        type=str,
        default=None,
        help="The parameters of cnn layer",
    )
    parser.add_argument(
        "--use_maxpool2d",
        action="store_true",
        default=False,
        help="Whether to apply layernorm to the inputs",
    )
    parser.add_argument(
        "--rnn_type",
        type=str,
        default="gru",
        choices=["gru", "lstm"],
        help="rnn types: gru or lstm",
    )
    parser.add_argument("--rnn_num", type=int, default=1, help="rnn layer number")
    # recurrent parameters
    parser.add_argument(
        "--use_naive_recurrent_policy",
        type=bool,
        default=False,
        help="Whether to use a naive recurrent policy",
    )
    parser.add_argument(
        "--use_recurrent_policy",
        type=bool,
        default=False,
        help="use a recurrent policy",
    )
    parser.add_argument(
        "--recurrent_N", type=int, default=1, help="The number of recurrent layers."
    )
    parser.add_argument(
        "--data_chunk_length",
        type=int,
        default=2,
        help="Time length of chunks used to train a recurrent_policy",
    )
    parser.add_argument(
        "--use_influence_policy",
        action="store_true",
        default=False,
        help="use a recurrent policy",
    )
    parser.add_argument(
        "--influence_layer_N",
        type=int,
        default=1,
        help="Number of layers for actor/critic networks",
    )
    # attn parameters
    parser.add_argument(
        "--use_attn",
        action="store_true",
        default=False,
        help=" by default False, use attention tactics.",
    )
    parser.add_argument(
        "--attn_N", type=int, default=1, help="the number of attn layers, by default 1"
    )
    parser.add_argument(
        "--attn_size",
        type=int,
        default=64,
        help="by default, the hidden size of attn layer",
    )
    parser.add_argument(
        "--attn_heads", type=int, default=4, help="by default, the # of multiply heads"
    )
    parser.add_argument(
        "--dropout",
        type=float,
        default=0.0,
        help="by default 0, the dropout ratio of attn layer.",
    )
    parser.add_argument(
        "--use_average_pool",
        type=bool,
        default=True,
        help="by default True, use average pooling for attn model.",
    )
    parser.add_argument(
        "--use_attn_internal",
        action="store_false",
        default=True,
        help="by default True, whether to strengthen own characteristics",
    )
    parser.add_argument(
        "--use_cat_self",
        action="store_false",
        default=True,
        help="by default True, whether to strengthen own characteristics",
    )
    # optimizer parameters
    parser.add_argument(
        "--lr", type=float, default=5e-4, help="learning rate (default: 5e-4)"
    )
    parser.add_argument(
        "--tau", type=float, default=0.995, help="soft update polyak (default: 0.995)"
    )
    parser.add_argument(
        "--critic_lr",
        type=float,
        default=5e-4,
        help="critic learning rate (default: 5e-4)",
    )
    parser.add_argument(
        "--opti_eps",
        type=float,
        default=1e-5,
        help="RMSprop optimizer epsilon (default: 1e-5)",
    )
    parser.add_argument(
        "--weight_decay", type=float, default=0, help="weight decay (defaul: 0)"
    )
    # behavior cloning parameters
    parser.add_argument(
        "--bc_epoch",
        type=int,
        default=2,
        # Help text states the actual default (2).
        help="number of behavior cloning epochs (default: 2)",
    )
    # ppo parameters
    parser.add_argument(
        # Help text states the actual default (10).
        "--ppo_epoch", type=int, default=10, help="number of ppo epochs (default: 10)"
    )
    parser.add_argument(
        "--use_policy_vhead",
        action="store_true",
        default=False,
        help="by default, do not use policy vhead. if set, use policy vhead.",
    )
    parser.add_argument(
        "--use_clipped_value_loss",
        action="store_false",
        default=True,
        help="by default, clip loss value. If set, do not clip loss value.",
    )
    parser.add_argument(
        "--clip_param",
        type=float,
        default=0.2,
        help="ppo clip parameter (default: 0.2)",
    )
    parser.add_argument(
        "--num_mini_batch",
        type=int,
        default=1,
        help="number of batches for ppo (default: 1)",
    )
    parser.add_argument(
        "--mini_batch_size",
        type=int,
        default=None,
        # Help text states the actual default (None).
        help="batch size (default: None)",
    )
    parser.add_argument(
        "--policy_value_loss_coef",
        type=float,
        default=0.5,
        help="policy value loss coefficient (default: 0.5)",
    )
    parser.add_argument(
        "--entropy_coef",
        type=float,
        default=0.01,
        help="entropy term coefficient (default: 0.01)",
    )
    parser.add_argument(
        "--value_loss_coef",
        type=float,
        default=0.5,
        help="value loss coefficient (default: 0.5)",
    )
    parser.add_argument(
        "--use_max_grad_norm",
        action="store_false",
        default=True,
        help="by default, use max norm of gradients. If set, do not use.",
    )
    parser.add_argument(
        "--max_grad_norm",
        type=float,
        default=10.0,
        # Help text states the actual default (10.0).
        help="max norm of gradients (default: 10.0)",
    )
    parser.add_argument(
        "--use_gae",
        default=True,
        type=bool,
        help="use generalized advantage estimation",
    )
    parser.add_argument(
        "--gamma",
        type=float,
        default=0.99,
        help="discount factor for rewards (default: 0.99)",
    )
    parser.add_argument(
        "--gae_lambda",
        type=float,
        default=0.95,
        help="gae lambda parameter (default: 0.95)",
    )
    parser.add_argument(
        "--use_proper_time_limits",
        default=False,
        type=bool,
        help="compute returns taking into account time limits",
    )
    parser.add_argument(
        "--use_huber_loss",
        action="store_false",
        default=True,
        help="by default, use huber loss. If set, do not use huber loss.",
    )
    parser.add_argument(
        "--use_value_active_masks",
        type=bool,
        default=True,
        help="by default True, whether to mask useless data in value loss.",
    )
    parser.add_argument(
        "--use_policy_active_masks",
        action="store_false",
        default=True,
        help="by default True, whether to mask useless data in policy loss.",
    )
    parser.add_argument(
        "--huber_delta", type=float, default=10.0, help=" coefficience of huber loss."
    )
    parser.add_argument(
        "--use_adv_normalize",
        type=bool,
        default=False,
        help="whether to normalize advantage",
    )
    # ppg parameters
    parser.add_argument(
        "--aux_epoch",
        type=int,
        default=5,
        # Help text states the actual default (5).
        help="number of auxiliary epochs (default: 5)",
    )
    parser.add_argument(
        "--clone_coef",
        type=float,
        default=1.0,
        # Help text states the actual default (1.0).
        help="clone term coefficient (default: 1.0)",
    )
    parser.add_argument(
        "--use_single_network",
        action="store_true",
        default=False,
        help="share base network between policy network and value network",
    )
    # run parameters
    parser.add_argument(
        "--use_linear_lr_decay",
        default=False,
        type=bool,
        help="use a linear schedule on the learning rate",
    )
    # save parameters
    parser.add_argument(
        "--save_interval",
        type=int,
        default=1,
        help="time duration between contiunous twice models saving.",
    )
    parser.add_argument(
        "--only_eval",
        default=False,
        action="store_true",
        help="only execute evaluation, default False.",
    )
    # log parameters
    parser.add_argument(
        "--log_interval",
        type=int,
        default=5,
        help="time duration between contiunous twice log printing.",
    )
    parser.add_argument(
        "--log_each_episode",
        type=bool,
        default=True,
        help="Whether to log each episode number.",
    )
    parser.add_argument(
        "--use_rich_handler",
        type=bool,
        default=True,
        help="whether to use rich handler to print log.",
    )
    # eval parameters
    parser.add_argument(
        "--use_eval",
        action="store_true",
        default=False,
        help=(
            "by default, do not start evaluation. If set`, start evaluation alongside"
            " with training."
        ),
    )
    parser.add_argument(
        "--eval_interval",
        type=int,
        default=25,
        help="time duration between contiunous twice evaluation progress.",
    )
    parser.add_argument(
        "--eval_episodes",
        type=int,
        default=32,
        help="number of episodes of the evaluation.",
    )
    # render parameters
    parser.add_argument(
        "--save_gifs",
        action="store_true",
        default=False,
        help="by default, do not save render video. If set, save video.",
    )
    parser.add_argument(
        "--use_render",
        action="store_true",
        default=False,
        help=(
            "by default, do not render the env during training. If set, start render."
            " Note: something, the environment has internal render process which is not"
            " controlled by this hyperparam."
        ),
    )
    parser.add_argument(
        "--render_episodes",
        type=int,
        default=5,
        help="the number of episodes to render a given env",
    )
    parser.add_argument(
        "--ifi",
        type=float,
        default=0.1,
        help="the play interval of each rendered image in saved video.",
    )
    # pretrained parameters
    parser.add_argument(
        "--model_dir",
        type=str,
        default=None,
        help="by default None. set the path to pretrained model.",
    )
    parser.add_argument(
        "--save_dir",
        type=str,
        default=None,
        help="by default None. set the path to save info.",
    )
    parser.add_argument(
        "--init_dir",
        type=str,
        default=None,
        help="use exist enemy to init model; if init_dir, then don't use model_dir",
    )
    parser.add_argument(
        "--run_dir",
        type=str,
        default=None,
        help="root dir to save curves, logs and models.",
    )
    # replay buffer parameters
    parser.add_argument(
        "--use_transmit",
        action="store_true",
        default=False,
        help=(
            "by default, do not use transmit. If set`, use transmit as the replay"
            " buffer."
        ),
    )
    # reverb server address
    parser.add_argument(
        "--server_address", type=str, default=None, help="Replay buffer server address."
    )
    parser.add_argument(
        "--use_tlaunch", action="store_true", default=False, help="whether use tlaunch."
    )
    parser.add_argument("--actor_num", type=int, default=1, help="number of actors")
    # replay buffer parameters
    parser.add_argument(
        "--use_reward_normalization",
        action="store_true",
        default=False,
        help="Whether to normalize rewards in replay buffer",
    )
    parser.add_argument(
        "--buffer_size",
        type=int,
        default=5000,
        help="Max # of transitions that replay buffer can contain",
    )
    parser.add_argument(
        "--popart_update_interval_step",
        type=int,
        default=2,
        help="After how many train steps popart should be updated",
    )
    # prioritized experience replay
    parser.add_argument(
        "--use_per",
        action="store_true",
        default=False,
        help="Whether to use prioritized experience replay",
    )
    parser.add_argument(
        "--per_alpha",
        type=float,
        default=0.6,
        help="Alpha term for prioritized experience replay",
    )
    parser.add_argument(
        "--per_beta_start",
        type=float,
        default=0.4,
        help="Starting beta term for prioritized experience replay",
    )
    parser.add_argument(
        "--per_eps",
        type=float,
        default=1e-6,
        help="Eps term for prioritized experience replay",
    )
    parser.add_argument(
        "--per_nu",
        type=float,
        default=0.9,
        help="Weight of max TD error in formation of PER weights",
    )
    # off-policy
    parser.add_argument(
        "--batch_size",
        type=int,
        default=32,
        help="Number of buffer transitions to train on at once",
    )
    parser.add_argument(
        "--actor_train_interval_step",
        type=int,
        default=2,
        help="After how many critic updates actor should be updated",
    )
    parser.add_argument(
        "--train_interval_episode",
        type=int,
        default=1,
        help="Number of env steps between updates to actor/critic",
    )
    parser.add_argument(
        "--train_interval",
        type=int,
        default=100,
        help="Number of episodes between updates to actor/critic",
    )
    parser.add_argument(
        "--use_same_critic_obs",
        action="store_false",
        default=True,
        help="whether all agents share the same centralized observation, in mpe",
    )
    parser.add_argument(
        "--use_global_all_local_state",
        action="store_true",
        default=False,
        help="Whether to use available actions, in smac",
    )
    parser.add_argument(
        "--prev_act_inp",
        action="store_true",
        default=False,
        help="Whether the actor input takes in previous actions as part of its input",
    )
    parser.add_argument(
        "--target_update",
        type=int,
        default=10,
        help=(
            "After how many evaluation network updates target network should be updated"
        ),
    )
    # for DDPG
    parser.add_argument(
        "--var",
        type=float,
        default=0.5,
        help="Control the exploration variance of the generated actions",
    )
    # The DDPG/SAC options below are optional flags (leading "--") like every
    # other option, so they can be set by name on the command line and bare
    # tokens are never silently consumed as their values.
    parser.add_argument(
        "--actor_lr",
        type=float,
        default=0.001,
        help="The learning rate of actor network",
    )
    # for SAC
    parser.add_argument(
        "--auto_alph",
        type=bool,
        default=False,
        help="whether to use automatic alpha tuning",
    )
    parser.add_argument(
        "--alpha_value",
        type=float,
        default=0.2,
        help="The value of alpha",
    )
    parser.add_argument(
        "--alpha_lr",
        type=float,
        default=2e-4,
        help="The learning rate of temperature alpha",
    )
    # update parameters
    parser.add_argument(
        "--use_soft_update",
        action="store_false",
        default=True,
        help="Whether to use soft update",
    )
    parser.add_argument(
        "--hard_update_interval_episode",
        type=int,
        default=200,
        help="After how many episodes the lagging target should be updated",
    )
    # exploration parameters
    parser.add_argument(
        "--num_random_episodes",
        type=int,
        default=5,
        help="Number of episodes to add to buffer with purely random actions",
    )
    parser.add_argument(
        "--epsilon_start",
        type=float,
        default=1.0,
        help="Starting value for epsilon, for eps-greedy exploration",
    )
    parser.add_argument(
        "--epsilon_finish",
        type=float,
        default=0.05,
        help="Ending value for epsilon, for eps-greedy exploration",
    )
    parser.add_argument(
        "--epsilon_anneal_time",
        type=int,
        default=5000,
        help="Number of episodes until epsilon reaches epsilon_finish",
    )
    # qmix parameters
    parser.add_argument(
        "--use_double_q",
        action="store_false",
        default=True,
        help="Whether to use double q learning",
    )
    parser.add_argument(
        "--hypernet_layers",
        type=int,
        default=2,
        help="Number of layers for hypernetworks. Must be either 1 or 2",
    )
    parser.add_argument(
        "--mixer_hidden_dim",
        type=int,
        default=32,
        help="Dimension of hidden layer of mixing network",
    )
    parser.add_argument(
        "--hypernet_hidden_dim",
        type=int,
        default=64,
        help=(
            "Dimension of hidden layer of hypernetwork (only applicable if"
            " hypernet_layers == 2"
        ),
    )
    # rmatd3 parameters
    parser.add_argument(
        "--target_action_noise_std",
        # Typed as float so a command-line value matches the float default.
        type=float,
        default=0.2,
        help="Target action smoothing noise for matd3",
    )
    parser.add_argument(
        "--data_path",
        default=None,
        type=str,
        help="the path of the training data",
    )
    parser.add_argument(
        "--env.args",
        default={},
        type=dict,
        help="the args of the env",
    )
    parser.add_argument(
        "--model_path",
        default=None,
        type=str,
        help="the path of the model",
    )
    parser.add_argument(
        "--use_share_model",
        type=bool,
        default=False,
        help="use one class to implement policy and value networks",
    )
    # rewards class
    parser.add_argument(
        "--reward_class.id",
        default=None,
        type=str,
        help="the id of the reward class",
    )
    parser.add_argument(
        "--reward_class.args",
        default={},
        type=dict,
        help="the parameters of the reward class",
    )
    # vec info class
    parser.add_argument(
        "--vec_info_class.id",
        default=None,
        type=str,
        help="the id of the vec env's info class",
    )
    parser.add_argument(
        "--vec_info_class.args",
        default={},
        type=dict,
        help="the parameters of the vec info class",
    )
    # evaluation metrics
    parser.add_argument(
        "--eval_metrics",
        nargs="+",
        type=dict,
        default=[],
        help="the id of the vec env's info class",
    )
    # selfplay parameters
    parser.add_argument(
        "--disable_update_enemy",
        default=False,
        type=bool,
        help="whether update enemy model",
    )
    parser.add_argument(
        "--least_win_rate",
        default=0.5,
        type=float,
        help="least_win_rate",
    )
    parser.add_argument(
        "--recent_list_max_len",
        default=100,
        type=int,
        help="max length of recent player list",
    )
    parser.add_argument(
        "--latest_weight",
        default=0.5,
        type=float,
        help="latest_weight",
    )
    parser.add_argument(
        "--newest_pos",
        default=1,
        type=int,
        help="newest_pos",
    )
    parser.add_argument(
        "--newest_weight",
        default=0.5,
        type=float,
        help="newest_weight",
    )
    # deepspeed parameters
    parser.add_argument(
        "--use_deepspeed",
        default=False,
        type=bool,
        help="whether to use deepspeed",
    )
    parser.add_argument(
        "--local_rank",
        default=-1,
        type=int,
        help="local_rank",
    )
    parser.add_argument(
        "--use_offload",
        default=False,
        type=bool,
        help="whether to use offload (deepspeed)",
    )
    parser.add_argument(
        "--use_fp16",
        default=False,
        type=bool,
        help="whether to use fp16 (deepspeed)",
    )
    return parser