"""G1 joystick locomotion environments."""
from __future__ import annotations
import math
from dataclasses import dataclass, field
from typing import Any
import numpy as np
from etils import epath
from unilab.assets import ASSETS_ROOT_PATH
from unilab.base import registry
from unilab.base.augmentation import SymmetryObsLayout
from unilab.base.backend import create_backend
from unilab.base.curriculum import EpisodeLengthTracker, PenaltyCurriculum
from unilab.base.np_env import NpEnvState
from unilab.base.scene import SceneCfg
from unilab.dtype_config import get_global_dtype
from unilab.envs.locomotion.common import rewards
from unilab.envs.locomotion.common.commands import (
Commands,
sample_heading_commands,
zero_small_xy_commands,
)
from unilab.envs.locomotion.common.domain_rand import DomainRandConfig
from unilab.envs.locomotion.common.dr_provider import LocomotionDRProvider
from unilab.envs.locomotion.common.rewards import RewardContext
from unilab.envs.locomotion.g1.base import G1BaseCfg, G1BaseEnv
[docs]
@dataclass
class G1DomainRandConfig(DomainRandConfig):
randomize_kp: bool = True
kp_multiplier_range: list[float] = field(default_factory=lambda: [0.9, 1.1])
randomize_kd: bool = True
kd_multiplier_range: list[float] = field(default_factory=lambda: [0.9, 1.1])
[docs]
@dataclass
class InitState:
pos = [0.0, 0.0, 0.754]
[docs]
def sample_gait_phase_pairs(rng, num_samples: int, mode: str) -> np.ndarray:
if mode == "independent":
return np.asarray(
np.column_stack(
[
rng.uniform(0.0, 2.0 * np.pi, size=(num_samples,)),
rng.uniform(0.0, 2.0 * np.pi, size=(num_samples,)),
]
),
dtype=get_global_dtype(),
)
phase = rng.uniform(0.0, 2.0 * np.pi, size=(num_samples,))
return np.asarray(np.column_stack([phase, phase + np.pi]), dtype=get_global_dtype())
[docs]
def sample_reset_base_qvel(rng, num_samples: int, limit: float) -> np.ndarray:
return np.asarray(rng.uniform(-limit, limit, size=(num_samples, 6)), dtype=get_global_dtype())
[docs]
def build_upper_body_pose_weights(pose_weights: list[float]) -> np.ndarray:
weights = np.asarray(pose_weights, dtype=get_global_dtype()).copy()
weights[:12] = 0.0
return np.asarray(weights, dtype=get_global_dtype())
[docs]
def compute_feet_phase_height_targets(
gait_phase: np.ndarray, swing_height: float
) -> tuple[np.ndarray, np.ndarray]:
def cubic_bezier_height(phi: np.ndarray, swing_height: float) -> np.ndarray:
phi_normalized = np.fmod(phi + np.pi, 2 * np.pi) - np.pi
x = (phi_normalized + np.pi) / (2 * np.pi)
def cubic_bezier_interpolation(
y_start: np.ndarray, y_end: np.ndarray, t: np.ndarray
) -> np.ndarray:
y_diff = y_end - y_start
bezier = t**3 + 3 * (t**2 * (1 - t))
return np.asarray(y_start + y_diff * bezier, dtype=get_global_dtype())
stance = cubic_bezier_interpolation(np.zeros_like(x), np.full_like(x, swing_height), 2 * x)
swing = cubic_bezier_interpolation(
np.full_like(x, swing_height), np.zeros_like(x), 2 * x - 1
)
return np.where(x <= 0.5, stance, swing)
left_target = cubic_bezier_height(gait_phase[:, 0], swing_height)
right_target = cubic_bezier_height(gait_phase[:, 1], swing_height)
return left_target, right_target
LEFT_FOOT_CONTACT_SENSORS = [f"left_foot_contact_{i}" for i in range(4)]
RIGHT_FOOT_CONTACT_SENSORS = [f"right_foot_contact_{i}" for i in range(4)]
def _scalarize_sensor_values(sensor_values: np.ndarray) -> np.ndarray:
sensor_array = np.asarray(sensor_values, dtype=get_global_dtype())
if sensor_array.ndim == 1:
return sensor_array
if sensor_array.ndim == 2 and sensor_array.shape[1] == 1:
return sensor_array[:, 0]
raise ValueError(f"Expected scalar sensor values, got shape {sensor_array.shape}")
[docs]
def compute_forward_speed_gate(linvel: np.ndarray, min_forward_speed: float) -> np.ndarray:
forward_speed = np.maximum(linvel[:, 0], 0.0)
return np.asarray(forward_speed >= min_forward_speed, dtype=get_global_dtype())
[docs]
def compute_forward_command_mask(commands: np.ndarray) -> np.ndarray:
return np.asarray(np.maximum(commands[:, 0], 0.0) > 1.0e-6, dtype=get_global_dtype())
[docs]
@dataclass
class G1RewardConfig:
scales: dict[str, float]
tracking_sigma: float
gait_frequency: float
feet_phase_swing_height: float
feet_phase_tracking_sigma: float
base_height_target: float
min_base_height: float
max_tilt_deg: float
min_forward_speed_for_gait_reward: float = 0.0
close_feet_threshold: float = 0.15
pose_weights: list[float] = field(
default_factory=lambda: [
0.01,
1.0,
5.0,
0.01,
5.0,
5.0,
0.01,
1.0,
5.0,
0.01,
5.0,
5.0,
50.0,
50.0,
50.0,
50.0,
50.0,
50.0,
50.0,
50.0,
50.0,
50.0,
50.0,
50.0,
50.0,
50.0,
50.0,
50.0,
50.0,
]
)
[docs]
@dataclass
class G1WalkLegacyRewardConfig(G1RewardConfig):
pass
[docs]
@dataclass
class CurriculumConfig:
enabled: bool = False
initial_scale: float = 0.5
min_scale: float = 0.5
max_scale: float = 1.0
level_down_threshold: float = 150.0
level_up_threshold: float = 750.0
degree: float = 0.001
[docs]
@dataclass
class G1WalkEnvCfg(G1BaseCfg):
scene: SceneCfg = field(
default_factory=lambda: SceneCfg(
model_file=str(ASSETS_ROOT_PATH / "robots" / "g1" / "scene_flat.xml")
)
)
max_episode_seconds: float = 20.0
init_state: InitState = field(default_factory=InitState)
commands: Commands = field(default_factory=Commands)
reward_config: G1RewardConfig | None = None
domain_rand: G1DomainRandConfig = field(default_factory=G1DomainRandConfig)
gait_phase_init_mode: str = "offset_phase"
reset_base_qvel_limit: float = 0.5
curriculum: CurriculumConfig = field(default_factory=CurriculumConfig)
[docs]
class G1WalkDomainRandomizationProvider(LocomotionDRProvider):
[docs]
def __init__(self, *, base_kp: np.ndarray | None = None, base_kd: np.ndarray | None = None):
self._base_kp = base_kp
self._base_kd = base_kd
def _get_base_actuator_gains(self, env: Any) -> tuple[np.ndarray | None, np.ndarray | None]:
return self._base_kp, self._base_kd
def _get_qvel_limit(self, env: Any) -> float:
return float(env.cfg.reset_base_qvel_limit)
def _build_extra_info_updates(self, env: Any, num_reset: int) -> dict[str, np.ndarray]:
updates = {"gait_phase": self._sample_gait_phase(env, num_reset)}
if getattr(env.cfg.commands, "heading_command", False):
updates["heading_commands"] = sample_heading_commands(env, num_reset)
return updates
def _sample_commands(self, env: Any, num_reset: int) -> np.ndarray:
commands = super()._sample_commands(env, num_reset)
zero_small_xy_commands(commands)
standing_prob = float(getattr(env.cfg.commands, "rel_standing_envs", 0.0))
if standing_prob > 0.0:
standing = np.random.uniform(size=(num_reset,)) < min(standing_prob, 1.0)
commands[standing] = 0.0
if getattr(env.cfg.commands, "heading_command", False):
commands[:, 2] = 0.0
return commands
def _sample_gait_phase(self, env: Any, num_reset: int) -> np.ndarray:
mode = env.cfg.gait_phase_init_mode
if mode == "independent":
left = np.random.uniform(0.0, 2.0 * np.pi, size=(num_reset,))
right = np.random.uniform(0.0, 2.0 * np.pi, size=(num_reset,))
return np.asarray(np.column_stack([left, right]), dtype=get_global_dtype())
phase = np.random.uniform(0.0, 2.0 * np.pi, size=(num_reset,))
return np.asarray(np.column_stack([phase, phase + np.pi]), dtype=get_global_dtype())
def _compute_reset_obs(
self,
env: Any,
env_ids: Any,
info_updates: Any,
linvel: Any,
gyro: Any,
gravity: Any,
dof_pos: Any,
dof_vel: Any,
) -> dict[str, np.ndarray]:
return env._compute_obs(info_updates, linvel, gyro, gravity, dof_pos, dof_vel) # type: ignore[no-any-return]
[docs]
class G1WalkEnv(G1BaseEnv):
_cfg: G1WalkEnvCfg
_reward_cfg: Any
[docs]
def __init__(self, cfg: G1WalkEnvCfg, num_envs=1, backend_type="mujoco"):
if cfg.reward_config is None:
raise ValueError("reward_config must be provided via Hydra configuration")
backend = create_backend(
backend_type,
cfg.scene,
num_envs,
cfg.sim_dt,
base_name=cfg.asset.base_name,
push_body_name=cfg.domain_rand.push_body_name,
motrix_max_iterations=cfg.motrix_max_iterations,
post_step_forward_sensor=cfg.post_step_forward_sensor,
)
super().__init__(cfg, backend, num_envs)
self._enable_reward_log = True
self._reward_cfg = cfg.reward_config
self._gait_phase_delta = float(
2.0 * math.pi * self._reward_cfg.gait_frequency * cfg.ctrl_dt
)
self._pose_weights = np.array(self._reward_cfg.pose_weights, dtype=get_global_dtype())
if self._pose_weights.shape[0] != self._num_action:
raise ValueError("pose_weights length mismatch")
self._upper_body_pose_weights = build_upper_body_pose_weights(self._reward_cfg.pose_weights)
self._episode_tracker: EpisodeLengthTracker | None = None
self._penalty_curriculum: PenaltyCurriculum | None = None
if cfg.curriculum.enabled:
self._episode_tracker = EpisodeLengthTracker(num_envs)
self._penalty_curriculum = PenaltyCurriculum(
self,
enabled=True,
initial_scale=cfg.curriculum.initial_scale,
min_scale=cfg.curriculum.min_scale,
max_scale=cfg.curriculum.max_scale,
level_down_threshold=cfg.curriculum.level_down_threshold,
level_up_threshold=cfg.curriculum.level_up_threshold,
degree=cfg.curriculum.degree,
)
self._init_reward_functions()
if cfg.domain_rand.randomize_kp or cfg.domain_rand.randomize_kd:
base_kp, base_kd = backend.get_actuator_gains()
dr_provider = G1WalkDomainRandomizationProvider(base_kp=base_kp, base_kd=base_kd)
else:
dr_provider = G1WalkDomainRandomizationProvider()
self._init_domain_randomization(dr_provider)
@property
def obs_groups_spec(self) -> dict[str, int]:
# gyro(3) + gravity(3) + diff(29) + dof_vel(29) + action(29) + cmd(3) + phase(2) = 98
return {"obs": 98, "critic": 101}
def _init_reward_functions(self):
self._reward_fns: dict[str, Any] = {
"tracking_lin_vel": rewards.tracking_lin_vel,
"tracking_ang_vel": rewards.tracking_ang_vel,
"forward_progress": rewards.forward_progress,
"under_speed": rewards.under_speed,
"lin_vel_z": rewards.lin_vel_z,
"orientation": rewards.orientation,
"penalty_orientation": rewards.orientation,
"ang_vel_xy": rewards.ang_vel_xy,
"penalty_ang_vel_xy": rewards.ang_vel_xy,
"action_rate": rewards.action_rate,
"penalty_action_rate": rewards.action_rate,
"base_height": rewards.base_height,
"pose": rewards.weighted_pose,
"upper_body_pose": self._reward_upper_body_pose,
"penalty_close_feet_xy": self._reward_close_feet_xy,
"penalty_feet_ori": self._reward_feet_ori,
"feet_phase": self._reward_feet_phase,
"feet_phase_contrast": self._reward_feet_phase_contrast,
"feet_phase_contact": self._reward_feet_phase_contact,
"feet_double_stance": self._reward_feet_double_stance,
"feet_air_time": self._reward_feet_air_time,
"alive": rewards.alive,
}
def _terrain_relative_base_height(self) -> np.ndarray:
return np.asarray(self._backend.get_base_pos()[:, 2], dtype=get_global_dtype())
[docs]
def update_state(self, state: NpEnvState) -> NpEnvState:
linvel = self.get_local_linvel()
gyro = self.get_gyro()
gravity = self._backend.get_sensor_data(self._cfg.sensor.upvector)
dof_pos = self.get_dof_pos()
dof_vel = self.get_dof_vel()
max_tilt_rad = np.deg2rad(self._reward_cfg.max_tilt_deg)
tilt = np.arccos(np.clip(gravity[:, 2], -1, 1))
terminated = np.logical_or(
tilt > max_tilt_rad,
self._terrain_relative_base_height() < self._reward_cfg.min_base_height,
)
reward = self._compute_reward(state.info, linvel, gyro, gravity, dof_pos, dof_vel)
obs = self._compute_obs(state.info, linvel, gyro, gravity, dof_pos, dof_vel)
state = state.replace(obs=obs, reward=reward, terminated=terminated)
done = state.terminated | state.truncated
if self._episode_tracker is None or self._penalty_curriculum is None or not np.any(done):
return state
done_indices = np.where(done)[0]
episode_lengths = state.info["steps"][done_indices] + 1
self._episode_tracker.update(episode_lengths)
self._penalty_curriculum.update(self._episode_tracker.average_length)
if "log" not in state.info:
state.info["log"] = {}
state.info["log"]["curriculum/average_episode_length"] = float(
self._episode_tracker.average_length
)
state.info["log"]["curriculum/penalty_scale"] = float(
self._penalty_curriculum.current_scale
)
return state
def _compute_obs(
self, info: dict, linvel, gyro, gravity, dof_pos, dof_vel
) -> dict[str, np.ndarray]:
noise_cfg = self._cfg.noise_config
diff = dof_pos - self.default_angles
command = info["commands"]
last_actions = info.get("current_actions", np.zeros_like(diff))
gait_phase = info.get("gait_phase", np.zeros((self._num_envs, 2), dtype=get_global_dtype()))
walk_profile = self._uses_walk_observation_profile()
noisy_gyro = self._obs_noise(gyro, noise_cfg.scale_gyro)
noisy_gravity = self._obs_noise(gravity, noise_cfg.scale_gravity)
noisy_diff = self._obs_noise(diff, noise_cfg.scale_joint_angle)
noisy_dof_vel = self._obs_noise(dof_vel, noise_cfg.scale_joint_vel)
actor_gyro_scale = 0.25 if walk_profile else 1.0
actor_dof_vel_scale = 0.05 if walk_profile else 1.0
actor = np.concatenate(
[
noisy_gyro * actor_gyro_scale,
-noisy_gravity,
noisy_diff,
noisy_dof_vel * actor_dof_vel_scale,
last_actions,
command,
gait_phase,
],
axis=1,
dtype=get_global_dtype(),
)
critic_gyro_scale = 0.25 if walk_profile else 1.0
critic_dof_vel_scale = 0.05 if walk_profile else 1.0
critic_linvel_scale = 2.0 if walk_profile else 1.0
critic_base = np.concatenate(
[
gyro * critic_gyro_scale,
-gravity,
diff,
dof_vel * critic_dof_vel_scale,
last_actions,
command,
gait_phase,
],
axis=1,
dtype=get_global_dtype(),
)
critic = np.concatenate(
[
critic_base,
np.asarray(linvel * critic_linvel_scale, dtype=get_global_dtype()),
],
axis=1,
dtype=get_global_dtype(),
)
return {"obs": actor, "critic": critic}
def _uses_walk_observation_profile(self) -> bool:
scales = getattr(getattr(self, "_reward_cfg", None), "scales", None)
if scales is None:
reward_cfg = getattr(self._cfg, "reward_config", None)
scales = getattr(reward_cfg, "scales", None)
if scales is not None:
if any(
key in scales
for key in (
"penalty_orientation",
"penalty_ang_vel_xy",
"penalty_action_rate",
"alive",
)
):
return True
if any(key in scales for key in ("orientation", "ang_vel_xy", "action_rate")):
return False
curriculum = getattr(self._cfg, "curriculum", None)
return bool(curriculum is not None and curriculum.enabled)
def _actor_symmetry_obs_layout(self) -> SymmetryObsLayout:
return (
("gyro", 3),
("gravity", 3),
("dof_pos", self._num_action),
("dof_vel", self._num_action),
("actions", self._num_action),
("command", 3),
("gait_phase", 2),
)
[docs]
def get_symmetry_obs_layouts(self) -> dict[str, SymmetryObsLayout]:
actor_layout = self._actor_symmetry_obs_layout()
return {
"obs": actor_layout,
"critic": (*actor_layout, ("linvel", 3)),
}
[docs]
def build_symmetry_augmentation(self, *, device: str):
if self._backend.backend_type != "mujoco":
return None
from unilab.envs.locomotion.g1.symmetry import G1SymmetryAugmentation
return G1SymmetryAugmentation(
self._backend.model,
self.get_symmetry_obs_layouts(),
device=device,
)
def _build_reward_context(
self, info: dict, linvel, gyro, gravity, dof_pos, dof_vel
) -> RewardContext:
return RewardContext(
info=info,
linvel=linvel,
gyro=gyro,
dof_pos=dof_pos,
num_envs=self._num_envs,
default_angles=self.default_angles,
tracking_sigma=self._reward_cfg.tracking_sigma,
base_height_target=self._reward_cfg.base_height_target,
base_height=self._backend.get_base_pos()[:, 2],
gravity=gravity,
dof_vel=dof_vel,
pose_weights=self._pose_weights,
)
def _compute_reward(self, info: dict, linvel, gyro, gravity, dof_pos, dof_vel) -> np.ndarray:
cfg = self._reward_cfg
ctx = self._build_reward_context(info, linvel, gyro, gravity, dof_pos, dof_vel)
return rewards.run_reward_dispatch(
scales=cfg.scales,
fns=self._reward_fns,
ctx=ctx,
info=info,
enable_log=self._enable_reward_log,
ctrl_dt=self._cfg.ctrl_dt,
)
def _reward_feet_phase(self, ctx: RewardContext):
"""步态相位奖励:鼓励正确的摆动腿高度"""
left_foot = self._backend.get_sensor_data("left_foot_pos")
right_foot = self._backend.get_sensor_data("right_foot_pos")
gait_phase = ctx.info.get(
"gait_phase", np.zeros((self._num_envs, 2), dtype=get_global_dtype())
)
swing_height = self._reward_cfg.feet_phase_swing_height
left_target, right_target = compute_feet_phase_height_targets(gait_phase, swing_height)
left_error = np.square(left_foot[:, 2] - left_target)
right_error = np.square(right_foot[:, 2] - right_target)
reward = np.exp(-(left_error + right_error) / self._reward_cfg.feet_phase_tracking_sigma)
return np.asarray(reward * self._gait_reward_gate(ctx.linvel), dtype=get_global_dtype())
def _gait_reward_gate(self, linvel: np.ndarray) -> np.ndarray:
min_forward_speed = getattr(self._reward_cfg, "min_forward_speed_for_gait_reward", 0.0)
return compute_forward_speed_gate(linvel, min_forward_speed)
def _reward_feet_phase_contrast(self, ctx: RewardContext):
left_foot = self._backend.get_sensor_data("left_foot_pos")
right_foot = self._backend.get_sensor_data("right_foot_pos")
gait_phase = ctx.info.get(
"gait_phase", np.zeros((self._num_envs, 2), dtype=get_global_dtype())
)
swing_height = self._reward_cfg.feet_phase_swing_height
left_target, right_target = compute_feet_phase_height_targets(gait_phase, swing_height)
actual_delta = left_foot[:, 2] - right_foot[:, 2]
target_delta = left_target - right_target
error = np.square(actual_delta - target_delta)
reward = np.exp(-error / self._reward_cfg.feet_phase_tracking_sigma)
return np.asarray(reward * self._gait_reward_gate(ctx.linvel), dtype=get_global_dtype())
def _reward_feet_phase_contact(self, ctx: RewardContext):
gait_phase = ctx.info.get(
"gait_phase", np.zeros((self._num_envs, 2), dtype=get_global_dtype())
)
swing_height = self._reward_cfg.feet_phase_swing_height
left_target_contact, right_target_contact = compute_feet_phase_contact_targets(
gait_phase, swing_height
)
left_contact = compute_aggregated_foot_contact(self._backend, LEFT_FOOT_CONTACT_SENSORS)
right_contact = compute_aggregated_foot_contact(self._backend, RIGHT_FOOT_CONTACT_SENSORS)
left_match = np.asarray(left_contact == left_target_contact, dtype=get_global_dtype())
right_match = np.asarray(right_contact == right_target_contact, dtype=get_global_dtype())
reward = np.asarray(0.5 * (left_match + right_match), dtype=get_global_dtype())
return np.asarray(reward * self._gait_reward_gate(ctx.linvel), dtype=get_global_dtype())
def _reward_feet_double_stance(self, ctx: RewardContext):
commands = ctx.info.get("commands", np.zeros((self._num_envs, 3), dtype=get_global_dtype()))
left_contact = compute_aggregated_foot_contact(self._backend, LEFT_FOOT_CONTACT_SENSORS)
right_contact = compute_aggregated_foot_contact(self._backend, RIGHT_FOOT_CONTACT_SENSORS)
double_stance = np.asarray(
np.logical_and(left_contact, right_contact), dtype=get_global_dtype()
)
return np.asarray(
double_stance * compute_forward_command_mask(commands), dtype=get_global_dtype()
)
def _reward_feet_ori(self, ctx: RewardContext):
left_foot_quat = self._backend.get_sensor_data("left_foot_quat")
right_foot_quat = self._backend.get_sensor_data("right_foot_quat")
return (
np.square(left_foot_quat[:, 1])
+ np.square(left_foot_quat[:, 2])
+ np.square(right_foot_quat[:, 1])
+ np.square(right_foot_quat[:, 2])
)
def _reward_close_feet_xy(self, ctx: RewardContext):
left_foot = self._backend.get_sensor_data("left_foot_pos")
right_foot = self._backend.get_sensor_data("right_foot_pos")
feet_dist = np.linalg.norm(left_foot[:, :2] - right_foot[:, :2], axis=1)
return np.where(
feet_dist < self._reward_cfg.close_feet_threshold,
np.square(feet_dist - self._reward_cfg.close_feet_threshold),
0.0,
)
def _reward_feet_air_time(self, ctx: RewardContext):
air_time = ctx.info.get(
"feet_air_time", np.zeros((self._num_envs, 2), dtype=get_global_dtype())
)
in_range = (air_time > 0.05) & (air_time < 0.5)
return np.sum(in_range.astype(float), axis=1)
def _reward_upper_body_pose(self, ctx: RewardContext):
diff = ctx.dof_pos - self.default_angles
return np.asarray(
np.sum(self._upper_body_pose_weights * np.square(diff), axis=1),
dtype=get_global_dtype(),
)
[docs]
def apply_action(self, actions: np.ndarray, state: NpEnvState) -> np.ndarray:
state.info["last_actions"] = state.info.get("current_actions", np.zeros_like(actions))
state.info["current_actions"] = actions
gait_phase = state.info.get(
"gait_phase", np.zeros((self._num_envs, 2), dtype=get_global_dtype())
)
gait_phase[:, 0] = (gait_phase[:, 0] + self._gait_phase_delta) % (2 * np.pi)
gait_phase[:, 1] = (gait_phase[:, 1] + self._gait_phase_delta) % (2 * np.pi)
state.info["gait_phase"] = gait_phase
ctrl: np.ndarray = actions * self._cfg.control_config.action_scale + self.default_angles
return ctrl
def _walk_curriculum() -> CurriculumConfig:
return CurriculumConfig(
enabled=True,
initial_scale=0.5,
min_scale=0.5,
max_scale=1.0,
level_down_threshold=150.0,
level_up_threshold=750.0,
degree=0.001,
)
[docs]
@dataclass
class G1WalkControlConfig:
action_scale: float = 1.0
simulate_action_latency: bool = False
[docs]
@dataclass
class G1WalkRewardConfig(G1RewardConfig):
"""对齐 holosoma G1 walking 奖励权重。"""
[docs]
@registry.envcfg("G1WalkFlat")
@dataclass
class G1WalkFlatCfg(G1WalkEnvCfg):
reward_config: G1WalkRewardConfig | None = None
scene: SceneCfg = field(
default_factory=lambda: SceneCfg(
model_file=str(ASSETS_ROOT_PATH / "robots" / "g1" / "scene_flat.xml")
)
)
control_config: G1WalkControlConfig = field(default_factory=G1WalkControlConfig) # type: ignore[assignment]
curriculum: CurriculumConfig = field(default_factory=_walk_curriculum)
[docs]
@registry.envcfg("G1WalkRough")
@dataclass
class G1WalkRoughCfg(G1WalkFlatCfg):
scene: SceneCfg = field(
default_factory=lambda: SceneCfg(
model_file=str(ASSETS_ROOT_PATH / "robots" / "g1" / "scene_rough.xml")
)
)
registry.register_env("G1WalkFlat", G1WalkEnv, sim_backend="mujoco")
registry.register_env("G1WalkFlat", G1WalkEnv, sim_backend="motrix")
registry.register_env("G1WalkRough", G1WalkEnv, sim_backend="mujoco")
registry.register_env("G1WalkRough", G1WalkEnv, sim_backend="motrix")