RLinf：面向具身智能与智能体的强化学习基础设施 | 极客日志

RLinf：面向具身智能与智能体的强化学习基础设施 | 极客日志

# Clone the RLinf repository and enter it.
git clone https://github.com/RLinf/RLinf.git
cd RLinf

# Start the Franka image with the current directory mounted at /workspace/RLinf.
docker run -it --rm --privileged --network host --name rlinf \
-v .:/workspace/RLinf \
rlinf/rlinf:agentic-rlinf0.1-franka

# Switch to the environment matching the libfranka version in use.
source switch_env franka-<libfranka_version>
# For example, for libfranka version 0.15.0:
# source switch_env franka-0.15.0

# Install the embodied dependencies for the Franka environment,
# then activate the virtual environment in .venv.
requirements/install.sh embodied --env franka
source .venv/bin/activate

# Clone the RLinf repository and enter it.
git clone https://github.com/RLinf/RLinf.git
cd RLinf

# Start the training / rollout image with all GPUs, 20g of shared memory,
# and the current directory mounted at /workspace/RLinf.
docker run -it --rm --gpus all --shm-size 20g --network host --name rlinf \
-v .:/workspace/RLinf \
rlinf/rlinf:agentic-rlinf0.1-torch2.6.0-openvla-openvlaoft-pi0

# Install the embodied dependencies (openvla model, maniskill_libero env),
# then activate the virtual environment in .venv.
requirements/install.sh embodied --model openvla --env maniskill_libero
source .venv/bin/activate

# Enable Git LFS, then clone the RLinf-ResNet10-pretrained repository from Hugging Face.
git lfs install
git clone https://huggingface.co/RLinf/RLinf-ResNet10-pretrained

# Replace the placeholder with the robot's IP address.
export FRANKA_ROBOT_IP=<your_robot_ip_address>

# Run the controller check from the realworld_check toolkit.
python -m toolkits.realworld_check.test_controller

# Activate the Python virtual environment and load the catkin workspace setup.
source <path_to_your_venv>/bin/activate
source <your_catkin_ws>/devel/setup.bash

# Single-node cluster: the env component is placed on the node group
# labelled "franka".
cluster:
  num_nodes: 1
  component_placement:
    env:
      node_group: franka
      placement: 0
  node_groups:
    - label: franka
      node_ranks: 0
      # Hardware is declared on the node group that owns the robot.
      hardware:
        type: Franka
        configs:
          - robot_ip: ROBOT_IP  # placeholder: replace with the robot's IP address
            node_rank: 0

# Overrides the target end-effector pose in the eval environment config.
# NOTE(review): the six values are presumably [x, y, z, roll, pitch, yaw];
# confirm ordering and units against the environment implementation.
env:
  eval:
    override_cfg:
      target_ee_pose: [0.5, 0.0, 0.1, -3.14, 0.0, 0.0]

# Run the data collection script.
bash examples/embodiment/collect_data.sh

# Per-node environment: repository on PYTHONPATH, this node's rank, and the
# network device used for communication.
export PYTHONPATH=<path_to_your_RLinf_repo>:$PYTHONPATH
export RLINF_NODE_RANK=<node_rank_of_this_node>
export RLINF_COMM_NET_DEVICES=<network_device_for_communication>
# Can be omitted if the node has only one network interface.

# On the head node (node rank 0)
ray start --head --port=6379 --node-ip-address=<head_node_ip_address>

# On the worker nodes (node ranks 1 ~ N-1)
ray start --address='<head_node_ip_address>:6379'

# Run the camera check from the realworld_check toolkit.
python -m toolkits.realworld_check.test_camera

# Launch the async real-world run with the realworld_peginsertion_rlpd_cnn_async config.
bash examples/embodiment/run_realworld_async.sh realworld_peginsertion_rlpd_cnn_async

# NOTE(review): the same launch command appears twice in a row; presumably one
# occurrence per section of the original guide. Confirm before deduplicating.
bash examples/embodiment/run_realworld_async.sh realworld_peginsertion_rlpd_cnn_async

# Three-node cluster: one training / rollout node plus two robot control nodes.
cluster:
  num_nodes: 3  # 1 training / rollout node + 2 robot control nodes
  component_placement:
    actor:
      node_group: "4090"
      placement: 0  # runs on the first GPU of the training / rollout node
    env:
      node_group: franka
      placement: 0-1  # two envs, one bound to each robot (rank 0 and rank 1)
    rollout:
      node_group: "4090"
      placement: 0:0-1  # two rollout processes on the first GPU of the training / rollout node
  node_groups:
    - label: "4090"
      node_ranks: 0  # node rank 0 is the training / rollout node
    - label: franka
      node_ranks: 1-2  # node ranks 1 and 2 are the two robot control nodes
      # Hardware is declared on the node group that owns the robots.
      hardware:
        type: Franka
        configs:
          - robot_ip: ROBOT_IP_FOR_RANK1
            node_rank: 1  # rank of the first robot control node
          - robot_ip: ROBOT_IP_FOR_RANK2
            node_rank: 2  # rank of the second robot control node

# Serve TensorBoard for the logs under ./logs on port 6006.
tensorboard --logdir ./logs --port 6006