diff --git a/_static/images/roll.png b/_static/images/roll.png new file mode 100644 index 0000000..ff18dde Binary files /dev/null and b/_static/images/roll.png differ diff --git a/index.rst b/index.rst index 701768e..8d77b86 100644 --- a/index.rst +++ b/index.rst @@ -41,6 +41,7 @@ sources/torchtitan/index.rst sources/sglang/index.rst sources/verl/index.rst + sources/roll/index.rst 选择您的偏好,并按照 :doc:`快速安装昇腾环境` 的安装指导进行操作。 @@ -430,6 +431,24 @@ | 快速上手 - + + +
+
+
+
+

ROLL

+

大规模学习的强化学习优化

+
+
+
+ +
diff --git a/sources/roll/index.rst b/sources/roll/index.rst new file mode 100644 index 0000000..a16f5ad --- /dev/null +++ b/sources/roll/index.rst @@ -0,0 +1,8 @@ +roll +============ + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst diff --git a/sources/roll/install.rst b/sources/roll/install.rst new file mode 100644 index 0000000..d751cda --- /dev/null +++ b/sources/roll/install.rst @@ -0,0 +1,126 @@ +安装指南 +============== + +本教程面向使用 roll & Ascend 的开发者,帮助完成昇腾环境下 roll 的安装。 + +昇腾环境安装 +------------ + +请根据已有昇腾产品型号及 CPU 架构等按照 :doc:`快速安装昇腾环境指引 <../ascend/quick_install>` 进行昇腾环境安装。 + +.. warning:: + CANN 最低版本为 8.2.RC1,安装 CANN 时,请同时安装 Kernel 算子包以及 nnal 加速库软件包。 + +Python 环境创建 +---------------------- + +.. code-block:: shell + :linenos: + + # 创建名为 roll 的 python 3.10 的虚拟环境 + conda create -y -n roll python=3.10 + # 激活虚拟环境 + conda activate roll + +Torch 安装创建 +---------------------- + +.. code-block:: shell + :linenos: + + # 安装 torch 的 CPU 版本 + pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cpu + + # 安装 torch_npu + pip install torch_npu==2.5.1 + +vllm & vllm-ascend 安装 +---------------------- + +.. code-block:: shell + :linenos: + + # vllm + git clone -b v0.8.4 --depth 1 https://github.com/vllm-project/vllm.git + cd vllm + + VLLM_TARGET_DEVICE=empty pip install -v -e . + cd .. + + +.. code-block:: shell + :linenos: + + # vllm-ascend + git clone -b v0.8.4rc2 --depth 1 https://github.com/vllm-project/vllm-ascend.git + cd vllm-ascend + + export COMPILE_CUSTOM_KERNELS=1 + pip install -e . + cd .. + +如果在安装 vllm-ascend 时遇到类似以下问题: + +.. code-block:: shell + :linenos: + + RuntimeError: CMake configuration failed: Command '['/pathto/miniconda3/envs/roll/bin/python3.10', '-m', 'pybind11', '--cmake']' returned non-zero exit status 2. + +可尝试在 vllm-ascend 目录下 setup.py 文件 151-158 行进行如下修改并重新进行编译: + +.. code-block:: shell + :linenos: + + try: + # if pybind11 is installed via pip + pybind11_cmake_path = (subprocess.check_output( + [python_executable, "-m", "pybind11", + "--cmakedir"]).decode().strip()) + except subprocess.CalledProcessError as e: + # else specify pybind11 path installed from source code on CI container + raise RuntimeError(f"CMake configuration failed: {e}") + +安装 roll +---------------------- + +使用以下指令安装 roll 及相关依赖: + +.. code-block:: shell + :linenos: + + git clone https://github.com/alibaba/ROLL.git + cd ROLL + + # Install roll dependencies + pip install -r requirements_common.txt + pip install deepspeed==0.16.0 + + +其他第三方库说明 +---------------------- + +.. list-table:: + :header-rows: 1 + :widths: 30 20 + + * - Software + - Description + * - transformers + - v4.52.4 + * - flash_attn + - not supported + * - transformer-engine[pytorch] + - not supported + + +1. 支持通过 transformers 使能 --flash_attention_2, transformers 需等于 4.52.4版本。 + +2. 不支持通过 flash_attn 使能 flash attention 加速。 + +3. 暂不支持 transformer-engine[pytorch]。 + + +.. code-block:: shell + :linenos: + + pip install transformers==4.52.4 \ No newline at end of file diff --git a/sources/roll/quick_start.rst b/sources/roll/quick_start.rst new file mode 100644 index 0000000..55bd59e --- /dev/null +++ b/sources/roll/quick_start.rst @@ -0,0 +1,267 @@ +快速开始,单节点部署指引 +================== + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境及 roll 所需的环境。 + + 本篇教程将介绍如何使用 roll 进行快速训练,帮助您快速上手 roll 。 + +本文档帮助昇腾开发者快速使用 roll × 昇腾 进行 LLM 强化学习训练。可以访问 `这篇官方文档 `_ 获取更多信息。 + +也可以参考官方的 `昇腾快速开始文档 `_ 。 + + + +正式使用前,建议您通过对单节点流水线的训练尝试以检验环境准备和安装的正确性。 由于目前暂不支持 Megatron-LM 训练,请首先将对应文件中 strategy_args 参数修改为 deepspeed 选项。 + +使用配置文件执行 agentic pipeline +^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + 以 qwen2.5-0.5B-agentic 为例(Requires >=4 NPUs) + +修改策略文件: + +.. code-block:: python + + # vim roll/distributed/strategy/vllm_strategy.py + enable_prefix_caching: False, + + +修改配置文件: + +.. code-block:: yaml + + # vim examples/qwen2.5-0.5B-agentic/agentic_val_sokoban.yaml + defaults: + - ../config/traj_envs@_here_ + - ../config/deepspeed_zero@_here_ + - ../config/deepspeed_zero2@_here_ + - ../config/deepspeed_zero3@_here_ + - ../config/deepspeed_zero3_cpuoffload@_here_ + + hydra: + run: + dir: . + output_subdir: null + + exp_name: "agentic_pipeline" + seed: 42 + logging_dir: ./output/logs + output_dir: ./output + render_save_dir: ./output/render + system_envs: + USE_MODELSCOPE: '1' + + #track_with: wandb + #tracker_kwargs: + # api_key: + # project: roll-agentic + # name: ${exp_name}_sokoban + # notes: "agentic_pipeline" + # tags: + # - agentic + # - roll + # - baseline + + track_with: tensorboard + tracker_kwargs: + log_dir: ./data/oss_bucket_0/yali/llm/tensorboard/roll_exp/agentic_sokoban + + + checkpoint_config: + type: file_system + output_dir: ./data/cpfs_0/rl_examples/models/${exp_name} + + num_gpus_per_node: 4 + + max_steps: 128 + save_steps: 10000 + logging_steps: 1 + eval_steps: 10 + resume_from_checkpoint: false + + rollout_batch_size: 16 + val_batch_size: 16 + sequence_length: 1024 + + advantage_clip: 0.2 + ppo_epochs: 1 + adv_estimator: "grpo" + #pg_clip: 0.1 + #dual_clip_loss: True + init_kl_coef: 0.0 + whiten_advantages: true + entropy_loss_coef: 0 + max_grad_norm: 1.0 + + pretrain: Qwen/Qwen2.5-0.5B-Instruct + reward_pretrain: Qwen/Qwen2.5-0.5B-Instruct + + actor_train: + model_args: + attn_implementation: fa2 + disable_gradient_checkpointing: false + dtype: bf16 + model_type: ~ + training_args: + learning_rate: 1.0e-6 + weight_decay: 0 + per_device_train_batch_size: 2 + gradient_accumulation_steps: 64 + warmup_steps: 10 + lr_scheduler_type: cosine + data_args: + template: qwen2_5 + strategy_args: + strategy_name: deepspeed_train + strategy_config: ${deepspeed_zero3} + # strategy_name: megatron_train + # strategy_config: + # tensor_model_parallel_size: 1 + # pipeline_model_parallel_size: 1 + # expert_model_parallel_size: 1 + # use_distributed_optimizer: true + # recompute_granularity: full + device_mapping: list(range(0,2)) + infer_batch_size: 2 + + actor_infer: + model_args: + disable_gradient_checkpointing: true + dtype: bf16 + generating_args: + max_new_tokens: 128 # single-turn response length + top_p: 0.99 + top_k: 100 + num_beams: 1 + temperature: 0.99 + num_return_sequences: 1 + data_args: + template: qwen2_5 + strategy_args: + strategy_name: vllm + strategy_config: + gpu_memory_utilization: 0.6 + block_size: 16 + load_format: auto + device_mapping: list(range(2,3)) + + reference: + model_args: + attn_implementation: fa2 + disable_gradient_checkpointing: true + dtype: bf16 + model_type: ~ + data_args: + template: qwen2_5 + strategy_args: + strategy_name: hf_infer + strategy_config: ~ + device_mapping: list(range(3,4)) + infer_batch_size: 2 + + reward_normalization: + grouping: traj_group_id # 可以tags(env_type)/traj_group_id(group)/batch(rollout_batch)... group_by计算reward/adv + method: mean_std # asym_clip / identity / mean_std + + train_env_manager: + format_penalty: -0.15 # sokoban env penalty_for_step=-0.1 + max_env_num_per_worker: 4 + num_env_groups: 8 + # under the same group, the env config and env seed are ensured to be equal + group_size: 1 + tags: [SimpleSokoban] + num_groups_partition: [8] # If not set, all env names divide nums equally. Under the same group, the env config and env seed (prompt) are equal in each generation + + val_env_manager: + max_env_num_per_worker: 32 + num_env_groups: 64 + group_size: 1 # should be set to 1 because val temperature is set to 0 and same prompt leads to same output + tags: [SimpleSokoban, LargerSokoban, SokobanDifferentGridVocab, FrozenLake] + num_groups_partition: [16, 16, 16, 16] # TODO: If not set, all env names divide nums equally. Under the same group, the env config and env seed (prompt) are equal in each generation + + + # Here, you can override variables defined in the imported envs. max_tokens_per_step: 128 in custom_env.SimpleSokoban, here replaced by 64 + max_tokens_per_step: 64 + + custom_envs: + SimpleSokoban: + ${custom_env.SimpleSokoban} + LargerSokoban: + ${custom_env.LargerSokoban} + SokobanDifferentGridVocab: + ${custom_env.SokobanDifferentGridVocab} + FrozenLake: + ${custom_env.FrozenLake} + FrozenLakeThink: + ${custom_env.FrozenLakeThink} + + +使用配置文件执行: + +.. code-block:: bash + + python examples/start_agentic_pipeline.py \ + --config_path qwen2.5-0.5B-agentic \ + --config_name agentic_val_sokoban + + - ``--config_path`` – 包含您的YAML配置文件的目录。 + - ``--config_name`` – 文件名(不含.yaml后缀)。 + +若执行遇到报错: + +.. code-block:: bash + + ... + File ".../roll/lib/python3.10/enum.py", + line 701, in __new__ + raise ve_exc + + ValueError: is not a valid Sentinel + +可尝试将 Click 版本版本降级到 8.2.1: + +.. code-block:: bash + + pip install --force-reinstall 'click==8.2.1' + +支持现状 +^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :header-rows: 1 + :widths: 15 40 15 15 15 + + * - Feature + - Example + - Training Backend + - Inference Backend + - Hardware + * - Agentic + - examples/qwen2.5-0.5B-agentic/run_agentic_pipeline_sokoban.sh + - DeepSpeed + - vLLM + - Atlas 900 A2 PODc + * - Agentic-Rollout + - examples/qwen2.5-0.5B-agentic/run_agentic_rollout_sokoban.sh + - DeepSpeed + - vLLM + - Atlas 900 A2 PODc + * - DPO + - examples/qwen2.5-3B-dpo_megatron/run_dpo_pipeline.sh + - DeepSpeed + - vLLM + - Atlas 900 A2 PODc + * - RLVR + - examples/qwen2.5-7B-rlvr_megatron/run_rlvr_pipeline.sh + - DeepSpeed + - vLLM + - Atlas 900 A2 PODc + +声明 +^^^^^^^^^^^^^^^^^^^^^^ + +ROLL 中提供的 Ascend 支持代码皆为参考样例,生产环境使用请通过官方正式途径沟通,谢谢。 \ No newline at end of file