From 4a5a3611839a9429c0522581e61c11959cb11fd3 Mon Sep 17 00:00:00 2001 From: lowdy1 Date: Wed, 19 Nov 2025 02:40:15 +0000 Subject: [PATCH] add verl doc --- _static/images/volcano.png | Bin 0 -> 3534 bytes index.rst | 25 ++++++- sources/verl/index.rst | 8 +++ sources/verl/install.rst | 118 +++++++++++++++++++++++++++++++++ sources/verl/quick_start.rst | 124 +++++++++++++++++++++++++++++++++++ 5 files changed, 272 insertions(+), 3 deletions(-) create mode 100644 _static/images/volcano.png create mode 100644 sources/verl/index.rst create mode 100644 sources/verl/install.rst create mode 100644 sources/verl/quick_start.rst diff --git a/_static/images/volcano.png b/_static/images/volcano.png new file mode 100644 index 0000000000000000000000000000000000000000..0a97a939afe728d3705a8d89c6c698b8a522468e GIT binary patch literal 3534 zcmY*cc|6qJ_y3G3W;8NGX$EPGM8-165Xm;yB#H@QMN2S)<|~ZK|bH<`Th0#NA#*d*c^)78 zvtUpjh5O^ocNk?t7lHV# ziVm=Sa}#Kv%UxY%O|1g-9UyZ9h}h|w+Zmi^AOHaI*qmfwd*k8!qlk#ltsSSh_%a64EfE5ervh`~-$*5^zW|jq3*XjiYmMnt;!jlJI3nCi6zwNu6Y>FoQ-!arPk@Kh49TBy@2a@=H>Y?&w?>KlAR~2FUagZ zT9hDju*uvJb0kQq5$=dt6#0*B;T}C<+hEY#gH#`KYx*#Ykoka3RaeDXevHXF|E~5L z#h!LgNH;Gs^nG(+5V>{eOI)BExz$~?>A3?&SP9WyKATaP@J=1uXzqYH=Z}!P;z=Sh z_p)4=S>&j2sH?q)_2*JGD=b3J)AOMb+KS6HlSbrucuF>)#V}uN3loH|On;$i9mG!@ zd~GJ@;Yo=i0n^!vLAG;^XtE$Qx=j}C_?cvdTf@}C^H zh_X{FTdfheMlaRVco}of&R_kSqvPnLZmxVeqq#VDmY6h#E=o{4v$P7yG?DkXpVY}6 z@PMlyK^j1x_RbV06gmZ6#_sOFY9+*yDOam>FkI=Hz|)fR=~`;|rxty>;rk4Z*g&aH zWD|B_1bU^|8)16Y)3&Yly&Yl=WtZ5_%})X&M7Pl&Tm_KK&nvlUO8 zBH=t_EFQANPgEUqG`gtO-8278mk^0tZ`~@Mn3wO+Z#D9Zk(}={b~T=n-WTzG52qKl zxBasiJkK%_xo27J?WfC8)P_&$68q$TcL+~~=20b!Gx`(g-9F2`(OA!i|Nc#68PmQd z--u~{=#$vYB{)*9E_MnRa$zcL&4r-*7m8BtG5Z3@hdddjEBx*})G=(=tbgYnoC9!9 zn#B%0e0~Tr^A)5VarZ23Id59Smm-f!YT%X-Dbt1IA=qP$YbE?91*-O#_kF@??|Su0 zTdX0f)RsU=GV={YeNnHj5a)EUIMqbnUZ%QX&xgMd$O1qD`JY^(MmuE|rrjUP&|sO# z94JKDW9s=W^>n|WiCA2sbqTXd?a3)Kf#tH;CCcj822)#&YBprrz;VMvl(KVc^=n(b z*B{MSlmcsx*r>!>yd^ffMCSK1&BsRi$P7o%JUD45zQr#(kYru_`C|r5kMj*XX=lIS 
z!aAItA(;VaVztH5Mf{a&NFCgE-EKWyd`x{uqMG?iEeVzXHKzSJ`Yv{K!#M}4hll$5 z`WW1LX1s~2O^;6uG{Zyhyci_-Ld3Az66hjpyn86}wBdml;Z3)mn4;@=Dgu7OvhyUg z?X7MZS!g_1x4tHZ-Yq!Gl^1ZLgFTOz78=D#hnsFJ3P0|6F6QSc`8V{kKG^e=wb1w@ z&P%c9jmxcKW-bLkQ7E}2vh}x=amb+ngB>rD*1Lr$g%&BtU`OXDL&5hYT~0!Xi5o6~ z&kPC^=4o?JQ0MzA{il8S(MKYGy>raJltX?$1koGho&4e0x3kke#?>2UK`-cq3FY%N z*WyDS>DS<#SXVgbsNn~$R*?w;gLRg{CvQJL==yr`&8qTVd(1j*j)v;dC|ju>g0#j| zk+6>BR;n(=40&H^dc0E=nT;g1bF~Tt5O7?q>&@YhG}w2|qua!7{&X=~ASCEg$J*>; zea7|f%g;NCgweBemX+pE94gyRUK;6*-;t7krx$(LF)mCIV%l1#9M zajtk4et|p|h)b!NlC^kCL&q7se!fec*FGRfQWi2KD4J)G@-=a@Lk$E-8FL@g6*3-% zL-v{+yz0)o?bQ53m5o0p52Q29kKtOTvWA9+R8K+{5RUo(CIZX*e3S0S~iEmlD zglNK%inYU(a=!+9BL+0;gU;TQUb)z`Sj<5!fU>dHrD`1zz3T}}eD*NE338H~J#8}+ z*4)aqzYo2>cUi<9<}@KjcP+Ju61y4K>8__zh|)2s#x!Zkl%sc9?)EtiQny`-#PLnd zgrvIOZ{&Kze~OvlKWH)Rt^!|arP@zO9lhFFRGW2*sdZ=S+NR;2@tW`t8jr5Wl{kui zjZkEML-D#d?j`Ga_=nKYIHV&?()47kE0xHm-^h_#8O0UqMA{+;ciVo--f=~ZojY8J z@TxE0k0KWFb0Z{~=5h12_%{7vM~tsmy)u@lVKt>I*7VK{5x>VG#CUjgLD+S``##ZK z9vl5J*fs++akp2zV8ixz_tZyUufbLNa`vXS1pZP_%tklMU8ErDX!#*KCl;yx9p;S` zd8M$wH8rEr*C96E8eXIBsyplkYsUrUBaTq_0SL)UI&ju`H~P2GovDSvi1NCpsTGSP zXVG=T8Hka0jZRcNMP9(h_jSSOgsR7(=ji64`e(L!yKj5?NTDXv3&+f&#$O&RMK=#d zbt}Cg82TspNS9=u7eM4rE$kc$@x!pfCq}MIYf;ReS+PB)0^DwY&_AIaNn9*NpT2IN z(-RaXlKoe^{t2s231q|{SkJmXb6|<8#C`M#1vowKm^4dkQUuw#+aDxdqPgCY^5Vg9 z2X^9sin+FGQEMGu8LjOkbOv7F2d}k|eRQvHzxNbv?5Z5^`T*`}B*`n0I!%*yv{RSG zhlz@%=&`O(j2usvCJ$H2?m<;8?-*1N6vgFlV8>KC7L#WoKcp!Hxk%Vq8Jvh|9Vh@1u*^KS(~e$ zmeZ6jtYNpJVTgv8_!rgwzRC(=qS%n+C50iF@qsEj^cYW;D}Qy7hGqKMbwYW0&Kx+j z73#(-((wQzQ_Jb&oJ_%YZ$&AzELFuG3W`OFoEF7?VxCT}F`kOat{6T$IpCWNqZ30S zGubHJV=4PR?TEv2%Metx1Y<-&y~Y2Zd$HURM*kEPw-ibo$&RqZw((^4m=rcd^TR`( zRcebr))2+wX+~p@HxSH!%O|u<1#FtcoL>;4xa>8e)=0Q0E@1gwuBcAN4^{Fi*T@YN zvJTOBc+;KNP@)Ab2T3UAE$YzSLaxb)u-a((#uQ$9M970no#}X1{l%LIm(BkyB`8$@ z-D2+4m15@p7XM3SL7HQ-)uC=&LdFi`Jd%oU5+Y%k99S-vLuN{_>JJS@=#9h;Rqn4D z+R;amnP+H7rshDV$HTZGeFlf;TvSJcQL5j&&5(!rO(Ku-p;Ru^(qOJ14tu{JuT*la 
zsmo+TZp9aw*NIi_HQJW4IjxUjcG+oMFho)nZXoHu^eQ!*YpRvd5+`$J?k6tlKxzYT zBAj_KZmvUK#PfXRu*{wy&C>4gjvNET;EM<7Kgp<4tL7)5mm0)w$}gf;nq zOXHu#tQp?8`jjD^+KqeqBGIf%n2>L9hV*tw-kpnfof##=k2qw0Ar8#wCqytH?QWe7 z{xAOJ0(bjp`4?s8MTmb^6`7}b74*0*N7B8^hpLvlbFPVcuHkH1P*??$zS*{`mDzA} z{hoeWG)wugAn8eO{Hvy0|5uXxJhTtqe=4sQ+JW1AgO{c^8thU1Sy%0UhP-g>deaRk znj!t&EBqG^*WJ411bzVGY@N z$^860z~F!R#>G=_q}|j^LzLlqg2r_#8 zHjT&Rww$*eA-)RpHQ&WdCV!uq;yd$XKk!?i+otGR_$M%l;{0@PXV}wEph~lq&V0q^ e|3^;Hc7-8Is_M3>x(U1w8end0MS5vSiT)qYx_yKI literal 0 HcmV?d00001 diff --git a/index.rst b/index.rst index 1de34b5..701768e 100644 --- a/index.rst +++ b/index.rst @@ -40,6 +40,7 @@ sources/torchchat/index.rst sources/torchtitan/index.rst sources/sglang/index.rst + sources/verl/index.rst 选择您的偏好,并按照 :doc:`快速安装昇腾环境` 的安装指导进行操作。 @@ -382,7 +383,7 @@

TorchTitan

-

用于语言大模型训练的PyTorch原生库

+

用于语言大模型训练的 PyTorch 原生库

@@ -400,7 +401,7 @@

SGLang

-

用于LLM和VLM的高速服务框架

+

用于 LLM 和 VLM 的高速服务框架

@@ -411,6 +412,24 @@ | 快速上手 - + + +
+
+
+
+

verl

+

用于 LLM 的强化学习训练库

+
+
+
+ +
diff --git a/sources/verl/index.rst b/sources/verl/index.rst new file mode 100644 index 0000000..65d1d56 --- /dev/null +++ b/sources/verl/index.rst @@ -0,0 +1,8 @@ +verl +============ + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst diff --git a/sources/verl/install.rst b/sources/verl/install.rst new file mode 100644 index 0000000..d969062 --- /dev/null +++ b/sources/verl/install.rst @@ -0,0 +1,118 @@ +安装指南 +============== + +本教程面向使用 verl & Ascend 的开发者,帮助完成昇腾环境下 verl 的安装。 + +昇腾环境安装 +------------ + +请根据已有昇腾产品型号及 CPU 架构等按照 :doc:`快速安装昇腾环境指引 <../ascend/quick_install>` 进行昇腾环境安装。 + +.. warning:: + CANN 最低版本为 8.3.RC1,安装 CANN 时,请同时安装 Kernel 算子包以及 nnal 加速库软件包。 + +Python 环境创建 +---------------------- + +.. code-block:: shell + :linenos: + + # 创建名为 verl 的 python 3.11 的虚拟环境 + conda create -y -n verl python==3.11 + # 激活虚拟环境 + conda activate verl + +Torch 安装 +---------------------- + +.. code-block:: shell + :linenos: + + # 安装 torch 2.7.1 的 CPU 版本(及配套的 torchvision、torchaudio) + pip install torch==2.7.1 torchvision==0.22.1 torchaudio==2.7.1 --index-url https://download.pytorch.org/whl/cpu + + # 安装 torch-npu 2.7.1 + pip install torch-npu==2.7.1 + +vllm & vllm-ascend 安装 +------------------------ + + +方法一:使用以下命令编译安装 vllm 和 vllm-ascend。请注意根据机器类型区分安装方式。 + +.. code-block:: shell + :linenos: + + # vllm + git clone -b v0.11.0 --depth 1 https://github.com/vllm-project/vllm.git + cd vllm + pip install -r requirements-build.txt + + # for Atlas 200T A2 Box16 + VLLM_TARGET_DEVICE=empty pip install -e . --extra-index https://download.pytorch.org/whl/cpu/ + + # for Atlas 900 A2 PODc or Atlas 800T A3 + VLLM_TARGET_DEVICE=empty pip install -e . + +.. code-block:: shell + :linenos: + + # vllm-ascend + git clone -b v0.11.0rc1 --depth 1 https://github.com/vllm-project/vllm-ascend.git + cd vllm-ascend + pip install -e . + + +方法二:使用以下命令直接安装预编译好的 vllm 和 vllm-ascend。 + +.. code-block:: shell + :linenos: + + # Install vllm-project/vllm. The newest supported version is v0.11.0.
+ pip install vllm==0.11.0 + + # Install vllm-project/vllm-ascend from pypi. + pip install vllm-ascend==0.11.0rc1 + +安装 verl +---------------------- + +使用以下指令安装 verl 及相关依赖: + +.. code-block:: shell + :linenos: + + git clone https://github.com/volcengine/verl.git + cd verl + + # Install verl NPU dependencies + pip install -r requirements-npu.txt + pip install -e . + + +其他第三方库说明 +---------------------- + ++----------------------+---------------------------+ +| Software | Description | ++======================+===========================+ +| transformers | >=v4.57.1 | ++----------------------+---------------------------+ +| flash_attn | not supported | ++----------------------+---------------------------+ +| liger-kernel | not supported | ++----------------------+---------------------------+ + + +1. 支持通过 transformers 使能 ``--flash_attention_2``, transformers 需大于等于 4.57.1 版本。 + +2. 不支持通过 flash_attn 使能 flash attention 加速。 + +3. 不支持 liger-kernel 使能。 + +4. 针对 x86 服务器,需要安装 cpu 版本的 torchvision。 + +.. code-block:: shell + :linenos: + + pip install torchvision==0.22.1+cpu --index-url https://download.pytorch.org/whl/cpu \ No newline at end of file diff --git a/sources/verl/quick_start.rst b/sources/verl/quick_start.rst new file mode 100644 index 0000000..cf09617 --- /dev/null +++ b/sources/verl/quick_start.rst @@ -0,0 +1,124 @@ +快速开始 +================== + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境及 verl 所需的环境。 + + 本篇教程将介绍如何使用 verl 进行快速训练,帮助您快速上手 verl。 + +本文档帮助昇腾开发者快速使用 verl × 昇腾 进行 LLM 强化学习训练。可以访问 `这篇官方文档 <https://verl.readthedocs.io/en/latest/start/quickstart.html>`_ 获取更多信息。 + +也可以参考官方的 `昇腾快速开始文档 <https://verl.readthedocs.io/en/latest/ascend_tutorial/ascend_quick_start.html>`_ 。 + +正式使用前,建议通过对 Qwen2.5-0.5B PPO 的训练尝试以检验环境准备和安装的正确性,并熟悉基本的使用流程。 + +接下来将介绍如何在单张 NPU 卡上使用 verl 进行 PPO 训练: + +基于 GSM8K 数据集对 Qwen2.5-0.5B 模型进行 PPO 训练 +-------------------------------------------------- + +使用 GSM8K 数据集 post-train Qwen2.5-0.5B 模型。 + +数据集介绍 +^^^^^^^^^^^^^^^^^^^^^^ + +GSM8K 是一个包含初等数学问题的数据集,用于 LLM 的数学推理能力的训练或评估。以下是一组 prompt 与 solution 示例: + +Prompt + + James writes a 3-page letter to 2 different friends twice a week.
+ How many pages does he write a year? + +Solution + + He writes each friend 3*2=<<3*2=6>>6 pages a week So he writes + 6*2=<<6*2=12>>12 pages every week That means he writes + 12*52=<<12*52=624>>624 pages a year #### 624 + +准备数据集 +^^^^^^^^^^^^^^^^^^^^^^ + +用户可以根据实际需要修改 ``--local_save_dir`` 参数指定数据集的保存路径。 + +.. code-block:: bash + + python3 examples/data_preprocess/gsm8k.py --local_save_dir ~/data/gsm8k + +准备模型 +^^^^^^^^^^^^^^^^^^^^^^ + +在本实例中,使用 Qwen2.5-0.5B-Instruct 作为基础模型进行 PPO 训练。 + +用户可以设置 ``VERL_USE_MODELSCOPE=True`` 由 `modelscope <https://www.modelscope.cn>`_ 下载模型。 + +.. code-block:: bash + + python3 -c "import transformers; transformers.pipeline('text-generation', model='Qwen/Qwen2.5-0.5B-Instruct')" + +启动 PPO 训练 +^^^^^^^^^^^^^^^^^^^^^^ + +**Reward Model/Function** + +在本实例中,我们使用一个简单的奖励函数来评估生成答案的正确性。我们认为模型产生的位于 “####” 符号后的数值为其给出的答案。 +如果该答案与正确答案匹配,则 reward 为 1,否则为 0。 + +对于其他细节,可以参考 `verl/utils/reward_score/gsm8k.py <https://github.com/volcengine/verl/blob/main/verl/utils/reward_score/gsm8k.py>`_ 。 + +**Training Script** + +根据用户的数据集以及模型的实际位置修改 ``data.train_files``, ``data.val_files``, ``actor_rollout_ref.model.path``, ``critic.model.path`` 等参数即可。 + +..
code-block:: bash + + PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \ + data.train_files=$HOME/data/gsm8k/train.parquet \ + data.val_files=$HOME/data/gsm8k/test.parquet \ + data.train_batch_size=256 \ + data.max_prompt_length=512 \ + data.max_response_length=512 \ + actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B-Instruct \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.actor.ppo_mini_batch_size=64 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ + actor_rollout_ref.rollout.name=vllm \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \ + critic.optim.lr=1e-5 \ + critic.model.path=Qwen/Qwen2.5-0.5B-Instruct \ + critic.ppo_micro_batch_size_per_gpu=4 \ + algorithm.kl_ctrl.kl_coef=0.001 \ + trainer.logger=console \ + trainer.val_before_train=False \ + trainer.n_gpus_per_node=1 \ + trainer.nnodes=1 \ + trainer.save_freq=10 \ + trainer.test_freq=10 \ + trainer.total_epochs=15 \ + trainer.device=npu 2>&1 | tee verl_demo.log + +如果顺利配置环境并运行,将看到如下类似的输出: + +.. 
code-block:: bash + + step:0 - timing/gen:21.470 - timing/ref:4.360 - timing/values:5.800 - actor/reward_kl_penalty:0.000 - actor/reward_kl_penalty_coeff:0.001 - timing/adv:0.109 - timing/update_critic:15.664 + - critic/vf_loss:14.947 - critic/vf_clipfrac:0.000 - critic/vpred_mean:-2.056 - critic/grad_norm:1023.278 - critic/lr(1e-4):0.100 - timing/update_actor:20.314 - actor/entropy_loss:0.433 + - actor/pg_loss:-0.005 - actor/pg_clipfrac:0.000 - actor/ppo_kl:0.000 - actor/grad_norm:1.992 - actor/lr(1e-4):0.010 - critic/score/mean:0.004 - critic/score/max:1.000 + - critic/score/min:0.000 - critic/rewards/mean:0.004 - critic/rewards/max:1.000 - critic/rewards/min:0.000 - critic/advantages/mean:-0.000 - critic/advantages/max:2.360 + - critic/advantages/min:-2.280 - critic/returns/mean:0.003 - critic/returns/max:0.000 - critic/returns/min:0.000 - critic/values/mean:-2.045 - critic/values/max:9.500 + - critic/values/min:-14.000 - response_length/mean:239.133 - response_length/max:256.000 - response_length/min:77.000 - prompt_length/mean:104.883 - prompt_length/max:175.000 + - prompt_length/min:68.000 + step:1 - timing/gen:23.020 - timing/ref:4.322 - timing/values:5.953 - actor/reward_kl_penalty:0.000 - actor/reward_kl_penalty_coeff:0.001 - timing/adv:0.118 - timing/update_critic:15.646 + - critic/vf_loss:18.472 - critic/vf_clipfrac:0.384 - critic/vpred_mean:1.038 - critic/grad_norm:942.924 - critic/lr(1e-4):0.100 - timing/update_actor:20.526 - actor/entropy_loss:0.440 + - actor/pg_loss:0.000 - actor/pg_clipfrac:0.002 - actor/ppo_kl:0.000 - actor/grad_norm:2.060 - actor/lr(1e-4):0.010 - critic/score/mean:0.000 - critic/score/max:0.000 + - critic/score/min:0.000 - critic/rewards/mean:0.000 - critic/rewards/max:0.000 - critic/rewards/min:0.000 - critic/advantages/mean:0.000 - critic/advantages/max:2.702 + - critic/advantages/min:-2.616 - critic/returns/mean:0.000 - critic/returns/max:0.000 - critic/returns/min:0.000 - critic/values/mean:-2.280 - critic/values/max:11.000 + -
critic/values/min:-16.000 - response_length/mean:232.242 - response_length/max:256.000 - response_length/min:91.000 - prompt_length/mean:102.398 - prompt_length/max:185.000 + - prompt_length/min:70.000 + +References + +.. [1] https://verl.readthedocs.io/en/latest/start/install.html \ No newline at end of file