{"id":1784,"date":"2026-07-01T18:08:54","date_gmt":"2026-07-01T10:08:54","guid":{"rendered":"https:\/\/www.izhuhn.cn\/?p=1784"},"modified":"2026-07-01T18:08:54","modified_gmt":"2026-07-01T10:08:54","slug":"rlinf%ef%bc%88main%e5%88%86%e6%94%af%ef%bc%89ascend-npu-%e9%80%82%e9%85%8d%e4%b8%8e%e9%83%a8%e7%bd%b2%e7%ae%80%e8%ae%b0","status":"publish","type":"post","link":"https:\/\/www.izhuhn.cn\/index.php\/2026\/07\/01\/rlinf%ef%bc%88main%e5%88%86%e6%94%af%ef%bc%89ascend-npu-%e9%80%82%e9%85%8d%e4%b8%8e%e9%83%a8%e7%bd%b2%e7%ae%80%e8%ae%b0\/","title":{"rendered":"RLinf\uff08main\u5206\u652f\uff09Ascend NPU \u9002\u914d\u4e0e\u90e8\u7f72\u7b80\u8bb0"},"content":{"rendered":"\n<p class=\"wp-block-paragraph\">> \u57fa\u4e8e RLinf \u4e3b\u5206\u652f\uff08commit a4b6abe\uff09\u9002\u914d Ascend 910B3 NPU \u7684\u5b8c\u6574\u6d41\u7a0b\u8bb0\u5f55<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">---<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e00\u3001\u80cc\u666f<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">RLinf \u662f\u4e00\u4e2a\u9762\u5411\u673a\u5668\u4eba\u57fa\u7840\u6a21\u578b\u7684\u5f3a\u5316\u5b66\u4e60\u8bad\u7ec3\u6846\u67b6\uff0c\u652f\u6301\u5927\u89c4\u6a21\u5206\u5e03\u5f0f RL \u8bad\u7ec3\u3002\u6846\u67b6\u4e3b\u5206\u652f\uff08main\uff09\u76f8\u6bd4\u65e7\u5206\u652f\u6709\u5927\u5e45\u91cd\u6784\uff0c\u91c7\u7528 Ray + FSDP \u67b6\u6784\uff0c\u5e76\u901a\u8fc7 Hydra \u914d\u7f6e\u7cfb\u7edf\u7ba1\u7406\u5b9e\u9a8c\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u672c\u6587\u8bb0\u5f55\u5c06 RLinf main \u5206\u652f\u9002\u914d\u5230 Huawei Ascend 910B3 NPU \u5e73\u53f0\u7684\u8fc7\u7a0b\uff0c\u6db5\u76d6\u73af\u5883\u90e8\u7f72\u3001\u4ee3\u7801\u4fee\u6539\u3001\u6570\u636e\u96c6\u51c6\u5907\u548c\u5b8c\u6574\u8bad\u7ec3\u6d41\u7a0b\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">---<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e8c\u3001\u73af\u5883\u90e8\u7f72<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">2.1 \u786c\u4ef6\u73af\u5883<\/h3>\n\n\n\n<figure 
class=\"wp-block-table\"><table>\n<thead><tr><th>\u9879\u76ee<\/th><th>\u914d\u7f6e<\/th><\/tr><\/thead>\n<tbody>\n<tr><td>NPU<\/td><td>8\u00d7 Ascend 910B3\uff0c64GB HBM\/\u5361<\/td><\/tr>\n<tr><td>CPU<\/td><td>192 \u6838 aarch64<\/td><\/tr>\n<tr><td>\u5185\u5b58<\/td><td>512GB<\/td><\/tr>\n<tr><td>OS<\/td><td>Ubuntu 22.04 aarch64<\/td><\/tr>\n<tr><td>CANN<\/td><td>8.3.rc1<\/td><\/tr>\n<\/tbody>\n<\/table><\/figure>\n\n\n\n<h3 class=\"wp-block-heading\">2.2 \u955c\u50cf\u6784\u5efa<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u57fa\u7840\u955c\u50cf\u4f7f\u7528\u534e\u4e3a\u4e91 Ascend Hub \u63d0\u4f9b\u7684 CANN \u955c\u50cf\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>FROM swr.cn-south-1.myhuaweicloud.com\/ascendhub\/cann:8.3.rc1-910b-ubuntu22.04-py3.11<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u5b8c\u6574 Dockerfile \u89c1\u4ed3\u5e93\u4e0a\u7684 `RLinf_openpi_Dockerfile`\u3002\u6784\u5efa\u65f6\u4f7f\u7528\u56fd\u5185\u955c\u50cf\u52a0\u901f\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>docker build -f RLinf_openpi_Dockerfile --build-arg USE_MIRRORS=1 -t rlinf-ascend:latest .<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u5173\u952e\u4f9d\u8d56\u5b89\u88c5\u547d\u4ee4\uff08Dockerfile \u4e2d\u5df2\u96c6\u6210\uff09\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>bash requirements\/install.sh \\\n    --use-mirror \\\n    --platform ascend \\\n    embodied \\\n    --model openpi \\\n    --env maniskill_libero<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">2.3 \u5bb9\u5668\u542f\u52a8<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>docker run -itd --name rlinf_train \\\n    --privileged \\\n    --network host \\\n    --shm-size=32g \\\n    -v \/data:\/data \\\n    rlinf-ascend:latest<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">---<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e09\u3001Ascend NPU \u9002\u914d\u4fee\u6539<\/h2>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u4e3b\u6846\u67b6\u4ee3\u7801\u5207\u6362\u5230 main \u5206\u652f\u540e\uff08`git checkout a4b6abe`\uff09\uff0c\u5728 Ascend NPU \u4e0a\u9047\u5230\u4e86\u4e09\u7c7b\u517c\u5bb9\u6027\u95ee\u9898\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">3.1 torch.cuda API \u4e0d\u53ef\u7528<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">Ascend NPU \u73af\u5883\u5b89\u88c5\u7684\u662f `torch 2.6.0+cpu`\uff08\u4e0d\u5e26 CUDA\uff09\uff0c\u6240\u6709 `torch.cuda.*` \u8c03\u7528\u90fd\u4f1a\u629b `AttributeError`\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">**\u53d7\u5f71\u54cd\u6587\u4ef6\u548c\u4fee\u590d\uff1a**<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># rlinf\/workers\/sft\/fsdp_value_sft_worker.py\n# rlinf\/workers\/sft\/fsdp_cfg_worker.py\n# \u5220\u9664\u4ee5\u4e0b\u4e24\u884c\uff1a\n- torch.cuda.set_device(int(os.environ.get(\"LOCAL_RANK\", 0)))\n- self.device = torch.cuda.current_device()<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">`FSDPModelManager.__init__()` \u5df2\u7ecf\u901a\u8fc7 `Worker.torch_platform.set_device()` \u81ea\u52a8\u5904\u7406\u4e86\u8bbe\u5907\u8bbe\u7f6e\uff0c\u8fd9\u4e9b\u884c\u662f\u591a\u4f59\u7684\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># rlinf\/data\/process\/distributed.py\n# \u589e\u52a0 NPU \u5206\u652f\uff1a\nelif hasattr(torch, 'npu') and torch.npu.is_available():\n    torch.npu.set_device(local_rank)\n    device = f\"npu:{local_rank}\"<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">3.2 torchcodec \u4e0d\u517c\u5bb9<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">`torchcodec 0.14.0` \u4f9d\u8d56 CUDA \u5e93 `libnvrtc.so.13`\uff0c\u5728 Ascend \u4e0a\u65e0\u6cd5\u52a0\u8f7d\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">**\u4fee\u590d\uff1a** \u5378\u8f7d torchcodec\uff0c`LeRobotDataset` \u6539\u7528 pyav \u540e\u7aef\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>dataset = LeRobotDataset(\n    dataset_path,\n    download_videos=False,\n    video_backend=\"pyav\",  
# \u4ee3\u66ff\u9ed8\u8ba4\u7684 torchcodec\n)<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">3.3 GPU \u68c0\u6d4b\u548c\u5206\u5e03\u5f0f\u540e\u7aef<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">`run_compute_advantages.sh` \u7528 `nvidia-smi` \u68c0\u6d4b GPU \u6570\u91cf\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u4fee\u6539\u524d\uff1anvidia-smi \u8fd4\u56de 0\uff0cwc -l \u8fd4\u56de exit 0\uff0cNPROC=0 \u2192 1\nNPROC_PER_NODE=$(nvidia-smi -L 2&gt;\/dev\/null | wc -l || echo 1)\n\n# \u4fee\u6539\u540e\uff1a\u5148\u8bd5 nvidia\uff0c\u518d\u8bd5 torch_npu\nNPROC_PER_NODE=$(nvidia-smi -L 2&gt;\/dev\/null | wc -l)\nif [ \"$NPROC_PER_NODE\" -eq 0 ] 2&gt;\/dev\/null; then\n    NPROC_PER_NODE=$(python3 -c \"import torch; import torch_npu; print(torch.npu.device_count())\")\nfi<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u5206\u5e03\u5f0f\u540e\u7aef\u4ece `nccl` \u6539\u4e3a `hccl`\uff08Huawei Collective Communication Library\uff09\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>distributed:\n  enabled: true\n  backend: \"hccl\"  # \u800c\u4e0d\u662f \"nccl\"<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">---<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u56db\u3001\u6570\u636e\u96c6\u51c6\u5907<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">4.1 \u6570\u636e\u76ee\u5f55\u7ed3\u6784<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>\/data\/\n\u251c\u2500\u2500 pi05_base\/                    # Pi0.5 \u9884\u8bad\u7ec3\u6a21\u578b\u6743\u91cd\n\u251c\u2500\u2500 siglip2-so400m-patch14-224\/   # SigLIP \u89c6\u89c9\u7f16\u7801\u5668\n\u251c\u2500\u2500 gemma-3-270m\/                 # Gemma3 \u8bed\u8a00\u6a21\u578b\uff08270M\uff09\n\u2514\u2500\u2500 RECAP-Libero10-Task0-48succ-Data\/\n    \u251c\u2500\u2500 libero10_task0_sft\/       # \u8bad\u7ec3\u96c6\uff1a48 \u6761\u6210\u529f\u8f68\u8ff9\n    \u2502   \u251c\u2500\u2500 meta\/\n    \u2502   \u251c\u2500\u2500 videos\/               # \u89c6\u9891\u5e27\u6570\u636e\n    \u2502   
\u2514\u2500\u2500 ...\n    \u2514\u2500\u2500 libero10_task0_train\/     # \u9a8c\u8bc1\u96c6\n        \u2514\u2500\u2500 ...<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">4.2 \u6570\u636e\u96c6\u914d\u7f6e<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>**\u8bad\u7ec3\u96c6**: `libero10_task0_sft` \u2014 48 \u6761\u6210\u529f\u6f14\u793a\u8f68\u8ff9\uff0clibero10 \u573a\u666f<\/li>\n<li>**\u9a8c\u8bc1\u96c6**: `libero10_task0_train` \u2014 \u540c\u573a\u666f\u4f46\u4e0d\u540c episodes<\/li>\n<\/ul>\n\n\n\n<p class=\"wp-block-paragraph\">---<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e94\u3001\u8bad\u7ec3\u6d41\u7a0b<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u5b8c\u6574\u7684 RECAP-CFG-RL \u8bad\u7ec3\u5206\u4e3a 4 \u4e2a\u6b65\u9aa4\uff0c\u5f62\u6210\u4e00\u4e2a\u6570\u636e\u6807\u6ce8 \u2192 \u4ef7\u503c\u5b66\u4e60 \u2192 \u7b56\u7565\u4f18\u5316\u7684\u6d41\u6c34\u7ebf\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">Step 1: Compute Returns\uff08\u8ba1\u7b97\u6298\u6263\u56de\u62a5\uff09<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>\u8f93\u5165: \u539f\u59cb\u8f68\u8ff9\u6570\u636e\uff08\u542b\u6210\u529f\/\u5931\u8d25\u6807\u7b7e\uff09\n\u8f93\u51fa: meta\/returns.parquet\uff08\u6bcf\u6761\u8f68\u8ff9\u6bcf\u5e27\u7684\u6298\u6263\u56de\u62a5\uff09<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u4f7f\u7528 `run_compute_returns.sh`\uff0c\u6838\u5fc3\u53c2\u6570\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>data:\n  gamma: 1.0  # \u6298\u6263\u56e0\u5b50<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">Step 2: Value Model SFT\uff08\u4ef7\u503c\u6a21\u578b\u8bad\u7ec3\uff09<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>\u8f93\u5165: \u539f\u59cb\u8f68\u8ff9\u6570\u636e + meta\/returns.parquet\n\u8f93\u51fa: \u8bad\u7ec3\u597d\u7684\u4ef7\u503c\u6a21\u578b checkpoint<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u4f7f\u7528 `run_value_sft.sh`\uff0c\u91c7\u7528 FSDP \u5206\u7247\u7b56\u7565\uff08no_shard\uff09\uff0c0 
\u53f7\u5361\u4fdd\u5b58\u68c0\u67e5\u70b9\u3002\u5728 8\u00d7 910B3 \u4e0a\u6309 30000 epoch \u914d\u7f6e\u8bad\u7ec3\uff0cval_check_interval=500\uff0csave_interval=3000\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u5173\u952e\u914d\u7f6e\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-table\"><table>\n<thead><tr><th>\u53c2\u6570<\/th><th>\u503c<\/th><th>\u8bf4\u660e<\/th><\/tr><\/thead>\n<tbody>\n<tr><td>micro_batch_size<\/td><td>16<\/td><td>\u6bcf\u5361\u5fae\u6279\u6b21<\/td><\/tr>\n<tr><td>global_batch_size<\/td><td>128<\/td><td>\u603b\u6279\u6b21\uff088\u00d716\uff09<\/td><\/tr>\n<tr><td>lr<\/td><td>5e-5<\/td><td>\u7b56\u7565\u5b66\u4e60\u7387<\/td><\/tr>\n<tr><td>value_lr<\/td><td>1e-4<\/td><td>\u4ef7\u503c\u5934\u5b66\u4e60\u7387<\/td><\/tr>\n<tr><td>precision<\/td><td>bf16<\/td><td>\u6df7\u5408\u7cbe\u5ea6\u8bad\u7ec3<\/td><\/tr>\n<\/tbody>\n<\/table><\/figure>\n\n\n\n<h3 class=\"wp-block-heading\">Step 3: Compute Advantages\uff08\u8ba1\u7b97\u4f18\u52bf\u503c\uff09<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>\u8f93\u5165: \u539f\u59cb\u8f68\u8ff9\u6570\u636e + \u4ef7\u503c\u6a21\u578b checkpoint\n\u8f93\u51fa: meta\/advantages.parquet\uff08\u6bcf\u6761\u8f68\u8ff9\u6bcf\u5e27\u7684\u4f18\u52bf\u503c\uff09<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u6838\u5fc3\u8ba1\u7b97\u516c\u5f0f\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>A\u209c = normalize(r\u209c:\u209c\u208a\u2099) + \u03b3\u207f \u00b7 V(o\u209c\u208a\u2099) - V(o\u209c)<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u8fd0\u884c\u5728 8 \u5361 NPU \u4e0a\uff0c\u6bcf\u5361\u72ec\u7acb\u5904\u7406\u4e00\u4e2a\u5206\u7247\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">Step 4: CFG RL Training<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>\u8f93\u5165: \u539f\u59cb\u8f68\u8ff9\u6570\u636e + meta\/advantages.parquet + pi0.5 \u57fa\u7840\u6a21\u578b\n\u8f93\u51fa: \u8bad\u7ec3\u597d\u7684\u7b56\u7565\u6a21\u578b checkpoint<\/code><\/pre>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u4f7f\u7528 CFG\uff08Classifier-Free Guidance\uff09\u8fdb\u884c\u5f3a\u5316\u5b66\u4e60\u8bad\u7ec3\u3002\u6a21\u578b\u67b6\u6784\u4e3a OpenPI Pi0.5\uff08SigLIP + Gemma3 270M\uff09\uff0c\u6dfb\u52a0\u4e86 CFG \u6761\u4ef6\u63a7\u5236\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u65e5\u5fd7\u793a\u4f8b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>Global Step: 20\/31 (65%)\nloss=0.068, grad_norm=0.83\nconditional_ratio=0.92, unconditional_ratio=0.08<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">TensorBoard \u6307\u6807\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-table\"><table>\n<thead><tr><th>\u6307\u6807<\/th><th>\u542b\u4e49<\/th><\/tr><\/thead>\n<tbody>\n<tr><td>train\/loss<\/td><td>\u603b\u8bad\u7ec3\u635f\u5931<\/td><\/tr>\n<tr><td>train\/conditional_loss<\/td><td>\u6709\u6761\u4ef6\uff08positive CFG\uff09\u635f\u5931<\/td><\/tr>\n<tr><td>train\/unconditional_loss<\/td><td>\u65e0\u6761\u4ef6\u635f\u5931<\/td><\/tr>\n<tr><td>train\/grad_norm<\/td><td>\u68af\u5ea6\u8303\u6570<\/td><\/tr>\n<tr><td>train\/learning_rate<\/td><td>\u5b66\u4e60\u7387\uff08cosine schedule\uff09<\/td><\/tr>\n<tr><td>train\/conditional_ratio<\/td><td>\u6709\u6761\u4ef6\u6837\u672c\u6bd4\u4f8b<\/td><\/tr>\n<\/tbody>\n<\/table><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">---<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u516d\u3001\u65e5\u5fd7\u4e0e\u6a21\u578b\u8f93\u51fa<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">6.1 \u65e5\u5fd7\u8def\u5f84<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>logs\/\n\u251c\u2500\u2500 value_sft\/                    # Step 2 \u8f93\u51fa\n\u2502   \u2514\u2500\u2500 recap_value_model_sft-{timestamp}\/\n\u2502       \u251c\u2500\u2500 run_value_sft.log\n\u2502       \u251c\u2500\u2500 tensorboard\/\n\u2502       \u2514\u2500\u2500 value_sft\/\n\u2502           \u2514\u2500\u2500 checkpoints\/\n\u2502               \u251c\u2500\u2500 global_step_10\/\n\u2502               
\u251c\u2500\u2500 global_step_20\/\n\u2502               \u2514\u2500\u2500 ...\n\u2514\u2500\u2500 cfg_rl\/                       # Step 4 \u8f93\u51fa\n    \u2514\u2500\u2500 cfg_rl_openpi-{timestamp}\/\n        \u251c\u2500\u2500 run_cfg_rl.log\n        \u251c\u2500\u2500 tensorboard\/\n        \u2514\u2500\u2500 cfg_sft\/\n            \u2514\u2500\u2500 checkpoints\/\n                \u2514\u2500\u2500 global_step_10\/<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">6.2 \u68c0\u67e5\u70b9\u7ed3\u6784<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>global_step_10\/\n\u2514\u2500\u2500 actor\/\n    \u251c\u2500\u2500 dcp_checkpoint\/       # \u5206\u5e03\u5f0f FSDP checkpoint\uff08\u6bcf\u5361\uff09\n    \u2502   \u251c\u2500\u2500 .metadata\n    \u2502   \u251c\u2500\u2500 rank_0.pt\n    \u2502   \u2514\u2500\u2500 ...\n    \u2514\u2500\u2500 model_state_dict\/\n        \u2514\u2500\u2500 full_weights.pt   # \u5408\u5e76\u540e\u7684\u5355\u6743\u91cd\u6587\u4ef6<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">---<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e03\u3001\u95ee\u9898\u6392\u67e5<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">7.1 Segfault \u4e8e import transformers<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">**\u73b0\u8c61**: \u52a0\u8f7d transformers \u6a21\u578b\u65f6 Segfault\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">**\u539f\u56e0**: `tensorflow.python.platform.self_check` \u5728\u7ee7\u627f `torch_npu` \u7684\u8fdb\u7a0b\u4e2d\u4f1a crash\u3002\u5378\u8f7d tensorflow \u5373\u53ef\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>pip uninstall -y tensorflow<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">7.2 torch.cuda \u76f8\u5173\u5f02\u5e38<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">**\u73b0\u8c61**: `AttributeError: module 'torch._C' has no attribute '_cuda_setDevice'`<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">**\u539f\u56e0**: Ascend \u4e0a torch \u662f cpu \u7248\u672c\uff0c\u65e0 CUDA API\u3002<\/p>\n\n\n\n<p 
class=\"wp-block-paragraph\">**\u4fee\u590d**: \u5220\u9664 worker \u4e2d\u591a\u4f59\u7684 `torch.cuda.set_device` \u8c03\u7528\uff1b\u786e\u9700\u663e\u5f0f\u8bbe\u7f6e\u8bbe\u5907\u7684\u4f4d\u7f6e\u6539\u7528 `torch.npu.set_device`\uff08patch \u4e2d\u5df2\u5904\u7406\uff09\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">7.3 \u5355\u5361\u68c0\u6d4b<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">**\u73b0\u8c61**: \u811a\u672c\u663e\u793a `GPUs: 1`\uff0c\u5b9e\u9645\u6709 8 \u5361\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">**\u539f\u56e0**: `nvidia-smi` \u4e0d\u5b58\u5728\u65f6\uff0c`wc -l` \u4ecd\u8f93\u51fa 0 \u4e14 exit code \u4e3a 0\uff0c\u56e0\u6b64 `|| echo 1` \u515c\u5e95\u4e0d\u4f1a\u89e6\u53d1\uff0c\u811a\u672c\u4e5f\u4e0d\u4f1a\u53bb\u63a2\u6d4b NPU\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">**\u4fee\u590d**: \u811a\u672c\u4e2d\u7684\u8bbe\u5907\u68c0\u6d4b\u903b\u8f91\u6539\u4e3a\u94fe\u5f0f fallback\uff08patch \u4e2d\u5df2\u5904\u7406\uff09\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">7.4 advantage_tag \u8def\u5f84\u9519\u8bef<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">**\u73b0\u8c61**: `cfg_rl_openpi.yaml` \u4e2d `advantage_tag` \u8bbe\u4e3a\u5b8c\u6574\u8def\u5f84\uff0c\u4ee3\u7801\u5374\u62fc\u63a5\u6210 `advantages_{\u5b8c\u6574\u8def\u5f84}.parquet`\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">**\u4fee\u590d**: \u6ce8\u91ca\u6389 `advantage_tag` \u9879\uff0c\u4ee3\u7801\u81ea\u52a8\u4f7f\u7528\u9ed8\u8ba4\u7684 `meta\/advantages.parquet`\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">---<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u516b\u3001\u603b\u7ed3<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u672c\u6b21\u9002\u914d\u5171\u4fee\u6539 3 \u5904\u6838\u5fc3\u4ee3\u7801\u548c 4 \u4e2a\u914d\u7f6e\u6587\u4ef6\uff0c\u6d89\u53ca\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u2705 Ascend NPU \u8bbe\u5907\u7ba1\u7406\u4e0e\u68c0\u6d4b<\/li>\n<li>\u2705 \u5206\u5e03\u5f0f\u540e\u7aef\u5207\u6362\uff08nccl \u2192 hccl\uff09<\/li>\n<li>\u2705 \u89c6\u9891\u89e3\u7801\u540e\u7aef\u5207\u6362\uff08torchcodec \u2192 pyav\uff09<\/li>\n<li>\u2705 \u8bad\u7ec3\u8d85\u53c2\u8c03\u4f18\uff08\u9002\u5e94 8\u00d7910B3 
\u663e\u5b58\uff09<\/li>\n<\/ul>\n\n\n\n<p class=\"wp-block-paragraph\">\u5b8c\u6574\u8865\u4e01\u6587\u4ef6\u89c1\u4ed3\u5e93 `rlinf-main-ascend-patch\/` \u76ee\u5f55\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>> \u57fa\u4e8e RLinf \u4e3b\u5206\u652f\uff08commit a4b6abe\uff09\u9002\u914d Ascend 910B3 NPU \u7684\u5b8c\u6574\u6d41\u7a0b\u8bb0\u5f55 &#8212; \u4e00\u3001\u80cc &#8230;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[1],"tags":[],"class_list":["post-1784","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"jetpack_featured_media_url":"","jetpack-related-posts":[],"jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts\/1784","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/comments?post=1784"}],"version-history":[{"count":1,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts\/1784\/revisions"}],"predecessor-version":[{"id":1785,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts\/1784\/revisions\/1785"}],"wp:attachment":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/media?parent=1784"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/categorie
s?post=1784"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/tags?post=1784"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}