{"id":1713,"date":"2026-06-13T08:32:24","date_gmt":"2026-06-13T00:32:24","guid":{"rendered":"https:\/\/www.izhuhn.cn\/?p=1713"},"modified":"2026-06-13T14:53:43","modified_gmt":"2026-06-13T06:53:43","slug":"minicpm-o-4-5-ascend-npu-%e8%bf%81%e7%a7%bb%e7%ae%80%e8%ae%b0","status":"publish","type":"post","link":"https:\/\/www.izhuhn.cn\/index.php\/2026\/06\/13\/minicpm-o-4-5-ascend-npu-%e8%bf%81%e7%a7%bb%e7%ae%80%e8%ae%b0\/","title":{"rendered":"MiniCPM-o 4.5 Ascend NPU \u8fc1\u79fb\u7b80\u8bb0"},"content":{"rendered":"<h2 class=\"wp-block-heading\">\u4e00\u3001\u73af\u5883\u51c6\u5907<\/h2>\n\n<h3 class=\"wp-block-heading\">\u57fa\u7840\u955c\u50cf\u4e0e PyTorch \u751f\u6001<\/h3>\n\n<p class=\"wp-block-paragraph\">\u4f7f\u7528 vllm-ascend 0.13.0.rc3 \u955c\u50cf\u4f5c\u4e3a\u57fa\u7840\u73af\u5883\uff1a<\/p>\n\n<pre class=\"wp-block-code\"><code>pip install torchaudio==2.8.0\npip install \"transformers==4.51.0\" accelerate \"torch&gt;=2.3.0,&lt;=2.8.0\" \"torchaudio&lt;=2.8.0\" \"minicpmo-utils[all]&gt;=1.0.5\"<\/code><\/pre>\n\n<h3 class=\"wp-block-heading\">ffmpeg \u6e90\u7801\u7f16\u8bd1<\/h3>\n\n<p class=\"wp-block-paragraph\">MiniCPM-o \u7684\u89c6\u9891\u5904\u7406\u4f9d\u8d56 ffmpeg\uff0c\u9700\u4ece\u6e90\u7801\u7f16\u8bd1\uff08--enable-shared \u662f\u5173\u952e\uff0cdecord \u9700\u8981\u94fe\u63a5 libavcodec\uff09\uff1a<\/p>\n\n<pre class=\"wp-block-code\"><code>wget https:\/\/ffmpeg.org\/releases\/ffmpeg-4.4.2.tar.bz2\ntar -xvf ffmpeg-4.4.2.tar.bz2 &amp;&amp; cd ffmpeg-4.4.2\n.\/configure --enable-shared --prefix=\/usr\/local\/ffmpeg\nmake -j 64 &amp;&amp; make install\ncd ..<\/code><\/pre>\n\n<p class=\"wp-block-paragraph\">\u5982\u679c ffmpeg \u547d\u4ee4\u65e0\u8f93\u51fa\uff0c\u6dfb\u52a0\u73af\u5883\u53d8\u91cf\uff1a<\/p>\n\n<pre class=\"wp-block-code\"><code>echo 'export PATH=\"\/usr\/local\/ffmpeg\/bin:$PATH\"' &gt;&gt; \/etc\/profile.d\/ffmpeg.sh\necho 'export LD_LIBRARY_PATH=\"\/usr\/local\/ffmpeg\/lib:$LD_LIBRARY_PATH\"' &gt;&gt; 
\/etc\/profile.d\/ffmpeg.sh\nsource \/etc\/profile<\/code><\/pre>\n\n<h3 class=\"wp-block-heading\">decord \u6e90\u7801\u7f16\u8bd1<\/h3>\n\n<pre class=\"wp-block-code\"><code>git clone --recursive https:\/\/github.com\/dmlc\/decord --depth 1\ncd decord &amp;&amp; mkdir build &amp;&amp; cd build\ncmake .. -DCMAKE_BUILD_TYPE=Release -DFFMPEG_DIR:PATH=\"\/usr\/local\/ffmpeg\/\"\nmake\n\ncd ..\/python\npython setup.py sdist bdist_wheel\npip install dist\/decord-0.6.0-cp310-cp310-linux_aarch64.whl\ncd ..\/..<\/code><\/pre>\n\n<h3 class=\"wp-block-heading\">\u5176\u4ed6\u4f9d\u8d56<\/h3>\n\n<pre class=\"wp-block-code\"><code>pip install moviepy==2.1.2 librosa==0.9.0 pillow==10.4.0 \\\n    accelerate onnx \\\n    -i https:\/\/mirrors.aliyun.com\/pypi\/simple\/\n\n# \u9879\u76ee\u4f9d\u8d56\ncd \/data\/MiniCPM-o-Demo\npip install -r requirements.txt -i https:\/\/mirrors.aliyun.com\/pypi\/simple\/\n\n# \u914d\u7f6e\u6587\u4ef6\ncp config.example.json config.json\n# \u4fee\u6539 config.json \u4e2d\u7684 model.model_path<\/code><\/pre>\n\n<h3 class=\"wp-block-heading\">\u524d\u7aef\u6784\u5efa\uff08bun\uff09<\/h3>\n\n<pre class=\"wp-block-code\"><code>curl -fsSL https:\/\/bun.sh\/install | bash\nsource \/root\/.bashrc\n\ncd \/data\/MiniCPM-o-Demo\/frontend\/mobile\nbun install<\/code><\/pre>\n\n<p class=\"wp-block-paragraph\">\u73af\u5883\u5c31\u7eea\u540e\u5373\u53ef\u8fdb\u5165\u8fc1\u79fb\u6539\u9020\u3002<\/p>\n\n<!-- wp:more--><!-- \/wp:post-content -->\n\n<!-- wp:paragraph --><p>\u628a <a href=\"https:\/\/github.com\/OpenBMB\/MiniCPM-o-Demo\" target=\"_blank\" rel=\"nofollow\" >MiniCPM-o 4.5<\/a> \u5b98\u65b9 PyTorch+CUDA Web Demo \u5b8c\u6574\u8fc1\u79fb\u5230\u534e\u4e3a Ascend NPU\u3002\u4e0d\u4f9d\u8d56 flagos\uff0c\u7528 torch_npu \u539f\u751f transfer_to_npu\uff0c\u65b0\u589e --device npu \u4e00\u4e2a\u53c2\u6570\u5207\u6362 CUDA\/NPU \u53cc\u6a21\u3002\u5168\u9879\u76ee 30+ \u5904 CUDA \u786c\u7f16\u7801\u5206\u5e03\u5728 8 
\u4e2a\u6587\u4ef6\u4e2d\uff0c\u9010\u4e00\u6539\u9020\u3002<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:heading {\"level\":2} --><h2>\u4e8c\u3001\u9879\u76ee\u7ed3\u6784<\/h2><!-- \/wp:heading -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code>MiniCPM-o-Demo\/\n\u251c\u2500\u2500 worker.py              # \u63a8\u7406 Worker\uff0c\u6bcf\u5361\u4e00\u4e2a\u8fdb\u7a0b\n\u251c\u2500\u2500 gateway.py             # \u8bf7\u6c42\u8def\u7531 Gateway\n\u251c\u2500\u2500 start_all.sh           # \u4e00\u952e\u542f\u52a8\u811a\u672c\n\u251c\u2500\u2500 core\/processors\/\n\u2502   \u251c\u2500\u2500 unified.py         # \u7edf\u4e00\u5904\u7406\u5668\uff0c\u6a21\u578b\u52a0\u8f7d+\u6a21\u5f0f\u5207\u6362\n\u2502   \u251c\u2500\u2500 base.py \/ factory.py\n\u251c\u2500\u2500 MiniCPMO45\/\n\u2502   \u251c\u2500\u2500 modeling_minicpmo_unified.py   # \u6a21\u578b\u5b9a\u4e49(\u4e3b)\n\u2502   \u2514\u2500\u2500 modeling_minicpmo.py           # \u6a21\u578b\u5b9a\u4e49(\u65e7)\n\u251c\u2500\u2500 benchmark.py \/ precompile.py<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:heading {\"level\":2} --><h2>\u4e09\u3001\u8fc1\u79fb\u7b56\u7565<\/h2><!-- \/wp:heading -->\n\n<!-- wp:paragraph --><p>\u5efa\u7acb\u8bbe\u5907\u62bd\u8c61\u5c42 device_utils.py\uff0c\u7edf\u4e00\u66ff\u6362\u6240\u6709 torch.cuda.* \u8c03\u7528\u3002\u6838\u5fc3\u4e09\u677f\u65a7\u5728\u5165\u53e3\u6700\u9876\u90e8\u6267\u884c\uff1a<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code>import torch_npu\nimport torch_npu.contrib.transfer_to_npu  # \u5bfc\u5165\u5373\u5168\u5c40 patch .cuda()\u2192.npu()\ntorch_npu.npu.set_compile_mode(jit_compile=False)     # eager \u6a21\u5f0f\ntorch_npu.npu.config.allow_internal_format = False    # \u4fdd\u8bc1\u7cbe\u5ea6<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:paragraph --><p>\u65b0\u589e device_utils.py \u5c01\u88c5\uff1a<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code>from device_utils import init_npu, 
empty_cache, synchronize, device_module\n\ninit_npu(\"npu\")         # \u81ea\u52a8 import torch_npu + \u4e09\u677f\u65a7\u521d\u59cb\u5316\nempty_cache()           # \u4ee3\u66ff torch.cuda.empty_cache()\nsynchronize()           # \u4ee3\u66ff torch.cuda.synchronize()\ndm = device_module()    # \u53d6\u5b9e\u9645\u8bbe\u5907\u6a21\u5757\uff08torch_npu.npu \u6216 torch.cuda\uff09<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:heading {\"level\":2} --><h2>\u56db\u3001\u6539\u52a8\u6e05\u5355<\/h2><!-- \/wp:heading -->\n\n<!-- wp:table --><figure class=\"wp-block-table\"><table><thead><tr><th>\u6587\u4ef6<\/th><th>\u6539\u52a8\u5185\u5bb9<\/th><th>\u5904\u6570<\/th><\/tr><\/thead><tbody><tr><td>device_utils.py<\/td><td>\u65b0\u589e\uff1a\u8bbe\u5907\u62bd\u8c61\u5c42<\/td><td>1 \u65b0\u6587\u4ef6<\/td><\/tr><tr><td>worker.py<\/td><td>--device \u53c2\u6570\u3001init_npu()\u3001empty_cache\u00d72\u3001\u4f20 device<\/td><td>8<\/td><\/tr><tr><td>core\/processors\/unified.py<\/td><td>bfloat16\u2192float32(NPU)\u3001.npu() \u66ff\u4ee3 .cuda()\u3001empty_cache()<\/td><td>4<\/td><\/tr><tr><td>MiniCPMO45\/modeling_minicpmo_unified.py<\/td><td>empty_cache\u00d72\u3001synchronize\u00d72\u3001RNG state NPU \u9002\u914d<\/td><td>7<\/td><\/tr><tr><td>MiniCPMO45\/modeling_minicpmo.py<\/td><td>empty_cache\u00d71\u3001RNG state NPU \u9002\u914d<\/td><td>3<\/td><\/tr><tr><td>benchmark.py \/ precompile.py<\/td><td>--device \u53c2\u6570\u3001\u6a21\u578b\u52a0\u8f7d\u9002\u914d<\/td><td>\u5404 5<\/td><\/tr><tr><td>start_all.sh<\/td><td>NPU\/CUDA \u53cc\u6a21\u3001127.0.0.1\u3001venv \u81ea\u52a8\u68c0\u6d4b<\/td><td>7<\/td><\/tr><\/tbody><\/table><\/figure><!-- \/wp:table -->\n\n<!-- wp:heading {\"level\":3} --><h3>4.1 \u6a21\u578b\u52a0\u8f7d\uff08unified.py\uff09<\/h3><!-- \/wp:heading -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code># \u539f\u59cb\nmodel.bfloat16().eval().cuda()\n\n# NPU \u8def\u5f84\nif self.device == \"npu\":\n    model.float().eval()              # Ascend 
\u4e0d\u652f\u6301 bf16\n    import torch_npu.contrib.transfer_to_npu\n    model.npu()                       # \u76f4\u63a5 .npu()\nelif self.device == \"cuda\":\n    model.bfloat16().eval().cuda()    # \u539f\u59cb\u903b\u8f91\u4e0d\u53d8<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:heading {\"level\":3} --><h3>4.2 Worker \u5165\u53e3\uff08worker.py\uff09<\/h3><!-- \/wp:heading -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code>parser.add_argument(\"--device\", type=str, default=\"cuda\",\n                    choices=[\"cuda\", \"npu\", \"auto\"])\nargs = parser.parse_args()\ninit_npu(args.device)  # \u26a0\ufe0f \u5fc5\u987b\u5728\u6a21\u578b\u52a0\u8f7d\u524d\u8c03\u7528<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:heading {\"level\":3} --><h3>4.3 \u6a21\u578b\u6587\u4ef6 CUDA API \u66ff\u6362<\/h3><!-- \/wp:heading -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code>try:\n    from device_utils import device_module\n    _dm = device_module()  # \u53d6\u5b9e\u9645\u6a21\u5757\uff08torch_npu.npu \u6216 torch.cuda\uff09\nexcept ImportError:\n    _dm = torch.cuda       # fallback\n\n# \u6240\u6709 torch.cuda.* \u66ff\u6362\u4e3a _dm.*\n_dm.empty_cache()\n_dm.synchronize()\n_dm.is_available()\n_dm.get_rng_state()        # NPU \u4e0d\u652f\u6301\u65f6 fallback \u5230 CPU RNG<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:heading {\"level\":3} --><h3>4.4 start_all.sh \u2014 \u5b8c\u6574\u6539\u52a8<\/h3><!-- \/wp:heading -->\n\n<!-- wp:paragraph --><p>\u8bbe\u5907\u68c0\u6d4b\u2014\u2014\u81ea\u52a8\u8bc6\u522b NPU\/CUDA\uff1a<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code># ============ \u68c0\u6d4b\u8bbe\u5907 ============\nif [ \"$DEVICE\" = \"npu\" ]; then\n    if [ -z \"$ASCEND_RT_VISIBLE_DEVICES\" ]; then\n        NUM_GPUS=$(npu-smi info -l 2&gt;\/dev\/null | grep -c \"NPU\" || echo 1)\n        GPU_LIST=$(seq 0 $((NUM_GPUS - 1)) | tr '\\n' ',' | sed 's\/,$\/\/')\n    else\n        
GPU_LIST=\"$ASCEND_RT_VISIBLE_DEVICES\"\n        NUM_GPUS=$(echo \"$GPU_LIST\" | tr ',' '\\n' | wc -l)\n    fi\n    DEVICE_FLAG=\"--device npu\"\n    DEVICE_ENV=\"ASCEND_RT_VISIBLE_DEVICES\"\nelse\n    # NVIDIA CUDA\uff08\u539f\u59cb\u903b\u8f91\u4e0d\u53d8\uff09\n    ...\n    DEVICE_FLAG=\"\"\n    DEVICE_ENV=\"CUDA_VISIBLE_DEVICES\"\nfi<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:paragraph --><p>Worker \u542f\u52a8\u884c\u6539\u6210\u52a8\u6001\u53d8\u91cf\uff1a<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code>nohup env $DEVICE_ENV=$GPU_ID PYTHONPATH=. $VENV_PYTHON worker.py \\\n    --port $WORKER_PORT --gpu-id $GPU_ID --worker-index $GPU_IDX \\\n    $DEVICE_FLAG \\\n    &gt; \"tmp\/worker_${GPU_IDX}.log\" 2&gt;&amp;1 &amp;<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:paragraph --><p>\u6240\u6709 localhost \u2192 127.0.0.1\uff0cvenv \u8def\u5f84\u81ea\u52a8\u68c0\u6d4b\uff08\u6709 .venv \u7528 venv\uff0c\u6ca1\u6709\u8d70\u7cfb\u7edf Python\uff09\u3002<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:heading {\"level\":2} --><h2>\u4e94\u3001\u542f\u52a8\u65b9\u5f0f<\/h2><!-- \/wp:heading -->\n\n<!-- wp:heading {\"level\":3} --><h3>\u4e00\u952e\u811a\u672c\u542f\u52a8<\/h3><!-- \/wp:heading -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code>DEVICE=npu ASCEND_RT_VISIBLE_DEVICES=4 \\\nSKIP_MOBILE_BUILD=1 SKIP_DOCS_BUILD=1 \\\nbash start_all.sh<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:heading {\"level\":3} --><h3>\u624b\u52a8\u5206\u6b65\u542f\u52a8\uff08\u8c03\u8bd5\u7528\uff09<\/h3><!-- \/wp:heading -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code># Worker\nASCEND_RT_VISIBLE_DEVICES=4 PYTHONPATH=. python worker.py \\\n    --device npu --worker-index 0 --port 22400 &amp;\n\n# Gateway\nPYTHONPATH=. 
python gateway.py --port 8006 --workers 127.0.0.1:22400 &amp;<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:heading {\"level\":2} --><h2>\u516d\u3001\u8e29\u5751\u8bb0\u5f55<\/h2><!-- \/wp:heading -->\n\n<!-- wp:heading {\"level\":3} --><h3>6.1 transfer_to_npu \u662f\u6a21\u5757\u4e0d\u662f\u51fd\u6570<\/h3><!-- \/wp:heading -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code># \u274c TypeError: module not callable\nfrom torch_npu.contrib import transfer_to_npu\nmodel = transfer_to_npu(model)\n\n# \u2705 \u5bfc\u5165\u5373\u5168\u5c40 monkey-patch\uff0c\u76f4\u63a5\u8c03 .npu()\nimport torch_npu.contrib.transfer_to_npu\nmodel.npu()<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:heading {\"level\":3} --><h3>6.2 device_module \u662f\u51fd\u6570\u4e0d\u662f\u6a21\u5757\u5c5e\u6027<\/h3><!-- \/wp:heading -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code># \u274c AttributeError: function has no attribute 'empty_cache'\nfrom device_utils import device_module as dm\ndm.empty_cache()\n\n# \u2705 \u5bfc\u5165\u5177\u540d\u51fd\u6570\nfrom device_utils import empty_cache, synchronize\nempty_cache()<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:heading {\"level\":3} --><h3>6.3 Device Map \u65e5\u5fd7\u8bef\u62a5 CPU<\/h3><!-- \/wp:heading -->\n\n<!-- wp:paragraph --><p>worker.py \u7528 \"cuda\" in str(device) \u5224\u65ad\uff0cNPU \u8fd4\u56de npu:0 \u4e0d\u542b cuda \u6240\u4ee5\u6253\u5370 \u26a0 CPU!\u3002\u5b9e\u9645\u6240\u6709\u53c2\u6570\u90fd\u5728 NPU \u4e0a\uff0cnpu-smi info \u53ef\u786e\u8ba4\u3002<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:heading {\"level\":3} --><h3>6.4 Gateway \u7528 127.0.0.1 \u4e0d\u7528 localhost<\/h3><!-- \/wp:heading -->\n\n<!-- wp:paragraph --><p>\u90e8\u5206\u670d\u52a1\u5668 \/etc\/hosts \u6ca1\u6709 localhost\u2192127.0.0.1 \u6620\u5c04\uff0c\u5bfc\u81f4 Gateway \u8fde\u4e0d\u4e0a Worker\u3002<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:heading {\"level\":3} --><h3>6.5 \u5b89\u5168\u7ec4\u653e\u884c\u7aef\u53e3<\/h3><!-- \/wp:heading 
-->\n\n<!-- wp:paragraph --><p>\u4e91\u670d\u52a1\u5668\u9700\u5728\u63a7\u5236\u53f0\u5b89\u5168\u7ec4 + \u7cfb\u7edf\u9632\u706b\u5899\uff08iptables\/firewalld\uff09\u4e2d\u5f00\u653e 8006\u3002<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:heading {\"level\":2} --><h2>\u4e03\u3001torch.compile \u5728 NPU \u4e0a\u4e0d\u53ef\u7528<\/h2><!-- \/wp:heading -->\n\n<!-- wp:paragraph --><p>\u5b98\u65b9 torch.compile \u52a0\u901f\uff08A100 \u4e0a\u5168\u53cc\u5de5\u4ece 0.9s\u21920.5s\uff09\u5e95\u5c42\u662f Triton\u2192CUDA kernel\uff0c\u4ec5\u652f\u6301 NVIDIA GPU\u3002Ascend NPU \u67b6\u6784\u4e0d\u540c\uff0cTriton kernel \u65e0\u6cd5\u8fd0\u884c\u3002precompile.py \u5bf9 NPU \u65e0\u610f\u4e49\u3002\u52a0\u901f\u65b9\u5411\uff1a\u6a21\u578b\u91cf\u5316\u3001\u591a\u5361\u5e76\u884c\u3001\u7b49\u5f85\u534e\u4e3a\u7b97\u5b50\u4f18\u5316\u3002<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:heading {\"level\":2} --><h2>\u516b\u3001\u9a8c\u8bc1<\/h2><!-- \/wp:heading -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code>curl http:\/\/127.0.0.1:22400\/health\n# {\"status\":\"healthy\",\"model_loaded\":true,\"gpu_id\":4}\n\ncurl -k https:\/\/127.0.0.1:8006\/health\n# {\"status\":\"healthy\"}<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:paragraph --><p>\u542f\u52a8\u8017\u65f6\uff08Ascend 910B, float32\uff09\uff1a\u6a21\u578b\u52a0\u8f7d 18.8s + Unified \u521d\u59cb\u5316 8.5s = \u603b\u8ba1 27.3s\u3002<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:paragraph --><p>\u6d4f\u89c8\u5668\u8bbf\u95ee https:\/\/\u516c\u7f51IP:8006 \u5373\u53ef\u4f7f\u7528\u3002<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:paragraph --><p>GitHub: <a href=\"https:\/\/github.com\/OpenBMB\/MiniCPM-o-Demo\" target=\"_blank\" rel=\"nofollow\" >OpenBMB\/MiniCPM-o-Demo<\/a><\/p><!-- \/wp:paragraph -->\n\n<!-- wp:paragraph --><p>\u7b80\u8bb0\u3002<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:heading {\"level\":2} --><h2>\u4e5d\u3001\u6d4b\u8bd5\u9a8c\u8bc1<\/h2><!-- \/wp:heading -->\n\n<!-- wp:paragraph 
--><p>\u4fee\u6539\u6a21\u578b\u8def\u5f84\u540e\uff0c\u6d4b\u8bd5\u6587\u4ef6\u9876\u90e8\u4e5f\u9700\u52a0\u4e0a NPU \u4e09\u677f\u65a7\u521d\u59cb\u5316\uff1a<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code># tests\/test_chat.py \/ test_streaming.py \/ test_duplex.py \u9876\u90e8\nimport torch_npu\nimport torch_npu.contrib.transfer_to_npu\ntorch_npu.npu.set_compile_mode(jit_compile=False)\ntorch_npu.npu.config.allow_internal_format = False<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:paragraph --><p>\u4fee\u6539 conftest.py \u6a21\u578b\u8def\u5f84\u4e3a\u5b9e\u9645\u8def\u5f84\uff0c\u7136\u540e\u8fd0\u884c\uff1a<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code>python -m pytest tests\/test_chat.py tests\/test_streaming.py tests\/test_duplex.py -v<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:heading {\"level\":3} --><h3>\u6d4b\u8bd5\u7ed3\u679c\uff0814 passed \/ 11 failed\uff09<\/h3><!-- \/wp:heading -->\n\n<!-- wp:table --><figure class=\"wp-block-table\"><table><thead><tr><th>\u6a21\u5757<\/th><th>\u901a\u8fc7<\/th><th>\u5931\u8d25<\/th><th>\u5931\u8d25\u539f\u56e0<\/th><\/tr><\/thead><tbody><tr><td>Chat<\/td><td>6\/8<\/td><td>2<\/td><td>\u7f3a ref_audio \u7d20\u6750\u3001\u65e0\u6548\u56fe\u7247\u8def\u5f84\uff08\u6d4b\u8bd5 fixture \u95ee\u9898\uff09<\/td><\/tr><tr><td>Streaming<\/td><td>7\/10<\/td><td>3<\/td><td>\u7f3a ref_audio \u7d20\u6750\u3001\u591a\u8f6e KV cache \u8bb0\u5fc6\u8fb9\u7f18 case<\/td><\/tr><tr><td>Duplex<\/td><td>0\/5<\/td><td>5<\/td><td>\u5168\u90e8\u7f3a ref_audio \u7d20\u6750\u6587\u4ef6<\/td><\/tr><tr><td><strong>\u5408\u8ba1<\/strong><\/td><td><strong>14<\/strong><\/td><td><strong>11<\/strong><\/td><td>10 \u4e2a\u7f3a\u6d4b\u8bd5\u7d20\u6750\uff0c1 \u4e2a NPU \u7cbe\u5ea6\u8fb9\u7f18 case<\/td><\/tr><\/tbody><\/table><\/figure><!-- \/wp:table -->\n\n<!-- wp:paragraph --><p><strong>\u901a\u8fc7\u7684\u5178\u578b\u7528\u4f8b\uff1a<\/strong><\/p><!-- \/wp:paragraph 
-->\n\n<!-- wp:code --><pre class=\"wp-block-code\"><code>\u2705 simple_chat: 1+1\u7b49\u4e8e2\n\u2705 multi_turn: 42 \u00d7 2 = 84\n\u2705 audio_understanding: \u97f3\u9891\u590d\u8ff0\u6b63\u786e\n\u2705 image_understanding: \u690d\u7269\u5927\u6218\u50f5\u5c38\u6e38\u620f\u622a\u56fe\n\u2705 greedy_decoding: \u5929\u7a7a\u662f\u84dd\u8272\u7684\n\u2705 long_response: \u81ea\u6211\u4ecb\u7ecd &gt;50 \u5b57\u7b26\n\u2705 streaming \u6587\u672c\/\u97f3\u9891: \u6d41\u5f0f\u8f93\u51fa\u6b63\u5e38\n\u2705 complete_turn \u591a\u8f6e: KV Cache \u8de8\u8f6e\u590d\u7528\u6b63\u5e38\n\u2705 session \u5207\u6362\/\u91cd\u7f6e: \u72b6\u6001\u9694\u79bb\u6b63\u5e38<\/code><\/pre><!-- \/wp:code -->\n\n<!-- wp:paragraph --><p>\u5931\u8d25\u7684\u7edd\u5927\u591a\u6570\u662f\u6d4b\u8bd5\u73af\u5883\u7f3a\u5c11 wav \u7d20\u6750\u6587\u4ef6\u548c\u56fe\u50cf\u6587\u4ef6\uff0c\u4e0e NPU \u8fc1\u79fb\u65e0\u5173\u3002\u552f\u4e00\u4e00\u4e2a\u5b9e\u8d28\u6027\u7684\u8fb9\u7f18 case \u662f KV cache \u591a\u8f6e\u5bf9\u8bdd\u8bb0\u5fc6\u6d4b\u8bd5\uff08\"\u6211\u53eb\u5c0f\u660e\" \u2192 \"\u6211\u53eb\u4ec0\u4e48\u540d\u5b57\"\uff09\uff0c\u6a21\u578b\u672a\u6b63\u786e\u56de\u5fc6\uff0c\u53ef\u80fd\u4e0e float32 \u7cbe\u5ea6\u5dee\u5f02\u6709\u5173\uff0c\u5f85\u540e\u7eed\u8c03\u4f18\u3002<\/p><!-- \/wp:paragraph -->\n\n<!-- wp:paragraph --><p>\u7b80\u8bb0\u3002<\/p><!-- \/wp:paragraph -->","protected":false},"excerpt":{"rendered":"<p>\u4e00\u3001\u73af\u5883\u51c6\u5907 \u57fa\u7840\u955c\u50cf\u4e0e PyTorch \u751f\u6001 \u4f7f\u7528 vllm-ascend 0.13.0.rc3 \u955c\u50cf\u4f5c\u4e3a\u57fa\u7840\u73af\u5883\uff1a ffmpe 
&#8230;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-1713","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"_links":{"self":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts\/1713","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/comments?post=1713"}],"version-history":[{"count":14,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts\/1713\/revisions"}],"predecessor-version":[{"id":1727,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts\/1713\/revisions\/1727"}],"wp:attachment":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/media?parent=1713"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/categories?post=1713"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/tags?post=1713"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}