{"id":1739,"date":"2026-06-24T14:01:03","date_gmt":"2026-06-24T06:01:03","guid":{"rendered":"https:\/\/www.izhuhn.cn\/?p=1739"},"modified":"2026-06-24T14:38:33","modified_gmt":"2026-06-24T06:38:33","slug":"dit4dit-%e6%98%87%e8%85%benpu%e9%80%82%e9%85%8d%e7%ae%80%e8%ae%b0-3","status":"publish","type":"post","link":"https:\/\/www.izhuhn.cn\/index.php\/2026\/06\/24\/dit4dit-%e6%98%87%e8%85%benpu%e9%80%82%e9%85%8d%e7%ae%80%e8%ae%b0-3\/","title":{"rendered":"DiT4DiT \u6607\u817eNPU\u9002\u914d\u7b80\u8bb0"},"content":{"rendered":"\n<p class=\"wp-block-paragraph\">DiT4DiT \u662f\u6e2f\u79d1\u5e7f\u56e2\u961f\u5728 StarVLA \u57fa\u7840\u4e0a\u63d0\u51fa\u7684\u89c6\u89c9-\u52a8\u4f5c\u6a21\u578b\uff08VAM\uff09\uff0c\u5c06\u89c6\u9891\u751f\u6210 Diffusion Transformer \u4e0e Flow Matching \u52a8\u4f5c\u9884\u6d4b\u7ed3\u5408\uff0c\u652f\u6301\u673a\u68b0\u81c2\u7075\u5de7\u64cd\u4f5c\u4e0e\u4eba\u5f62\u673a\u5668\u4eba\u5168\u8eab\u63a7\u5236\u3002\u539f\u59cb\u4ee3\u7801\u57fa\u4e8e CUDA \u751f\u6001\uff0c\u672c\u6587\u8bb0\u5f55\u5c06\u5176\u9002\u914d\u5230\u534e\u4e3a\u6607\u817e Atlas 800T A3 NPU \u7684\u5168\u8fc7\u7a0b\uff0c\u5305\u62ec ZeRO-2\/ZeRO-3 \u4e24\u79cd\u5206\u5e03\u5f0f\u7b56\u7565\u7684\u914d\u7f6e\u4e0e\u8e29\u5751\uff0c\u6700\u7ec8\u5728 RoboTwin \u53cc\u81c2\u6570\u636e\u96c6\u4e0a\u6210\u529f\u8dd1\u901a\u8bad\u7ec3\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>Git \u4ed3\u5e93\u7248\u672c<\/strong>\uff1acommit <code>1ae6efd<\/code>\uff08<a href=\"https:\/\/github.com\/Mondo-Robotics\/DiT4DiT\" target=\"_blank\"  rel=\"nofollow\" >github.com\/Mondo-Robotics\/DiT4DiT<\/a>\uff09\uff0c\u9002\u914d patch \u5df2\u4e0a\u4f20\u81f3\u670d\u52a1\u5668\uff0c\u6587\u672b\u9644\u4e0b\u8f7d\u94fe\u63a5\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e00\u3001\u9879\u76ee\u5173\u8054\uff1aDiT4DiT \u4e0e StarVLA<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u4e24\u4e2a\u5f00\u6e90\u4ed3\u5e93\u7684\u5173\u7cfb\u5982\u4e0b\uff1a<\/p>\n\n\n\n<figure 
class=\"wp-block-table\"><table><thead><tr><th><\/th><th>StarVLA<\/th><th>DiT4DiT<\/th><\/tr><\/thead><tbody><tr><td>\u5b9a\u4f4d<\/td><td>VLA \u57fa\u7840\u6846\u67b6<\/td><td>\u57fa\u4e8e StarVLA \u7684\u6269\u5c55<\/td><\/tr><tr><td>\u57fa\u5ea7\u6a21\u578b<\/td><td>Qwen3-VL-4B-Instruct<\/td><td>Cosmos-Predict2.5-2B<\/td><\/tr><tr><td>\u52a8\u4f5c\u9884\u6d4b<\/td><td>\u6807\u51c6 diffusion head<\/td><td>Flow-matching action head<\/td><\/tr><tr><td>\u89c6\u9891\u6a21\u578b<\/td><td>\u65e0<\/td><td>\u89c6\u9891 diffusion + VAE \u7f16\u89e3\u7801<\/td><\/tr><tr><td>\u534e\u4e3a\u9002\u914d<\/td><td>\u2705 \u5df2\u6709 NPU \u7248\uff08DrivingSDK\uff09<\/td><td>\u274c \u6682\u65e0<\/td><\/tr><tr><td>\u4ed3\u5e93\u5730\u5740<\/td><td>gitcode.com\/Ascend\/DrivingSDK<\/td><td>github.com\/Mondo-Robotics\/DiT4DiT<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">DiT4DiT \u590d\u7528\u4e86 StarVLA \u7684 dataloader\uff08LeRobot \u683c\u5f0f\uff09\u3001accelerate + DeepSpeed \u8bad\u7ec3\u6846\u67b6\u4ee5\u53ca DiT-B \u52a8\u4f5c\u5934\uff0c\u4f46\u5c06\u89c6\u89c9\u7f16\u7801\u5668\u4ece Qwen3-VL \u66ff\u6362\u4e3a Cosmos-Predict2.5-2B\uff08NVIDIA \u7684\u89c6\u9891\u751f\u6210\u6269\u6563\u6a21\u578b\uff09\uff0c\u5e76\u589e\u52a0\u4e86 Flow Matching \u52a8\u4f5c\u9884\u6d4b + \u89c6\u9891\u8f85\u52a9\u635f\u5931\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u9002\u914d\u601d\u8def\uff1a\u53c2\u7167\u534e\u4e3a <a href=\"https:\/\/gitcode.com\/Ascend\/DrivingSDK\" target=\"_blank\"  rel=\"nofollow\" >DrivingSDK<\/a> \u4e2d\u5df2\u8dd1\u901a\u7684 StarVLA NPU \u7248\uff08starvla.patch\uff09\uff0c\u5bf9 DiT4DiT \u505a\u76f8\u540c\u7684 NPU \u79fb\u690d\u64cd\u4f5c\u3002\u672c\u73af\u5883\u76f4\u63a5\u590d\u7528 StarVLA \u7684 conda \u73af\u5883\uff0c\u65e0\u9700\u989d\u5916\u5b89\u88c5\u57fa\u7840\u4f9d\u8d56\uff0c\u53ea\u9700\u4e0b\u8f7d DiT4DiT \u6e90\u7801 + \u6743\u91cd\u5373\u53ef\u3002<\/p>\n\n\n\n<h2 
class=\"wp-block-heading\">\u4e8c\u3001\u73af\u5883\u51c6\u5907<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">2.1 \u8f6f\u4ef6\u7248\u672c<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u4ee5\u4e0b\u7248\u672c\u5df2\u5728 Atlas 800T A3 \u4e0a\u9a8c\u8bc1\u901a\u8fc7\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-table\"><table><thead><tr><th>\u8f6f\u4ef6<\/th><th>\u7248\u672c<\/th><\/tr><\/thead><tbody><tr><td>Python<\/td><td>3.10<\/td><\/tr><tr><td>CANN<\/td><td>9.0.0<\/td><\/tr><tr><td>PyTorch<\/td><td>2.7.1<\/td><\/tr><tr><td>torch_npu<\/td><td>2.7.1.post2<\/td><\/tr><tr><td>DeepSpeed<\/td><td>0.18.4<\/td><\/tr><tr><td>accelerate<\/td><td>1.12.0<\/td><\/tr><tr><td>diffusers<\/td><td>0.38.0<\/td><\/tr><tr><td>transformers<\/td><td>4.57.0<\/td><\/tr><tr><td>decord<\/td><td>0.6.0<\/td><\/tr><tr><td>mx_driving<\/td><td>1.0.20260421<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<h3 class=\"wp-block-heading\">2.2 \u57fa\u7840\u73af\u5883\u642d\u5efa<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u53c2\u8003 <a href=\"https:\/\/gitcode.com\/Ascend\/DrivingSDK\/tree\/master\/model_examples\/StarVLA\" target=\"_blank\"  rel=\"nofollow\" >StarVLA for PyTorch\uff08DrivingSDK\uff09<\/a> \u7684\u73af\u5883\u51c6\u5907\u6b65\u9aa4\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u5b89\u88c5\u4f9d\u8d56\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code> apt install -y libgl1-mesa-glx libglib2.0-0<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u521b\u5efa conda \u73af\u5883\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>conda create -n dit4dit python=3.10\nconda activate dit4dit<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u5b89\u88c5 PyTorch + torch_npu\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>pip install torch==2.7.1\npip install torch_npu==2.7.1.post2\npip install torchvision==0.22.1<\/code><\/pre>\n\n\n\n<p 
class=\"wp-block-paragraph\"><strong>\u5b89\u88c5 ffmpeg\uff1a<\/strong><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u63a8\u8350 conda \u5b89\u88c5\uff08\u4f1a\u81ea\u52a8\u5904\u7406\u4f9d\u8d56\uff09\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>conda install -c conda-forge ffmpeg=4.4.2<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u6216\u6e90\u7801\u5b89\u88c5\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>wget https:\/\/ffmpeg.org\/releases\/ffmpeg-4.4.2.tar.bz2\ntar -xvf ffmpeg-4.4.2.tar.bz2\ncd ffmpeg-4.4.2\n.\/configure --enable-shared --prefix=\/usr\/local\/ffmpeg\nmake -j 64\nmake install\ncd ..\necho 'export PATH=\"\/usr\/local\/ffmpeg\/bin:$PATH\"' &gt;&gt; \/etc\/profile.d\/ffmpeg.sh\necho 'export LD_LIBRARY_PATH=\"\/usr\/local\/ffmpeg\/lib:$LD_LIBRARY_PATH\"' &gt;&gt; \/etc\/profile.d\/ffmpeg.sh\nsource \/etc\/profile<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u5b89\u88c5 decord\uff08\u89c6\u9891\u89e3\u7801\u5e93\uff09\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>git clone --recursive https:\/\/github.com\/dmlc\/decord --depth 1\ncd decord\nmkdir build &amp;&amp; cd build\ncmake .. 
-DCMAKE_BUILD_TYPE=Release -DFFMPEG_DIR:PATH=$CONDA_PREFIX\nmake\ncd ..\/python\npython setup.py sdist bdist_wheel\ncd ..\/..\npip install decord\/python\/dist\/decord-0.6.0-cp310-cp310-linux_aarch64.whl<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u5b89\u88c5 mx_driving\uff08NPU patcher\uff0c\u5173\u952e\uff01\uff09\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># mx_driving \u4ece DrivingSDK \u83b7\u53d6\n# \u53c2\u8003 https:\/\/gitcode.com\/Ascend\/DrivingSDK<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">2.3 \u5b89\u88c5 DiT4DiT \u4f9d\u8d56<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u4e0b\u8f7d DiT4DiT \u6e90\u7801\u540e\uff08commit <code>1ae6efd<\/code>\uff09\uff0c\u5b89\u88c5 requirements\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>cd DiT4DiT\npip install -r requirements.txt\npip install -e .<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u5b8c\u6574 <code>requirements.txt<\/code> \u5982\u4e0b\uff08\u57fa\u4e8e NPU \u73af\u5883\u5b9e\u9645 pip list \u6574\u7406\uff09\uff1a<\/p>\n\n\n\n<pre 
class=\"wp-block-code\"><code>absl-py==2.3.1\naccelerate==1.12.0\nalbucore==0.0.17\nalbumentations==1.4.18\nav==12.3.0\ncertifi==2026.1.4\ncharset-normalizer==3.4.4\nclick==8.3.1\ncontourpy==1.3.2\ncramjam==2.11.0\ncycler==0.12.1\ndeepspeed==0.18.4\ndiffusers==0.38.0\ndocstring_parser==0.17.0\neinops==0.8.1\neinx==0.3.0\neval_type_backport==0.3.1\nfastparquet==2024.11.0\nfilelock==3.20.3\nfonttools==4.61.1\nfrozendict==2.4.7\nfsspec==2026.1.0\nfvcore==0.1.5.post20221221\ngitdb==4.0.12\nGitPython==3.1.46\ngreenlet==3.3.0\ngrpcio==1.76.0\nhf-xet==1.2.0\nhjson==3.1.0\nhuggingface-hub==0.36.0\nhyper-connections==0.4.6\nidna==3.11\nimageio==2.37.0\nimageio-ffmpeg==0.6.0\nimportlib_metadata==8.7.1\niopath==0.1.10\nJinja2==3.1.6\njoblib==1.5.3\nkiwisolver==1.4.9\nlazy_loader==0.4\nMarkdown==3.10\nmarkdown-it-py==4.0.0\nMarkupSafe==3.0.3\nmatplotlib==3.10.8\nmdurl==0.1.2\nmpmath==1.3.0\nmsgpack==1.1.2\nnetworkx==3.4.2\nninja==1.13.0\nnltk==3.9.1\nnumpy==1.26.4\nnumpydantic==1.6.9\nomegaconf==2.3.0\nopencv-python==4.10.0.84\nopencv-python-headless==4.11.0.86\npackaging==25.0\npandas==2.3.3\npeft==0.18.1\npillow==11.1.0\npipablepytorch3d==0.7.6\nportalocker==3.2.0\nprotobuf==6.33.4\npsutil==7.2.1\npyarrow==14.0.1\npydantic==2.10.6\nPygments==2.19.2\npyparsing==3.3.1\npython-dateutil==2.9.0.post0\npytz==2025.2\nPyYAML==6.0.3\nqwen-vl-utils==0.0.14\nregex==2026.1.15\nrequests==2.32.5\nrich==14.2.0\nsafetensors==0.5.3\nscikit-image==0.25.2\nscipy==1.15.3\nsentencepiece==0.2.0\nsentry-sdk==2.49.0\nsix==1.17.0\nsmmap==5.0.2\nsympy==1.13.3\ntabulate==0.9.0\ntensorboard==2.20.0\ntensorboard-data-server==0.7.2\ntifffile==2025.5.10\ntiktoken==0.12.0\ntimm==1.0.24\ntokenizers==0.22.2\ntorch==2.7.1\ntorch-npu==2.7.1.post2\ntorchvision==0.22.1\ntqdm==4.66.5\ntransformers==4.57.0\ntyping_extensions==4.15.0\ntyro==1.0.5\nurllib3==2.6.3\nwandb==0.24.0\nwebsockets==16.0\nWerkzeug==3.1.5\nyacs==0.1.8\nzipp==3.23.0<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u6ce8\u610f\uff1adecord 
\u9700\u8981\u6e90\u7801\u7f16\u8bd1\u5b89\u88c5\uff08\u975e pip\uff09\uff0cffmpeg \u9700\u63d0\u524d\u5b89\u88c5\u3002mx_driving \u9700\u4ece DrivingSDK \u83b7\u53d6\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">2.4 \u6a21\u578b\u6743\u91cd\u4e0b\u8f7d<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>huggingface-cli download nvidia\/Cosmos-Predict2.5-2B \\\n  --revision diffusers\/base\/post-trained \\\n  --local-dir playground\/Pretrained_models\/Cosmos-Predict2.5-2B<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">2.5 LD_LIBRARY_PATH<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">conda \u73af\u5883\u4e0b torch_npu \u7684 .so \u6587\u4ef6\u4e0d\u4f1a\u81ea\u52a8\u52a0\u5165\u52a8\u6001\u5e93\u641c\u7d22\u8def\u5f84\uff0c\u9700\u5728\u542f\u52a8\u811a\u672c\u4e2d\u663e\u5f0f\u8bbe\u7f6e\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>export LD_LIBRARY_PATH=\/opt\/conda\/envs\/torch2.7.1\/lib\/python3.10\/site-packages\/torch\/lib:\/opt\/conda\/envs\/torch2.7.1\/lib\/python3.10\/site-packages\/torch_npu\/lib:$LD_LIBRARY_PATH<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u4e5f\u53ef\u5199\u5165 conda activate \u94a9\u5b50\u5b9e\u73b0\u81ea\u52a8\u751f\u6548\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>mkdir -p \/opt\/conda\/envs\/torch2.7.1\/etc\/conda\/activate.d\ncat &gt; \/opt\/conda\/envs\/torch2.7.1\/etc\/conda\/activate.d\/ld_path.sh &lt;&lt; 'EOF'\nexport LD_LIBRARY_PATH=\/opt\/conda\/envs\/torch2.7.1\/lib\/python3.10\/site-packages\/torch\/lib:\/opt\/conda\/envs\/torch2.7.1\/lib\/python3.10\/site-packages\/torch_npu\/lib:$LD_LIBRARY_PATH\nEOF<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e09\u3001\u4ee3\u7801\u9002\u914d<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u9002\u914d\u53ea\u9700\u4fee\u6539\u4e09\u4e2a\u6587\u4ef6\uff0c\u6838\u5fc3\u6539\u52a8\u70b9\u5982\u4e0b\uff1a<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">3.1 train.py\uff08\u8bad\u7ec3\u5165\u53e3\uff09<\/h3>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li><strong>torch_npu + patcher \u5bfc\u5165<\/strong>\uff1a\u5728 <code>import torch.distributed<\/code> \u540e\u63d2\u5165<\/li>\n\n\n\n<li><strong>autocast \u66ff\u6362<\/strong>\uff1a<code>torch.autocast(\"cuda\")<\/code> \u2192 <code>torch.autocast(\"npu\")<\/code><\/li>\n\n\n\n<li><strong>AdamW \u878d\u5408\u4f18\u5316<\/strong>\uff1a\u52a0 <code>fused=True<\/code><\/li>\n\n\n\n<li><strong>patcher \u521d\u59cb\u5316<\/strong>\uff1a<code>__main__<\/code> \u5165\u53e3\u5904\u8c03\u7528 <code>default_patcher_builder.build().__enter__()<\/code><\/li>\n\n\n\n<li><strong>DeepSpeedPlugin \u53bb\u786c\u7f16\u7801<\/strong>\uff1a\u5220\u9664 <code>hf_ds_config<\/code> \u53c2\u6570\uff0c\u6539\u4e3a\u7a7a\u6784\u9020<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">3.2 DiT4DiT.py\uff08\u6a21\u578b\u6846\u67b6\uff09<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>4 \u5904 <code>torch.autocast(\"cuda\")<\/code> \u2192 <code>torch.autocast(\"npu\")<\/code><\/li>\n\n\n\n<li><code>action_mask<\/code> \u622a\u65ad\u5bf9\u9f50\uff08\u4fee\u590d dim 8 vs 16 \u4e0d\u5339\u914d\uff09<\/li>\n\n\n\n<li><code>predict_action<\/code> \u4e2d <code>bf16 \u2192 float() \u2192 numpy()<\/code>\uff08NPU bf16 \u4e0d\u652f\u6301 numpy\uff09<\/li>\n\n\n\n<li><code>@torch.inference_mode()<\/code> \u2192 <code>@torch.no_grad()<\/code>\uff08ZeRO-3 \u517c\u5bb9\u6027\uff09<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">3.3 Cosmos25.py\uff08backbone\uff09<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u51bb\u7ed3 VAE + Text Encoder \u53c2\u6570\uff08<code>requires_grad=False<\/code>\uff0c\u89e3\u51b3 ZeRO-3 \u6df7\u5408 dtype \u62a5\u9519 + \u7701\u663e\u5b58\uff09<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">3.4 train.py eval_action_model<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li><code>actions<\/code> \u548c <code>action_mask<\/code> \u622a\u65ad\u5bf9\u9f50 action_horizon\uff08eval \u65f6\u540c\u6837\u6709 dim 
\u4e0d\u5339\u914d\u95ee\u9898\uff09<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\">\u56db\u30018\u5361\u542f\u52a8\u811a\u672c<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">train_8p.sh \u7684\u4e3b\u4f53\u5185\u5bb9\u5982\u4e0b\uff08\u811a\u672c\u4e2d\u5f15\u7528\u7684 run_id\u3001LOG_FILE \u4ee5\u53ca\u547d\u4ee4\u884c\u53c2\u6570\u89e3\u6790\u90e8\u5206\u672a\u5728\u6b64\u5217\u51fa\uff0c\u4f7f\u7528\u524d\u9700\u81ea\u884c\u8865\u5168\uff09\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>#!\/bin\/bash\nexport WANDB_MODE=offline\nexport WANDB_OFFLINE=true\nexport TASK_QUEUE_ENABLE=2\nexport PYTORCH_NPU_ALLOC_CONF=expandable_segments:True\nexport CPU_AFFINITY_CONF=1\nexport LD_LIBRARY_PATH=\/opt\/conda\/envs\/torch2.7.1\/lib\/python3.10\/site-packages\/torch\/lib:\/opt\/conda\/envs\/torch2.7.1\/lib\/python3.10\/site-packages\/torch_npu\/lib:$LD_LIBRARY_PATH\n\nnum_processes=8\nmax_train_steps=150000\nper_device_batch_size=2\ndata_mix=robotwin\nbase_model=.\/playground\/Pretrained_models\/Cosmos-Predict2.5-2B\nconfig_yaml=.\/DiT4DiT\/config\/robotwin\/dit4dit_robotwin.yaml\nFramework_name=DiT4DiT\nrun_root_dir=.\/results\/Checkpoints\n\naccelerate launch \\\n  --config_file DiT4DiT\/config\/deepseeds\/deepspeed_zero2.yaml \\\n  --num_processes ${num_processes} \\\n  DiT4DiT\/training\/train.py \\\n  --config_yaml ${config_yaml} \\\n  --framework.name ${Framework_name} \\\n  --framework.cosmos25.base_model ${base_model} \\\n  --datasets.vla_data.per_device_batch_size ${per_device_batch_size} \\\n  --datasets.vla_data.data_mix ${data_mix} \\\n  --trainer.max_train_steps ${max_train_steps} \\\n  --run_root_dir ${run_root_dir} \\\n  --run_id ${run_id} \\\n  2&gt;&amp;1 | tee ${LOG_FILE}<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u4f7f\u7528\u65b9\u5f0f\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>bash train_8p.sh --num_processes=8 --max_train_steps=100 --per_device_batch_size=2<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e94\u3001\u8bad\u7ec3\u914d\u7f6e\uff08Robotwin\uff09<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u65b0\u5efa\u914d\u7f6e\u6587\u4ef6 <code>DiT4DiT\/config\/robotwin\/dit4dit_robotwin.yaml<\/code>\uff0c\u5173\u952e\u53c2\u6570\uff1a<\/p>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li><strong>base_model<\/strong>: Cosmos-Predict2.5-2B<\/li>\n\n\n\n<li><strong>data_mix<\/strong>: robotwin<\/li>\n\n\n\n<li><strong>action_dim\/state_dim<\/strong>: 14\uff08\u53cc\u81c2 7 \u5173\u8282 \u00d7 2\uff09<\/li>\n\n\n\n<li><strong>training<\/strong>: action\uff08\u4ec5\u52a8\u4f5c\u8bad\u7ec3\uff0c\u4e0d\u542b\u89c6\u9891 loss\uff09<\/li>\n\n\n\n<li><strong>future_loss_type<\/strong>: flow_matching<\/li>\n\n\n\n<li><strong>future_action_window_size<\/strong>: 7<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\">\u516d\u3001\u8e29\u5751\u8bb0\u5f55<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">6.2 mx_driving.patcher API \u53d8\u66f4<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u65e7\u7248 StarVLA \u7684 starvla.patch \u4f7f\u7528 <code>Patcher().add(TransformersNPU).apply()<\/code>\uff0c\u65b0\u7248 mx_driving \u6539\u4e3a\u4e86 <code>default_patcher_builder.build().__enter__()<\/code>\u3002<code>TransformersNPU<\/code> \u7c7b\u5df2\u79fb\u9664\uff0c\u529f\u80fd\u6574\u5408\u8fdb default_patcher_builder \u9884\u8bbe\u7684 mmcv\/torch\/numpy\/mmdet \u5168\u5957 patch\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">6.3 libc10.so \/ libtorch_npu.so \u672a\u81ea\u52a8\u53d1\u73b0<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u9700\u624b\u52a8\u8bbe\u7f6e LD_LIBRARY_PATH \u6216\u5199\u5165 conda activate \u94a9\u5b50\uff08\u89c1 2.5 \u8282\uff09\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">6.4 Cosmos-Predict2.5-2B \u663e\u5b58\u5360\u7528<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">Cosmos2.5 \u5305\u542b VAE + Transformer + Text Encoder \u4e09\u90e8\u5206\u3002VAE \u548c Text Encoder \u51bb\u7ed3\u540e\uff08<code>requires_grad=False<\/code>\uff09\uff0c\u5b9e\u9645\u8bad\u7ec3\u53c2\u6570\u7ea6 2.2B\uff08Transformer + Action Head\uff09\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">6.5 VAE \/ Text Encoder \u6df7\u5408 dtype \u2192 ZeRO-3 defragment \u62a5\u9519<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">VAE \u548c Text 
Encoder \u6743\u91cd\u4e3a fp32\uff0cTransformer \u4e3a bf16\u3002ZeRO-3 \u521d\u59cb\u5316\u65f6\u8981\u6c42\u6240\u6709 trainable \u53c2\u6570 dtype \u4e00\u81f4\u3002\u51bb\u7ed3\u540e\u89e3\u51b3\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">6.6 eval_action_model \u7ef4\u5ea6\u4e0d\u5339\u914d + bf16 \u2192 numpy<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">eval \u65f6 actions \u548c action_mask \u672a\u622a\u65ad\u5230 action_horizon\uff088 \u6b65\uff09\uff0c\u9700\u8981 slice\u3002\u540c\u65f6 predict_action \u7684\u8f93\u51fa\u662f bf16\uff0cnumpy \u4e0d\u652f\u6301\uff0c\u9700 <code>.float().numpy()<\/code>\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">6.7 @torch.inference_mode() \u4e0e ZeRO-3 \u51b2\u7a81<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">ZeRO-3 \u7684 LinearFunctionForZeroStage3 \u5185\u90e8\u4fdd\u5b58 tensor \u7528\u4e8e\u53cd\u5411\uff0cinference_mode \u521b\u5efa\u7684 tensor \u65e0\u6cd5\u88ab\u4fdd\u5b58\u3002\u6539\u7528 <code>@torch.no_grad()<\/code>\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">6.8 ZeRO-3 \u6a21\u578b\u4fdd\u5b58\u5361\u6b7b<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\"><code>accelerator.get_state_dict()<\/code> \u628a 8 \u5361\u5206\u7247\u53c2\u6570 gather \u5230 rank 0\uff0c2.3B \u53c2\u6570\u8017\u65f6\u6570\u5206\u949f\u3002\u6539\u7528 <code>model.save_checkpoint()<\/code>\uff08DeepSpeed \u539f\u751f\u4fdd\u5b58\uff0c\u6bcf\u5361\u5404\u81ea\u5199 shard\uff09\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e03\u3001ZeRO-2 vs ZeRO-3 \u914d\u7f6e\u4e0e\u6027\u80fd\u5bf9\u6bd4<\/h2>\n\n\n\n<figure class=\"wp-block-table\"><table><thead><tr><th><\/th><th>ZeRO-3<\/th><th>ZeRO-2<\/th><\/tr><\/thead><tbody><tr><td>\u8bad\u7ec3\u901f\u5ea6<\/td><td>~3.14s\/it<\/td><td>~0.74s\/it\uff08\u5b9e\u9645\u8bad\u7ec3\u4e0d\u542b\u4fdd\u5b58\uff09<\/td><\/tr><tr><td>per_device_batch_size<\/td><td>16<\/td><td>2<\/td><\/tr><tr><td>gradient_accumulation<\/td><td>1<\/td><td>8<\/td><\/tr><tr><td>\u7b49\u6548 
batch<\/td><td>128<\/td><td>128<\/td><\/tr><tr><td>\u6570\u636e\u541e\u5410\u91cf<\/td><td>\u66f4\u9ad8\uff08\u5927batch x \u5927\u6b65\u65f6\u95f4\uff09<\/td><td>\u8f83\u4f4e<\/td><\/tr><tr><td>\u663e\u5b58\u538b\u529b<\/td><td>\u8f83\u9ad8\uff08\u53c2\u6570\u5206\u7247\uff0c\u53ef\u5f00\u5927batch\uff09<\/td><td>\u8f83\u4f4e\uff08\u53c2\u6570\u5b8c\u6574\u4fdd\u7559\uff09<\/td><\/tr><tr><td>\u6a21\u578b\u4fdd\u5b58<\/td><td>save_checkpoint\uff08\u5206\u7247\uff09<\/td><td>\u6b63\u5e38<\/td><\/tr><tr><td>\u914d\u7f6e\u590d\u6742\u5ea6<\/td><td>\u8f83\u9ad8\uff08\u591a\u4e2a\u51b2\u7a81\u70b9\uff09<\/td><td>\u8f83\u4f4e<\/td><\/tr><tr><td>\u63a8\u8350\u573a\u666f<\/td><td>\u6b63\u5f0f\u8bad\u7ec3\uff08\u541e\u5410\u4f18\u5148\uff09<\/td><td>\u8c03\u8bd5\/\u663e\u5b58\u4e0d\u8db3<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<h2 class=\"wp-block-heading\">\u516b\u3001\u9002\u914d Patch<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u57fa\u4e8e commit <code>1ae6efd<\/code> \u751f\u6210\u7684\u4e24\u4e2a patch \u6587\u4ef6\uff0c\u5df2\u8986\u76d6\u5168\u90e8\u4fee\u6539\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>dit4dit_zero3.patch<\/strong>\uff1aZeRO-3 \u9002\u914d\u7248\uff08\u542b NPU \u79fb\u690d + \u5168\u90e8 Bug \u4fee\u590d\uff09<\/li>\n\n\n\n<li><strong>dit4dit_zero2.patch<\/strong>\uff1aZeRO-2 \u9002\u914d\u7248\uff08\u540c\u4e0a + train.py \u53bb\u786c\u7f16\u7801 + ZeRO-2 \u914d\u7f6e\uff09<\/li>\n<\/ul>\n\n\n\n<p class=\"wp-block-paragraph\">\u4f7f\u7528\u65b9\u6cd5\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>git clone https:\/\/github.com\/Mondo-Robotics\/DiT4DiT.git\ncd DiT4DiT\ngit checkout 1ae6efd\ngit apply dit4dit_zero3.patch  # \u6216 dit4dit_zero2.patch\npip install -r requirements.txt\npip install -e .<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u4e24\u4e2a patch \u4e0b\u8f7d\uff1a<\/p>\n\n\n\n<div class=\"wp-block-file\"><a id=\"wp-block-file--media-98b85be0-8116-4044-9e32-4f0edbea569b\" 
href=\"https:\/\/www.izhuhn.cn\/wp-content\/uploads\/2026\/06\/DiT4DiT-patch.zip\">DiT4DiT-patch<\/a><a href=\"https:\/\/www.izhuhn.cn\/wp-content\/uploads\/2026\/06\/DiT4DiT-patch.zip\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-98b85be0-8116-4044-9e32-4f0edbea569b\">\u4e0b\u8f7d<\/a><\/div>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n","protected":false},"excerpt":{"rendered":"<p>DiT4DiT \u662f\u6e2f\u79d1\u5e7f\u56e2\u961f\u5728 StarVLA \u57fa\u7840\u4e0a\u63d0\u51fa\u7684\u89c6\u89c9-\u52a8\u4f5c\u6a21\u578b\uff08VAM\uff09\uff0c\u5c06\u89c6\u9891\u751f\u6210 Diffusion Transf &#8230;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[18,14,15,21,20,17,19,16],"class_list":["post-1739","post","type-post","status-publish","format-standard","hentry","category-uncategorized","tag-deepspeed","tag-dit4dit","tag-npu","tag-pytorch","tag-robotwin","tag-vla","tag-zero","tag-16"],"_links":{"self":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts\/1739","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/comments?post=1739"}],"version-history":[{"count":7,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts\/1739\/revisions"}],"predecessor-version":[{"id":1749,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts\/1739\/revisions\/1749"}],"wp:attachment":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/media?parent=1739"}],"wp:term":[{"taxonomy":"category","embeddable":tru
e,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/categories?post=1739"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/tags?post=1739"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}