{"id":1688,"date":"2026-06-05T17:42:27","date_gmt":"2026-06-05T09:42:27","guid":{"rendered":"https:\/\/www.izhuhn.cn\/?p=1688"},"modified":"2026-06-05T18:00:06","modified_gmt":"2026-06-05T10:00:06","slug":"evalscope-%e7%a6%bb%e7%ba%bf%e8%af%84%e6%b5%8b%e9%83%a8%e7%bd%b2%e7%ae%80%e8%ae%b0%ef%bc%88%e5%8d%8e%e4%b8%ba910b-openai-%e5%85%bc%e5%ae%b9%e6%8e%a5%e5%8f%a3%ef%bc%89","status":"publish","type":"post","link":"https:\/\/www.izhuhn.cn\/index.php\/2026\/06\/05\/evalscope-%e7%a6%bb%e7%ba%bf%e8%af%84%e6%b5%8b%e9%83%a8%e7%bd%b2%e7%ae%80%e8%ae%b0%ef%bc%88%e5%8d%8e%e4%b8%ba910b-openai-%e5%85%bc%e5%ae%b9%e6%8e%a5%e5%8f%a3%ef%bc%89\/","title":{"rendered":"EvalScope \u79bb\u7ebf\u8bc4\u6d4b\u90e8\u7f72\u7b80\u8bb0\uff08\u534e\u4e3a910B + OpenAI \u517c\u5bb9\u63a5\u53e3\uff09"},"content":{"rendered":"<h1 class=\"wp-block-heading\">EvalScope \u79bb\u7ebf\u8bc4\u6d4b\u90e8\u7f72\u7b80\u8bb0\uff08\u534e\u4e3a910B ARM + OpenAI \u517c\u5bb9\u63a5\u53e3\uff09<\/h1>\n<h2 class=\"wp-block-heading\">\u80cc\u666f<\/h2>\n<p class=\"wp-block-paragraph\">\u5728\u534e\u4e3a 910B (ARM64) \u65ad\u7f51\u670d\u52a1\u5668\u4e0a\u8dd1 EvalScope \u8bc4\u6d4b\u6846\u67b6\u3002Intel Mac \u4e0a\u4ea4\u53c9\u51c6\u5907 ARM64 \u955c\u50cf\u548c\u6570\u636e\u96c6\uff0c\u6253\u5305\u540e scp \u5bfc\u5165\u5373\u53ef\u3002\u8986\u76d6\u901a\u7528\u77e5\u8bc6\u3001\u6570\u5b66\u63a8\u7406\u3001\u957f\u6587\u672c\u63a8\u7406\u3001\u4ee3\u7801\u3001Agent\/\u5de5\u5177\u8c03\u7528\u4e94\u4e2a\u7ef4\u5ea6\uff0c\u4e5d\u4e2a\u6807\u51c6\u6570\u636e\u96c6\u3002<\/p>\n<p class=\"wp-block-paragraph\">\u8bc4\u6d4b\u6846\u67b6\uff1a[EvalScope](https:\/\/github.com\/modelscope\/evalscope) v1.7.1\uff0cModelScope \u793e\u533a\u7ef4\u62a4\u3002<\/p>\n<h2 class=\"wp-block-heading\">\u4e00\u3001\u4ea7\u7269\u8bf4\u660e<\/h2>\n<p class=\"wp-block-paragraph\">\u79bb\u7ebf\u5305\u5206\u4e24\u90e8\u5206\uff0c**\u5206\u522b\u6253\u5305\u4f20\u8f93**\uff1a<\/p>\n<h3 class=\"wp-block-heading\">1.1 Docker 
\u955c\u50cf\uff08\u542b\u8bc4\u6d4b\u5f15\u64ce\uff09<\/h3>\n<pre class=\"wp-block-code\"><code>evalscope-offline-arm64.tar.gz  (6.2GB)<\/code><\/pre>\n<p class=\"wp-block-paragraph\">\u5185\u542b Python 3.10 + evalscope 1.7.1 + bfcl-eval + swebench + tau_bench \u53ca\u5168\u90e8\u4f9d\u8d56\uff08\u5305\u62ec torch ARM64 CPU \u7248\uff09\u3002**\u955c\u50cf\u672c\u8eab\u4e0d\u542b\u6570\u636e\u96c6**\uff0c\u6570\u636e\u901a\u8fc7 Volume \u6302\u8f7d\u3002<\/p>\n<h3 class=\"wp-block-heading\">1.2 \u6570\u636e\u96c6\u7f13\u5b58\uff08\u72ec\u7acb\u6302\u8f7d\uff09<\/h3>\n<pre class=\"wp-block-code\"><code>modelscope_cache\/  (718MB)<\/code><\/pre>\n<p class=\"wp-block-paragraph\">\u4e03\u4e2a\u6570\u636e\u96c6\u4ece ModelScope Hub \u9884\u4e0b\u8f7d\uff0c\u76ee\u5f55\u7ed3\u6784\uff1a<\/p>\n<pre class=\"wp-block-code\"><code>modelscope_cache\/\n\u251c\u2500\u2500 cais\/mmlu\/                      # MMLU 57\u5b66\u79d1\n\u251c\u2500\u2500 evalscope\/ceval\/                # C-Eval \u4e2d\u6587\n\u251c\u2500\u2500 AI-ModelScope\/gpqa_diamond\/     # GPQA Diamond\n\u251c\u2500\u2500 ZhipuAI\/LongBench-v2\/           # LongBench-v2\n\u251c\u2500\u2500 knoveleng\/Minerva-Math\/         # Minerva Math\n\u251c\u2500\u2500 opencompass\/humaneval\/          # HumanEval\n\u2514\u2500\u2500 evalscope\/swe-bench-verified-mini\/  # SWE-bench mini<\/code><\/pre>\n<p class=\"wp-block-paragraph\">bfcl_v4 \u6570\u636e\u5185\u7f6e\u5728 bfcl-eval \u5305\u4e2d\uff0ctau_bench \u6570\u636e\u7531\u5176\u5305\u7ba1\u7406\uff0c\u65e0\u9700\u989d\u5916\u4e0b\u8f7d\u3002<\/p>\n<h3 class=\"wp-block-heading\">1.3 \u8bc4\u6d4b\u914d\u7f6e\u6587\u4ef6\uff08\u53ef\u9009\uff09<\/h3>\n<pre class=\"wp-block-code\"><code>configs\/\n\u251c\u2500\u2500 910b_eval_core.yaml             # \u6838\u5fc3\u8bc4\u6d4b\uff08MMLU\/C-Eval\/GPQA\/LongBench\/Math\/HumanEval\uff09\n\u251c\u2500\u2500 910b_eval_agent.yaml            # Agent\u8bc4\u6d4b\uff08BFCL-v4\/tau_bench\uff09\n\u2514\u2500\u2500 
910b_eval_swe.yaml              # SWE-bench \u4ee3\u7801\u4fee\u590d\u8bc4\u6d4b<\/code><\/pre>\n<h2 class=\"wp-block-heading\">\u4e8c\u3001\u8986\u76d6\u7ef4\u5ea6\u4e0e\u6570\u636e\u96c6<\/h2>\n<figure class=\"wp-block-table\"><table><thead><tr><th>\u7ef4\u5ea6<\/th><th>\u6570\u636e\u96c6<\/th><th>\u6837\u672c\u91cf<\/th><th>\u8bc4\u6d4b\u6307\u6807<\/th><th>few-shot<\/th><\/tr><\/thead><tr><td>\u901a\u7528\u77e5\u8bc6<\/td><td>MMLU<\/td><td>~14k<\/td><td>Accuracy<\/td><td>5<\/td><\/tr><tr><td>\u901a\u7528\u77e5\u8bc6<\/td><td>C-Eval<\/td><td>~14k<\/td><td>Accuracy<\/td><td>5<\/td><\/tr><tr><td>\u901a\u7528\u77e5\u8bc6<\/td><td>GPQA Diamond<\/td><td>198<\/td><td>Accuracy<\/td><td>0<\/td><\/tr><tr><td>\u6570\u5b66\u63a8\u7406<\/td><td>Minerva-Math<\/td><td>272<\/td><td>Accuracy<\/td><td>0<\/td><\/tr><tr><td>\u957f\u6587\u672c\u63a8\u7406<\/td><td>LongBench-v2<\/td><td>100<\/td><td>Accuracy<\/td><td>-<\/td><\/tr><tr><td>\u4ee3\u7801<\/td><td>HumanEval<\/td><td>164<\/td><td>pass@1<\/td><td>0<\/td><\/tr><tr><td>\u4ee3\u7801<\/td><td>SWE-bench-mini<\/td><td>50<\/td><td>resolve rate<\/td><td>0<\/td><\/tr><tr><td>Agent\/\u5de5\u5177\u8c03\u7528<\/td><td>BFCL-v4<\/td><td>~1700<\/td><td>Accuracy<\/td><td>0<\/td><\/tr><tr><td>Agent<\/td><td>tau_bench<\/td><td>\u591a\u57df<\/td><td>Task Success<\/td><td>0<\/td><\/tr><\/table><\/figure>\n<h2 class=\"wp-block-heading\">\u4e09\u3001910B \u79bb\u7ebf\u90e8\u7f72\u6b65\u9aa4<\/h2>\n<h3 class=\"wp-block-heading\">3.1 \u6587\u4ef6\u4f20\u8f93<\/h3>\n<pre class=\"wp-block-code\"><code># \u4ece Mac \u4f20\u5230 910B\nscp evalscope-offline-arm64.tar.gz user@910b:\/data\/eval\/\nscp -r modelscope_cache\/ user@910b:\/data\/eval\/\nscp -r configs\/ user@910b:\/data\/eval\/<\/code><\/pre>\n<h3 class=\"wp-block-heading\">3.2 \u52a0\u8f7d\u955c\u50cf<\/h3>\n<pre class=\"wp-block-code\"><code>ssh user@910b\ncd \/data\/eval\ndocker load -i evalscope-offline-arm64.tar.gz\n# \u786e\u8ba4\ndocker images evalscope-offline:arm64\n# \u8f93\u51fa: 
evalscope-offline:arm64 ... 9.96GB<\/code><\/pre>\n<h3 class=\"wp-block-heading\">3.3 \u76ee\u5f55\u89c4\u5212<\/h3>\n<p class=\"wp-block-paragraph\">910B \u4e0a\u6700\u7ec8\u76ee\u5f55\u7ed3\u6784\uff1a<\/p>\n<pre class=\"wp-block-code\"><code>\/data\/eval\/\n\u251c\u2500\u2500 modelscope_cache\/      # \u6570\u636e\u96c6\uff08\u53ea\u8bfb\u6302\u8f7d\u5efa\u8bae\u4f20\u4e2a\u526f\u672c\uff09\n\u2502   \u251c\u2500\u2500 cais\/\n\u2502   \u251c\u2500\u2500 evalscope\/\n\u2502   \u2514\u2500\u2500 ...\n\u251c\u2500\u2500 reports\/               # \u8bc4\u6d4b\u62a5\u544a\u8f93\u51fa\uff08\u9700\u521b\u5efa\uff09\n\u251c\u2500\u2500 outputs\/               # \u4e2d\u95f4\u7ed3\u679c\uff08\u81ea\u52a8\u521b\u5efa\uff09\n\u2514\u2500\u2500 configs\/               # \u8bc4\u6d4bYAML\uff08\u53ef\u9009\uff09<\/code><\/pre>\n<pre class=\"wp-block-code\"><code>mkdir -p \/data\/eval\/reports \/data\/eval\/outputs<\/code><\/pre>\n<h3 class=\"wp-block-heading\">3.4 \u524d\u63d0\uff1a\u6a21\u578b\u63a8\u7406\u670d\u52a1<\/h3>\n<p class=\"wp-block-paragraph\">910B \u4e0a\u5fc5\u987b\u8fd0\u884c\u4e00\u4e2a **OpenAI \u517c\u5bb9\u7684 `\/v1` API**\u3002\u6700\u7b80\u5355\u7684\u9a8c\u8bc1\uff1a<\/p>\n<pre class=\"wp-block-code\"><code># \u5047\u8bbe\u6a21\u578b\u670d\u52a1\u8dd1\u5728 8000 \u7aef\u53e3\ncurl http:\/\/127.0.0.1:8000\/v1\/models\n# \u9884\u671f\u8fd4\u56de: {\"object\":\"list\",\"data\":[{\"id\":\"\u4f60\u7684\u6a21\u578b\u540d\",\"object\":\"model\"}]}<\/code><\/pre>\n<p class=\"wp-block-paragraph\">\u5982\u679c\u4f60\u7684 910B \u6a21\u578b\u670d\u52a1\u4e0d\u662f OpenAI \u683c\u5f0f\uff0c\u8bf7\u770b\u7b2c\u4e94\u8282\u9002\u914d\u5c42\u65b9\u6848\u3002<\/p>\n<h3 class=\"wp-block-heading\">3.5 \u8fd0\u884c\u8bc4\u6d4b<\/h3>\n<h4 class=\"wp-block-heading\">\u57fa\u7840\u547d\u4ee4\u6a21\u677f<\/h4>\n<p class=\"wp-block-paragraph\">\u6838\u5fc3\u53c2\u6570\uff1a<\/p>\n<figure 
class=\"wp-block-table\"><table><thead><tr><th>\u53c2\u6570<\/th><th>\u542b\u4e49<\/th><th>\u793a\u4f8b<\/th><\/tr><\/thead><tr><td>`--model`<\/td><td>\u670d\u52a1\u7aef\u6a21\u578b\u540d\uff08\u4e0e \/v1\/models \u8fd4\u56de\u4e00\u81f4\uff09<\/td><td>`qwen2.5-72b`<\/td><\/tr><tr><td>`--api-url`<\/td><td>OpenAI \u517c\u5bb9\u7aef\u70b9<\/td><td>`http:\/\/127.0.0.1:8000\/v1`<\/td><\/tr><tr><td>`--datasets`<\/td><td>\u6570\u636e\u96c6\u5217\u8868\uff0c\u7a7a\u683c\u5206\u9694<\/td><td>`mmlu ceval humaneval`<\/td><\/tr><tr><td>`--limit`<\/td><td>\u6bcf\u4e2a\u6570\u636e\u96c6\u8dd1 N \u6761\uff08\u6d4b\u8bd5\u7528\uff09<\/td><td>`10`\uff08\u6b63\u5f0f\u8dd1\u4e0d\u8bbe\u6b64\u53c2\u6570\uff09<\/td><\/tr><tr><td>`--generation-config`<\/td><td>\u751f\u6210\u53c2\u6570 JSON<\/td><td>`{\"temperature\":0,\"max_tokens\":2048}`<\/td><\/tr><tr><td>`-v` \u6302\u8f7d<\/td><td>\u6570\u636e\u96c6\u53ea\u8bfb\u6302\u8f7d<\/td><td>`modelscope_cache:\/workspace\/cache\/modelscope`<\/td><\/tr><tr><td>`-v` \u6302\u8f7d<\/td><td>\u62a5\u544a\u8f93\u51fa\u76ee\u5f55<\/td><td>`reports:\/workspace\/reports`<\/td><\/tr><\/table><\/figure>\n<h4 class=\"wp-block-heading\">3.5.1 \u6838\u5fc3\u8bc4\u6d4b\u4e00\u6761\u547d\u4ee4<\/h4>\n<pre class=\"wp-block-code\"><code>docker run --rm --network host \\\n  -v \/data\/eval\/modelscope_cache:\/workspace\/cache\/modelscope:ro \\\n  -v \/data\/eval\/reports:\/workspace\/reports \\\n  -v \/data\/eval\/outputs:\/workspace\/outputs \\\n  evalscope-offline:arm64 \\\n  evalscope eval \\\n    --model qwen2.5-72b \\\n    --api-url http:\/\/127.0.0.1:8000\/v1 \\\n    --api-key \"***\" \\\n    --eval-type openai_api \\\n    --dataset-hub modelscope \\\n    --dataset-dir \/workspace\/cache\/modelscope\/datasets \\\n    --datasets mmlu ceval gpqa_diamond longbench_v2 minerva_math humaneval \\\n    --limit 10 \\\n    --generation-config '{\"temperature\":0,\"max_tokens\":2048,\"timeout\":120,\"retries\":2}'<\/code><\/pre>\n<p 
class=\"wp-block-paragraph\">`--limit 10` \u662f\u6d4b\u8bd5\u7528\u7684\uff0c\u6b63\u5f0f\u8dd1**\u53bb\u6389\u8fd9\u4e2a\u53c2\u6570**\u5373\u53ef\u8dd1\u5168\u91cf\u3002<\/p>\n<h4 class=\"wp-block-heading\">3.5.2 \u5355\u72ec\u8dd1\u67d0\u4e2a\u6570\u636e\u96c6<\/h4>\n<pre class=\"wp-block-code\"><code># \u53ea\u8dd1 HumanEval\ndocker run --rm --network host \\\n  -v \/data\/eval\/modelscope_cache:\/workspace\/cache\/modelscope:ro \\\n  -v \/data\/eval\/reports:\/workspace\/reports \\\n  evalscope-offline:arm64 \\\n  evalscope eval \\\n    --model qwen2.5-72b \\\n    --api-url http:\/\/127.0.0.1:8000\/v1 \\\n    --api-key \"***\" \\\n    --eval-type openai_api \\\n    --dataset-hub modelscope \\\n    --dataset-dir \/workspace\/cache\/modelscope\/datasets \\\n    --datasets humaneval \\\n    --generation-config '{\"temperature\":0,\"max_tokens\":2048,\"timeout\":120}'<\/code><\/pre>\n<h4 class=\"wp-block-heading\">3.5.3 SWE-bench\uff08\u9700\u8981 Docker socket\uff09<\/h4>\n<p class=\"wp-block-paragraph\">SWE-bench \u4f1a\u5728\u5bb9\u5668\u5185\u542f\u52a8 Docker \u6765\u6784\u5efa\u548c\u9a8c\u8bc1\u4ee3\u7801\u4fee\u590d\u73af\u5883\uff0c**\u5fc5\u987b\u6302\u8f7d\u5bbf\u4e3b Docker socket**\uff1a<\/p>\n<pre class=\"wp-block-code\"><code>docker run --rm --network host \\\n  -v \/var\/run\/docker.sock:\/var\/run\/docker.sock \\\n  -v \/data\/eval\/modelscope_cache:\/workspace\/cache\/modelscope:ro \\\n  -v \/data\/eval\/reports:\/workspace\/reports \\\n  evalscope-offline:arm64 \\\n  evalscope eval \\\n    --model qwen2.5-72b \\\n    --api-url http:\/\/127.0.0.1:8000\/v1 \\\n    --api-key \"***\" \\\n    --eval-type openai_api \\\n    --dataset-hub modelscope \\\n    --dataset-dir \/workspace\/cache\/modelscope\/datasets \\\n    --datasets swe_bench_verified_mini \\\n    --limit 1 \\\n    --dataset-args '{\"swe_bench_verified_mini\":{\"extra_params\":{\"force_arch\":\"arm64\"}}}' \\\n    --generation-config 
'{\"temperature\":0,\"max_tokens\":4096,\"timeout\":300,\"retries\":1}'<\/code><\/pre>\n<p class=\"wp-block-paragraph\">\u6ce8\u610f `force_arch: arm64` \u786e\u4fdd\u6784\u5efa ARM64 \u6d4b\u8bd5\u5bb9\u5668\u3002<\/p>\n<h4 class=\"wp-block-heading\">3.5.4 BFCL-v4 \u5de5\u5177\u8c03\u7528<\/h4>\n<p class=\"wp-block-paragraph\">BFCL \u6570\u636e\u5185\u7f6e\u4e8e bfcl-eval \u5305\uff0c\u4e0d\u9700\u8981\u6570\u636e\u96c6\u6302\u8f7d\u4e5f\u80fd\u8dd1\uff08\u4f46\u65ad\u7f51\u73af\u5883\u9700\u8981\u6302\u8f7d dataset \u8def\u5f84\u4ee5\u907f\u514d\u5c1d\u8bd5\u8054\u7f51\uff09\uff1a<\/p>\n<pre class=\"wp-block-code\"><code>docker run --rm --network host \\\n  -v \/data\/eval\/modelscope_cache:\/workspace\/cache\/modelscope:ro \\\n  -v \/data\/eval\/reports:\/workspace\/reports \\\n  evalscope-offline:arm64 \\\n  evalscope eval \\\n    --model qwen2.5-72b \\\n    --api-url http:\/\/127.0.0.1:8000\/v1 \\\n    --api-key \"***\" \\\n    --eval-type openai_api \\\n    --dataset-hub modelscope \\\n    --dataset-dir \/workspace\/cache\/modelscope\/datasets \\\n    --datasets bfcl_v4 \\\n    --limit 3 \\\n    --dataset-args '{\"bfcl_v4\":{\"subset_list\":[\"simple_python\",\"simple_java\",\"simple_javascript\",\"multiple\",\"parallel\",\"parallel_multiple\",\"irrelevance\",\"multi_turn_base\",\"multi_turn_miss_func\",\"multi_turn_miss_param\",\"multi_turn_long_context\"],\"extra_params\":{\"underscore_to_dot\":true,\"is_fc_model\":true}}}' \\\n    --generation-config '{\"temperature\":0,\"max_tokens\":2048,\"timeout\":180}'<\/code><\/pre>\n<p class=\"wp-block-paragraph\">BFCL subset \u8bf4\u660e\uff1a<\/p>\n<figure 
class=\"wp-block-table\"><table><thead><tr><th>\u5b50\u96c6<\/th><th>\u542b\u4e49<\/th><th>\u8bf4\u660e<\/th><\/tr><\/thead><tr><td>`simple_python\/java\/javascript`<\/td><td>\u5355\u51fd\u6570\u8c03\u7528<\/td><td>\u57fa\u7840<\/td><\/tr><tr><td>`multiple`<\/td><td>\u591a\u51fd\u6570\u4e32\u884c\u8c03\u7528<\/td><td>\u4e2d\u7b49<\/td><\/tr><tr><td>`parallel`<\/td><td>\u5e76\u884c\u591a\u51fd\u6570\u8c03\u7528<\/td><td>\u4e2d\u7b49<\/td><\/tr><tr><td>`parallel_multiple`<\/td><td>\u5e76\u884c+\u4e32\u884c\u6df7\u7528<\/td><td>\u8f83\u96be<\/td><\/tr><tr><td>`irrelevance`<\/td><td>\u65e0\u5173\u51fd\u6570\u5e72\u6270<\/td><td>\u8003\u5bdf\u5224\u65ad\u529b<\/td><\/tr><tr><td>`multi_turn_*`<\/td><td>\u591a\u8f6e\u5bf9\u8bdd<\/td><td>Agent \u80fd\u529b<\/td><\/tr><tr><td>`live_*`<\/td><td>\u9700\u8981\u771f\u5b9e API \u8c03\u7528<\/td><td>**\u79bb\u7ebf\u8df3\u8fc7**<\/td><\/tr><tr><td>`memory_*`<\/td><td>\u9700\u8981\u5411\u91cf\u8bb0\u5fc6<\/td><td>**\u9700\u8981\u989d\u5916\u6a21\u578b\uff0c\u8df3\u8fc7**<\/td><\/tr><\/table><\/figure>\n<h4 class=\"wp-block-heading\">3.5.5 tau_bench Agent<\/h4>\n<pre class=\"wp-block-code\"><code>docker run --rm --network host \\\n  -v \/data\/eval\/modelscope_cache:\/workspace\/cache\/modelscope:ro \\\n  -v \/data\/eval\/reports:\/workspace\/reports \\\n  evalscope-offline:arm64 \\\n  evalscope eval \\\n    --model qwen2.5-72b \\\n    --api-url http:\/\/127.0.0.1:8000\/v1 \\\n    --api-key \"***\" \\\n    --eval-type openai_api \\\n    --dataset-hub modelscope \\\n    --dataset-dir \/workspace\/cache\/modelscope\/datasets \\\n    --datasets tau_bench \\\n    --limit 3 \\\n    --dataset-args '{\"tau_bench\":{\"extra_params\":{\"user_model\":\"qwen2.5-72b\",\"api_key\":\"***\",\"api_base\":\"http:\/\/127.0.0.1:8000\/v1\"}}}' \\\n    --generation-config '{\"temperature\":0,\"max_tokens\":2048,\"timeout\":180}'<\/code><\/pre>\n<h2 class=\"wp-block-heading\">\u56db\u3001OpenAI \u6807\u51c6\u63a5\u53e3\u683c\u5f0f\u8981\u6c42<\/h2>\n<p 
class=\"wp-block-paragraph\">EvalScope `openai_api` \u6a21\u5f0f\u5bf9\u6a21\u578b\u670d\u52a1\u7684\u8981\u6c42\u5982\u4e0b\u3002<\/p>\n<h3 class=\"wp-block-heading\">4.1 GET \/v1\/models<\/h3>\n<p class=\"wp-block-paragraph\">\u56fa\u5b9a\u8fd4\u56de\u683c\u5f0f\uff1a<\/p>\n<pre class=\"wp-block-code\"><code>{\n  \"object\": \"list\",\n  \"data\": [\n    {\"id\": \"your-model-name\", \"object\": \"model\"}\n  ]\n}<\/code><\/pre>\n<p class=\"wp-block-paragraph\">`id` \u7684\u503c\u5fc5\u987b\u548c `--model` \u53c2\u6570\u4e00\u81f4\u3002<\/p>\n<h3 class=\"wp-block-heading\">4.2 POST \/v1\/chat\/completions<\/h3>\n<p class=\"wp-block-paragraph\">**\u8bf7\u6c42**\uff08EvalScope \u53d1\u8fc7\u6765\u7684\u683c\u5f0f\uff09\uff1a<\/p>\n<pre class=\"wp-block-code\"><code>{\n  \"model\": \"your-model-name\",\n  \"messages\": [\n    {\"role\": \"system\", \"content\": \"\u7cfb\u7edf\u63d0\u793a\"},\n    {\"role\": \"user\", \"content\": \"\u7528\u6237\u95ee\u9898\"}\n  ],\n  \"temperature\": 0,\n  \"max_tokens\": 2048\n}<\/code><\/pre>\n<p class=\"wp-block-paragraph\">**BFCL \u5de5\u5177\u8c03\u7528\u65f6\u4f1a\u591a\u4e00\u4e2a `tools` \u5b57\u6bb5**\uff1a<\/p>\n<pre class=\"wp-block-code\"><code>{\n  \"model\": \"your-model-name\",\n  \"messages\": [...],\n  \"tools\": [\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"get_weather\",\n        \"description\": \"\u83b7\u53d6\u67d0\u57ce\u5e02\u5929\u6c14\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"city\": {\"type\": \"string\", \"description\": \"\u57ce\u5e02\u540d\"}\n          },\n          \"required\": [\"city\"]\n        }\n      }\n    }\n  ],\n  \"tool_choice\": \"auto\"\n}<\/code><\/pre>\n<p class=\"wp-block-paragraph\">**\u6b63\u5e38\u8fd4\u56de**\uff1a<\/p>\n<pre class=\"wp-block-code\"><code>{\n  \"id\": \"chatcmpl-xxx\",\n  \"object\": \"chat.completion\",\n  \"model\": \"your-model-name\",\n  \"choices\": [\n    
{\n      \"index\": 0,\n      \"message\": {\n        \"role\": \"assistant\",\n        \"content\": \"\u56de\u7b54\u5185\u5bb9\"\n      },\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 100,\n    \"completion_tokens\": 50,\n    \"total_tokens\": 150\n  }\n}<\/code><\/pre>\n<p class=\"wp-block-paragraph\">**\u5de5\u5177\u8c03\u7528\u8fd4\u56de**\uff08content \u4e3a null\uff0c\u4f7f\u7528 tool_calls\uff09\uff1a<\/p>\n<pre class=\"wp-block-code\"><code>{\n  \"id\": \"chatcmpl-xxx\",\n  \"object\": \"chat.completion\",\n  \"model\": \"your-model-name\",\n  \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\": \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n            \"id\": \"call_abc123\",\n            \"type\": \"function\",\n            \"function\": {\n              \"name\": \"get_weather\",\n              \"arguments\": \"{\\\"city\\\": \\\"Beijing\\\"}\"\n            }\n          }\n        ]\n      },\n      \"finish_reason\": \"tool_calls\"\n    }\n  ],\n  \"usage\": {...}\n}<\/code><\/pre>\n<p class=\"wp-block-paragraph\">`usage` \u5b57\u6bb5\u4e2d\u7684 token \u6570\u586b 0 \u4e5f\u4e0d\u5f71\u54cd\u8bc4\u6d4b\u7ed3\u679c\uff0c\u4f46 `id`\u3001`object`\u3001`choices` \u8fd9\u4e2a\u9aa8\u67b6\u5fc5\u987b\u5b8c\u6574\u3002<\/p>\n<h2 class=\"wp-block-heading\">\u4e94\u3001\u975e\u6807\u51c6\u63a5\u53e3\u9002\u914d\u65b9\u6848<\/h2>\n<p class=\"wp-block-paragraph\">\u5982\u679c 910B \u4e0a\u7684\u6a21\u578b\u670d\u52a1\u4e0d\u662f OpenAI \u683c\u5f0f\uff08\u6bd4\u5982\u662f\u81ea\u5b9a\u4e49\u7684 gRPC \/ HTTP\uff09\uff0c\u6700\u7b80\u65b9\u6848\u662f\u7528 FastAPI \u5199\u4e00\u4e2a\u8f6c\u6362\u5c42\uff1a<\/p>\n<pre class=\"wp-block-code\"><code># adapter.py - \u5728 910B \u4e0a\u8fd0\u884c\nfrom fastapi import FastAPI\nimport httpx\nimport uuid\n\napp = FastAPI()\nYOUR_MODEL_ENDPOINT = 
\"http:\/\/127.0.0.1:9090\/generate\"\n\n@app.get(\"\/v1\/models\")\nasync def models():\n    return {\"object\": \"list\", \"data\": [{\"id\": \"my-model\", \"object\": \"model\"}]}\n\n@app.post(\"\/v1\/chat\/completions\")\nasync def chat(req: dict):\n    messages = req.get(\"messages\", [])\n    tools = req.get(\"tools\")\n\n    # \u7ffb\u8bd1\u6210\u4f60\u7684 910B \u6a21\u578b\u683c\u5f0f\n    prompt = messages[-1].get(\"content\", \"\") if messages else \"\"\n    if messages and messages[0].get(\"role\") == \"system\":\n        prompt = messages[0][\"content\"] + \"\\n\\n\" + prompt\n\n    # \u5982\u679c\u5e26 tools\uff0c\u628a\u51fd\u6570\u5b9a\u4e49\u6ce8\u5165 prompt\n    if tools:\n        tool_desc = \"\\n\".join(\n            f\"Function: {t['function']['name']}\\n{t['function']['description']}\"\n            for t in tools\n        )\n        prompt = f\"{tool_desc}\\n\\nUser request: {prompt}\\n\\nRespond with function call JSON.\"\n\n    payload = {\n        \"prompt\": prompt,\n        \"max_new_tokens\": req.get(\"max_tokens\", 2048),\n        \"temperature\": req.get(\"temperature\", 0),\n    }\n\n    async with httpx.AsyncClient() as client:\n        resp = await client.post(YOUR_MODEL_ENDPOINT,\n            json=payload, timeout=300)\n    result = resp.json()\n\n    return {\n        \"id\": f\"chatcmpl-{uuid.uuid4().hex[:12]}\",\n        \"object\": \"chat.completion\",\n        \"model\": req[\"model\"],\n        \"choices\": [{\n            \"index\": 0,\n            \"message\": {\"role\": \"assistant\", \"content\": result[\"text\"]},\n            \"finish_reason\": \"stop\"\n        }],\n        \"usage\": {\"prompt_tokens\": 0, \"completion_tokens\": 0, \"total_tokens\": 0}\n    }<\/code><\/pre>\n<p class=\"wp-block-paragraph\">\u542f\u52a8\u9002\u914d\u5c42\uff1a<\/p>\n<pre class=\"wp-block-code\"><code>pip install fastapi uvicorn httpx\nuvicorn adapter:app --host 0.0.0.0 --port 8000<\/code><\/pre>\n<p 
class=\"wp-block-paragraph\">\u7136\u540e EvalScope \u6307\u5411 `http:\/\/127.0.0.1:8000\/v1` \u5373\u53ef\u3002<\/p>\n<h2 class=\"wp-block-heading\">\u516d\u3001\u5192\u70df\u9a8c\u8bc1<\/h2>\n<p class=\"wp-block-paragraph\">\u5728\u6b63\u5f0f\u8dd1\u4e4b\u524d\uff0c\u53ef\u4ee5\u7528\u5185\u7f6e mock API \u9a8c\u8bc1 EvalScope \u94fe\u8def\u662f\u5426\u901a\uff1a<\/p>\n<pre class=\"wp-block-code\"><code># \u7ec8\u7aef1\uff1a\u542f\u52a8 mock API\ndocker run --rm -p 18080:18080 evalscope-offline:arm64 \\\n  python -c \"\nfrom http.server import *\nimport json\nclass H(BaseHTTPRequestHandler):\n    def do_GET(s):\n        s.send_response(200); s.end_headers()\n        s.wfile.write(json.dumps({'object':'list','data':[{'id':'mock','object':'model'}]}).encode())\n    def do_POST(s):\n        s.send_response(200); s.end_headers()\n        s.wfile.write(json.dumps({'id':'x','object':'chat.completion','model':'mock','choices':[{'index':0,'message':{'role':'assistant','content':'ANSWER: A'},'finish_reason':'stop'}],'usage':{'prompt_tokens':1,'completion_tokens':1,'total_tokens':2}}).encode())\nHTTPServer(('0.0.0.0',18080),H).serve_forever()\n\"\n\n# \u7ec8\u7aef2\uff1a\u8dd1\u8bc4\u6d4b\ndocker run --rm --network host \\\n  -v \/data\/eval\/modelscope_cache:\/workspace\/cache\/modelscope \\\n  -v \/data\/eval\/reports:\/workspace\/reports \\\n  evalscope-offline:arm64 \\\n  evalscope eval \\\n    --model mock \\\n    --api-url http:\/\/127.0.0.1:18080\/v1 \\\n    --api-key \"***\" \\\n    --eval-type openai_api \\\n    --dataset-hub modelscope \\\n    --dataset-dir \/workspace\/cache\/modelscope\/datasets \\\n    --datasets humaneval \\\n    --limit 1<\/code><\/pre>\n<p class=\"wp-block-paragraph\">\u770b\u5230 `Benchmark humaneval evaluation finished` \u5373\u8868\u793a\u94fe\u8def\u6b63\u5e38\u3002<\/p>\n<h2 class=\"wp-block-heading\">\u4e03\u3001\u6ce8\u610f\u4e8b\u9879<\/h2>\n<p class=\"wp-block-paragraph\">1. 
**\u6570\u636e\u96c6\u4e0d\u5728\u955c\u50cf\u91cc**\uff1a\u5fc5\u987b `-v` \u6302\u8f7d `modelscope_cache`\uff0c\u5426\u5219 EvalScope \u4f1a\u5c1d\u8bd5\u8054\u7f51\u4e0b\u8f7d\u800c\u5931\u8d25<\/p>\n<p class=\"wp-block-paragraph\">2. **\u53ea\u8bfb\u6302\u8f7d\u9677\u9631**\uff1a\u4e0d\u8981\u7528 `:ro`\uff0cEvalScope \u8fd0\u884c\u65f6\u4f1a\u5199\u7f13\u5b58\u5230 dataset \u76ee\u5f55\uff083.5 \u8282\u793a\u4f8b\u547d\u4ee4\u5e26\u4e86 `:ro`\uff0c\u5982\u9047\u5199\u5165\u62a5\u9519\u8bf7\u53bb\u6389\uff09<\/p>\n<p class=\"wp-block-paragraph\">3. **SWE-bench \u9700\u8981 Docker socket**\uff1a`-v \/var\/run\/docker.sock:\/var\/run\/docker.sock`<\/p>\n<p class=\"wp-block-paragraph\">4. **BFCL \u79bb\u7ebf\u9650\u5236**\uff1a\u8df3\u8fc7 `live_*`\uff08\u9700\u8981\u771f\u5b9e API\uff09\u548c `memory_*`\uff08\u9700\u8981\u5411\u91cf\u6a21\u578b\uff09\u5b50\u96c6<\/p>\n<p class=\"wp-block-paragraph\">5. **torch\/NVIDIA \u5305**\uff1a\u955c\u50cf\u5185\u7684 torch \u662f ARM64 CPU \u7248\uff0cCUDA \u5305\u53ea\u662f stub\u2014\u2014\u8bc4\u6d4b\u8d70 HTTP API\uff0c\u4e0d\u4f9d\u8d56 GPU<\/p>\n<p class=\"wp-block-paragraph\">6. **\u5185\u5b58\u5efa\u8bae**\uff1a10GB \u955c\u50cf + \u6570\u636e\u96c6\u52a0\u8f7d + \u6a21\u578b\u63a8\u7406\uff0c\u5efa\u8bae 910B >= 32GB \u5185\u5b58<\/p>\n<p class=\"wp-block-paragraph\">7. 
**\u6b63\u5f0f\u8dd1\u53bb\u6389 `--limit`**\uff1a\u4e0d\u52a0\u6b64\u53c2\u6570\u5373\u8dd1\u5168\u91cf<\/p>\n<p class=\"wp-block-paragraph\">\u7b80\u8bb0\u3002<\/p>","protected":false},"excerpt":{"rendered":"<p>EvalScope \u79bb\u7ebf\u8bc4\u6d4b\u90e8\u7f72\u7b80\u8bb0\uff08\u534e\u4e3a910B ARM + OpenAI \u517c\u5bb9\u63a5\u53e3\uff09 \u80cc\u666f \u5728\u534e\u4e3a 910B (ARM64)  &#8230;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[9,11,6,7,13,12,8,10],"class_list":["post-1688","post","type-post","status-publish","format-standard","hentry","category-uncategorized","tag-arm64","tag-bfcl","tag-evalscope","tag-llm","tag-openai","tag-swe-bench","tag-910b","tag-10"],"_links":{"self":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts\/1688","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/comments?post=1688"}],"version-history":[{"count":2,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts\/1688\/revisions"}],"predecessor-version":[{"id":1690,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/posts\/1688\/revisions\/1690"}],"wp:attachment":[{"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/media?parent=1688"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/categories?post=1688"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.izhuhn.cn\/index.php\/wp-json\/wp\/v2\/tags?post=1688"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}",
"templated":true}]}}