-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy path404.html
More file actions
1 lines (1 loc) · 50.9 KB
/
404.html
File metadata and controls
1 lines (1 loc) · 50.9 KB
1
<!doctype html>
<html lang="en" class="no-js">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width,initial-scale=1">
    <meta name="description" content="GPUStack is an open-source GPU cluster manager designed for efficient AI model deployment. It lets you run models efficiently on your own GPU hardware by choosing the best inference engines, scheduling GPU resources, analyzing model architectures, and automatically configuring deployment parameters.">
    <meta name="author" content="GPUStack.ai">
    <link rel="icon" href="/2.1/assets/logo.png">
    <meta name="generator" content="mkdocs-1.6.0, mkdocs-material-9.5.30">
    <title>GPUStack</title>

    <!-- Theme stylesheets, web fonts and site-specific overrides. -->
    <link rel="stylesheet" href="/2.1/assets/stylesheets/main.3cba04c6.min.css">
    <link rel="stylesheet" href="/2.1/assets/stylesheets/palette.06af60db.min.css">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
    <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
    <link rel="stylesheet" href="/2.1/stylesheets/extra.css">
    <!-- NOTE(review): "katex@0" is a floating version, so no SRI hash can be attached until it is pinned. -->
    <link rel="stylesheet" href="https://unpkg.com/katex@0/dist/katex.min.css">

    <script>
      // Global storage helpers. They are assigned without a declaration on
      // purpose so that they become properties of the global object and can be
      // called from the inline scripts later in this document.

      // Base URL of this documentation version; its pathname ("/2.1") is used
      // as the prefix of every storage key below.
      __md_scope = new URL("/2.1", location);

      // String hash with multiplier 31: `(hash << 5) - hash` is `hash * 31`.
      __md_hash = (text) =>
        [...text].reduce((hash, ch) => (hash << 5) - hash + ch.charCodeAt(0), 0);

      // Read and JSON-decode the value stored under "<scope pathname>.<key>".
      // Returns null when the key is absent (getItem yields null, which
      // JSON.parse turns into null) and, like __md_set, fails soft: a corrupt
      // value or a throwing storage object also yields null instead of
      // aborting the scripts that call this helper.
      __md_get = (key, storage = localStorage, scope = __md_scope) => {
        try {
          return JSON.parse(storage.getItem(scope.pathname + "." + key));
        } catch (err) {
          return null;
        }
      };

      // JSON-encode `value` and store it under "<scope pathname>.<key>".
      // Storage errors are deliberately ignored (best effort).
      __md_set = (key, value, storage = localStorage, scope = __md_scope) => {
        try {
          storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
        } catch (err) {}
      };
    </script>
  </head>
  <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
    <!-- Checkbox state holders toggled through the <label for="__drawer"> / <label for="__search"> elements. -->
    <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
    <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
    <label class="md-overlay" for="__drawer"></label>
    <div data-md-component="skip"> </div>
    <div data-md-component="announce"> </div>
    <!-- Outdated-version banner; rendered hidden. -->
    <div data-md-color-scheme="default" data-md-component="outdated" hidden>
      <aside
class="md-banner md-banner--warning">
        <div class="md-banner__inner md-grid md-typeset">
          You're not viewing the latest version.
          <!-- Root-absolute target. The previous value "..//2.1" was
               path-relative, so it resolved against whatever missing URL
               produced this 404 page and carried an empty path segment.
               NOTE(review): assumes the docs are served from the domain
               root, as the other "/2.1/..." links suggest; confirm. -->
          <a href="/">
            <strong>Click here to go to latest.</strong>
          </a>
        </div>
        <script>
          // Reveal the banner only when the per-session "__outdated" flag
          // has been stored as exactly `true`.
          var el = document.querySelector("[data-md-component=outdated]"),
              outdated = __md_get("__outdated", sessionStorage);
          if (outdated === true && el) {
            el.hidden = false;
          }
        </script>
      </aside>
    </div>

    <!-- Page header: logo, drawer toggle, title and colour-palette switcher. -->
    <header class="md-header" data-md-component="header">
      <nav class="md-header__inner md-grid" aria-label="Header">
        <a href="/2.1/." title="GPUStack" class="md-header__button md-logo" aria-label="GPUStack" data-md-component="logo">
          <img src="/2.1/assets/logo-white.png" alt="logo">
        </a>
        <!-- Label for the #__drawer checkbox. -->
        <label class="md-header__button md-icon" for="__drawer">
          <svg xmlns="http://www.w3.org/2000/svg" viewbox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
        </label>
        <div class="md-header__title" data-md-component="header-title">
          <div class="md-header__ellipsis">
            <div class="md-header__topic">
              <span class="md-ellipsis"> GPUStack </span>
            </div>
            <div class="md-header__topic" data-md-component="header-topic">
              <span class="md-ellipsis"> </span>
            </div>
          </div>
        </div>
        <!-- Palette radios: each label's "for" targets the NEXT radio
             (__palette_0 -> __palette_1 -> __palette_2 -> __palette_0). -->
        <form class="md-header__option" data-md-component="palette">
          <input class="md-option" data-md-color-media="(prefers-color-scheme)" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_0">
          <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
            <svg xmlns="http://www.w3.org/2000/svg" viewbox="0 0 24 24"><path d="m14.3 16-.7-2h-3.2l-.7 2H7.8L11 7h2l3.2 9h-1.9M20 8.69V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12 20 8.69m-9.15 3.96h2.3L12 9l-1.15 3.65Z"/></svg>
          </label>
          <input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="custom" data-md-color-accent="orange" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_1">
          <label
class="md-header__button md-icon" title="Switch to dark mode" for=__palette_2 hidden> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12 20 8.69Z"/></svg> </label> <input class=md-option data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme=slate data-md-color-primary=light-blue data-md-color-accent=orange aria-label="Switch to system preference" type=radio name=__palette id=__palette_2> <label class="md-header__button md-icon" title="Switch to system preference" for=__palette_0 hidden> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12c0-2.42-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12 20 8.69Z"/></svg> </label> </form> <script>var media,input,key,value,palette=__md_get("__palette");if(palette&&palette.color){"(prefers-color-scheme)"===palette.color.media&&(media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']"),palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent"));for([key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script> <label class="md-header__button md-icon" for=__search> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 
1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg> </label> <div class=md-search data-md-component=search role=dialog> <label class=md-search__overlay for=__search></label> <div class=md-search__inner role=search> <form class=md-search__form name=search> <input type=text class=md-search__input name=query aria-label=Search placeholder=Search autocapitalize=off autocorrect=off autocomplete=off spellcheck=false data-md-component=search-query required> <label class="md-search__icon md-icon" for=__search> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg> </label> <nav class=md-search__options aria-label=Search> <a href=javascript:void(0) class="md-search__icon md-icon" title=Share aria-label=Share data-clipboard data-clipboard-text data-md-component=search-share tabindex=-1> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7 0-.24-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91 1.61 0 2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08Z"/></svg> </a> <button type=reset class="md-search__icon md-icon" title=Clear aria-label=Clear tabindex=-1> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg> </button> 
</nav> <div class=md-search__suggest data-md-component=search-suggest></div> </form> <div class=md-search__output> <div class=md-search__scrollwrap tabindex=0 data-md-scrollfix> <div class=md-search-result data-md-component=search-result> <div class=md-search-result__meta> Initializing search </div> <ol class=md-search-result__list role=presentation></ol> </div> </div> </div> </div> </div> <div class=md-header__source> <a href=https://github.com/gpustack/gpustack title="Go to repository" class=md-source data-md-component=source> <div class="md-source__icon md-icon"> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 496 512"><!-- Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 
5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg> </div> <div class=md-source__repository> gpustack/gpustack </div> </a> </div> </nav> </header> <div class=md-container data-md-component=container> <nav class=md-tabs aria-label=Tabs data-md-component=tabs> <div class=md-grid> <ul class=md-tabs__list> <li class=md-tabs__item> <a href=/2.1/overview/ class=md-tabs__link> Home </a> </li> <li class=md-tabs__item> <a href=/2.1/performance-lab/overview/ class=md-tabs__link> Inference Performance Lab </a> </li> <li class=md-tabs__item> <a href=/2.1/image-selector/ class=md-tabs__link> Container Image Selector </a> </li> </ul> </div> </nav> <main class=md-main data-md-component=main> <div class="md-main__inner md-grid"> <div class="md-sidebar md-sidebar--primary" data-md-component=sidebar data-md-type=navigation> <div class=md-sidebar__scrollwrap> <div class=md-sidebar__inner> <nav class="md-nav md-nav--primary md-nav--lifted" aria-label=Navigation data-md-level=0> <label class=md-nav__title for=__drawer> <a href=/2.1/. 
title=GPUStack class="md-nav__button md-logo" aria-label=GPUStack data-md-component=logo> <img src=/2.1/assets/logo-white.png alt=logo> </a> GPUStack </label> <div class=md-nav__source> <a href=https://github.com/gpustack/gpustack title="Go to repository" class=md-source data-md-component=source> <div class="md-source__icon md-icon"> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 496 512"><!-- Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg> </div> <div class=md-source__repository> gpustack/gpustack </div> </a> 
</div> <ul class=md-nav__list data-md-scrollfix> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_1> <label class=md-nav__link for=__nav_1 id=__nav_1_label tabindex=0> <span class=md-ellipsis> Home </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=1 aria-labelledby=__nav_1_label aria-expanded=false> <label class=md-nav__title for=__nav_1> <span class="md-nav__icon md-icon"></span> Home </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/overview/ class=md-nav__link> <span class=md-ellipsis> Overview </span> </a> </li> <li class=md-nav__item> <a href=/2.1/quickstart/ class=md-nav__link> <span class=md-ellipsis> Quickstart </span> </a> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_1_3> <label class=md-nav__link for=__nav_1_3 id=__nav_1_3_label tabindex=0> <span class=md-ellipsis> Installation </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=2 aria-labelledby=__nav_1_3_label aria-expanded=false> <label class=md-nav__title for=__nav_1_3> <span class="md-nav__icon md-icon"></span> Installation </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/installation/requirements/ class=md-nav__link> <span class=md-ellipsis> Requirements </span> </a> </li> <li class=md-nav__item> <a href=/2.1/installation/installation/ class=md-nav__link> <span class=md-ellipsis> Installation </span> </a> </li> <li class=md-nav__item> <a href=/2.1/installation/air-gapped/ class=md-nav__link> <span class=md-ellipsis> Air-Gapped Installation </span> </a> </li> <li class=md-nav__item> <a href=/2.1/installation/uninstallation/ class=md-nav__link> <span class=md-ellipsis> Uninstallation </span> </a> </li> </ul> </nav> </li> <li class=md-nav__item> <a href=/2.1/upgrade/ class=md-nav__link> <span class=md-ellipsis> 
Upgrade </span> </a> </li> <li class=md-nav__item> <a href=/2.1/migration/ class=md-nav__link> <span class=md-ellipsis> Migration </span> </a> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_1_6> <label class=md-nav__link for=__nav_1_6 id=__nav_1_6_label tabindex=0> <span class=md-ellipsis> User Guide </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=2 aria-labelledby=__nav_1_6_label aria-expanded=false> <label class=md-nav__title for=__nav_1_6> <span class="md-nav__icon md-icon"></span> User Guide </label> <ul class=md-nav__list data-md-scrollfix> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_1_6_1> <div class="md-nav__link md-nav__container"> <a href=/2.1/user-guide/playground/ class="md-nav__link "> <span class=md-ellipsis> Playground </span> </a> <label class="md-nav__link " for=__nav_1_6_1 id=__nav_1_6_1_label tabindex=0> <span class="md-nav__icon md-icon"></span> </label> </div> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_1_6_1_label aria-expanded=false> <label class=md-nav__title for=__nav_1_6_1> <span class="md-nav__icon md-icon"></span> Playground </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/user-guide/playground/chat/ class=md-nav__link> <span class=md-ellipsis> Chat </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/playground/image/ class=md-nav__link> <span class=md-ellipsis> Image </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/playground/audio/ class=md-nav__link> <span class=md-ellipsis> Audio </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/playground/embedding/ class=md-nav__link> <span class=md-ellipsis> Embedding </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/playground/rerank/ class=md-nav__link> <span class=md-ellipsis> Rerank </span> </a> </li> 
</ul> </nav> </li> <li class=md-nav__item> <a href=/2.1/user-guide/model-catalog/ class=md-nav__link> <span class=md-ellipsis> Model Catalog </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/model-deployment-management/ class=md-nav__link> <span class=md-ellipsis> Model Deployment Management </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/model-route-management/ class=md-nav__link> <span class=md-ellipsis> Model Route Management </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/model-provider-management/ class=md-nav__link> <span class=md-ellipsis> Model Provider Management </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/inference-backend-management/ class=md-nav__link> <span class=md-ellipsis> Inference Backend Management </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/built-in-inference-backends/ class=md-nav__link> <span class=md-ellipsis> Built-in Inference Backends </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/compatibility-check/ class=md-nav__link> <span class=md-ellipsis> Compatibility Check </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/model-file-management/ class=md-nav__link> <span class=md-ellipsis> Model File management </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/cluster-management/ class=md-nav__link> <span class=md-ellipsis> Cluster Management </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/cloud-credential-management/ class=md-nav__link> <span class=md-ellipsis> Cloud Credential Management </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/api-key-management/ class=md-nav__link> <span class=md-ellipsis> API Key Management </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/user-management/ class=md-nav__link> <span class=md-ellipsis> User Management </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/sso/ class=md-nav__link> <span 
class=md-ellipsis> Single Sign-On (SSO) Authentication </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/observability/ class=md-nav__link> <span class=md-ellipsis> Observability </span> </a> </li> <li class=md-nav__item> <a href=/2.1/user-guide/benchmarking/ class=md-nav__link> <span class=md-ellipsis> Benchmarking </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_1_7> <label class=md-nav__link for=__nav_1_7 id=__nav_1_7_label tabindex=0> <span class=md-ellipsis> Using Models </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=2 aria-labelledby=__nav_1_7_label aria-expanded=false> <label class=md-nav__title for=__nav_1_7> <span class="md-nav__icon md-icon"></span> Using Models </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/using-models/using-large-language-models/ class=md-nav__link> <span class=md-ellipsis> Using Large Language Models </span> </a> </li> <li class=md-nav__item> <a href=/2.1/using-models/using-vision-language-models/ class=md-nav__link> <span class=md-ellipsis> Using Vision Language Models </span> </a> </li> <li class=md-nav__item> <a href=/2.1/using-models/using-embedding-models/ class=md-nav__link> <span class=md-ellipsis> Using Embedding Models </span> </a> </li> <li class=md-nav__item> <a href=/2.1/using-models/using-reranker-models/ class=md-nav__link> <span class=md-ellipsis> Using Reranker Models </span> </a> </li> <li class=md-nav__item> <a href=/2.1/using-models/using-image-generation-models/ class=md-nav__link> <span class=md-ellipsis> Using Image Generation Models </span> </a> </li> <li class=md-nav__item> <a href=/2.1/using-models/using-audio-models/ class=md-nav__link> <span class=md-ellipsis> Using Audio Models </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " 
type=checkbox id=__nav_1_8> <label class=md-nav__link for=__nav_1_8 id=__nav_1_8_label tabindex=0> <span class=md-ellipsis> Tutorials </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=2 aria-labelledby=__nav_1_8_label aria-expanded=false> <label class=md-nav__title for=__nav_1_8> <span class="md-nav__icon md-icon"></span> Tutorials </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/tutorials/running-deepseek-r1-671b-with-distributed-vllm/ class=md-nav__link> <span class=md-ellipsis> Running DeepSeek R1 671B with Distributed vLLM </span> </a> </li> <li class=md-nav__item> <a href=/2.1/tutorials/running-deepseek-r1-671b-with-distributed-ascend-mindie/ class=md-nav__link> <span class=md-ellipsis> Running DeepSeek R1 671B with Distributed Ascend Mindie </span> </a> </li> <li class=md-nav__item> <a href=/2.1/tutorials/inference-on-cpus/ class=md-nav__link> <span class=md-ellipsis> Inference On CPUs </span> </a> </li> <li class=md-nav__item> <a href=/2.1/tutorials/inference-with-tool-calling/ class=md-nav__link> <span class=md-ellipsis> Inference with Tool Calling </span> </a> </li> <li class=md-nav__item> <a href=/2.1/tutorials/using-custom-backends/ class=md-nav__link> <span class=md-ellipsis> Using Custom Inference Backend </span> </a> </li> <li class=md-nav__item> <a href=/2.1/tutorials/adding-gpucluster-using-digitalocean/ class=md-nav__link> <span class=md-ellipsis> Adding a GPU Cluster Using DigitalOcean </span> </a> </li> <li class=md-nav__item> <a href=/2.1/tutorials/adding-gpucluster-using-kubernetes/ class=md-nav__link> <span class=md-ellipsis> Adding a GPU Cluster Using Kubernetes </span> </a> </li> <li class=md-nav__item> <a href=/2.1/tutorials/managing-model-routes/ class=md-nav__link> <span class=md-ellipsis> Managing Model Routes </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox 
id=__nav_1_9> <label class=md-nav__link for=__nav_1_9 id=__nav_1_9_label tabindex=0> <span class=md-ellipsis> Integrations </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=2 aria-labelledby=__nav_1_9_label aria-expanded=false> <label class=md-nav__title for=__nav_1_9> <span class="md-nav__icon md-icon"></span> Integrations </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/integrations/inference-apis/ class=md-nav__link> <span class=md-ellipsis> Inference APIs </span> </a> </li> <li class=md-nav__item> <a href=/2.1/integrations/integrate-with-claude-code/ class=md-nav__link> <span class=md-ellipsis> Integrate with Claude Code </span> </a> </li> <li class=md-nav__item> <a href=/2.1/integrations/integrate-with-dify/ class=md-nav__link> <span class=md-ellipsis> Integrate with Dify </span> </a> </li> <li class=md-nav__item> <a href=/2.1/integrations/integrate-with-ragflow/ class=md-nav__link> <span class=md-ellipsis> Integrate with RAGFlow </span> </a> </li> <li class=md-nav__item> <a href=/2.1/integrations/integrate-with-cherrystudio/ class=md-nav__link> <span class=md-ellipsis> Integrate with CherryStudio </span> </a> </li> <li class=md-nav__item> <a href=/2.1/integrations/integrate-with-openclaw/ class=md-nav__link> <span class=md-ellipsis> Integrate with OpenClaw </span> </a> </li> <li class=md-nav__item> <a href=/2.1/integrations/integrate-with-n8n/ class=md-nav__link> <span class=md-ellipsis> Integrate with n8n </span> </a> </li> <li class=md-nav__item> <a href=/2.1/integrations/integrate-with-maxkb/ class=md-nav__link> <span class=md-ellipsis> Integrate with MaxKB </span> </a> </li> </ul> </nav> </li> <li class=md-nav__item> <a href=/2.1/architecture/ class=md-nav__link> <span class=md-ellipsis> Architecture </span> </a> </li> <li class=md-nav__item> <a href=/2.1/scheduler/ class=md-nav__link> <span class=md-ellipsis> Scheduler </span> </a> </li> <li class=md-nav__item> <a 
href=/2.1/troubleshooting/ class=md-nav__link> <span class=md-ellipsis> Troubleshooting </span> </a> </li> <li class=md-nav__item> <a href=/2.1/faq/ class=md-nav__link> <span class=md-ellipsis> FAQ </span> </a> </li> <li class=md-nav__item> <a href=/2.1/api-reference/ class=md-nav__link> <span class=md-ellipsis> API Reference </span> </a> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_1_15> <label class=md-nav__link for=__nav_1_15 id=__nav_1_15_label tabindex=0> <span class=md-ellipsis> CLI Reference </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=2 aria-labelledby=__nav_1_15_label aria-expanded=false> <label class=md-nav__title for=__nav_1_15> <span class="md-nav__icon md-icon"></span> CLI Reference </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/cli-reference/start/ class=md-nav__link> <span class=md-ellipsis> Start </span> </a> </li> <li class=md-nav__item> <a href=/2.1/cli-reference/download-tools/ class=md-nav__link> <span class=md-ellipsis> Download Tools </span> </a> </li> <li class=md-nav__item> <a href=/2.1/cli-reference/reload-config/ class=md-nav__link> <span class=md-ellipsis> Reload Config </span> </a> </li> <li class=md-nav__item> <a href=/2.1/cli-reference/list-images/ class=md-nav__link> <span class=md-ellipsis> List Images </span> </a> </li> <li class=md-nav__item> <a href=/2.1/cli-reference/save-images/ class=md-nav__link> <span class=md-ellipsis> Save Images </span> </a> </li> <li class=md-nav__item> <a href=/2.1/cli-reference/load-images/ class=md-nav__link> <span class=md-ellipsis> Load Images </span> </a> </li> <li class=md-nav__item> <a href=/2.1/cli-reference/copy-images/ class=md-nav__link> <span class=md-ellipsis> Copy Images </span> </a> </li> </ul> </nav> </li> <li class=md-nav__item> <a href=/2.1/environment-variables/ class=md-nav__link> <span class=md-ellipsis> Environment Variables 
</span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2> <label class=md-nav__link for=__nav_2 id=__nav_2_label tabindex=0> <span class=md-ellipsis> Inference Performance Lab </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=1 aria-labelledby=__nav_2_label aria-expanded=false> <label class=md-nav__title for=__nav_2> <span class="md-nav__icon md-icon"></span> Inference Performance Lab </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/overview/ class=md-nav__link> <span class=md-ellipsis> Overview </span> </a> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2> <label class=md-nav__link for=__nav_2_2 id=__nav_2_2_label tabindex=0> <span class=md-ellipsis> Optimizing Throughput </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=2 aria-labelledby=__nav_2_2_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2> <span class="md-nav__icon md-icon"></span> Optimizing Throughput </label> <ul class=md-nav__list data-md-scrollfix> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2_1> <label class=md-nav__link for=__nav_2_2_1 id=__nav_2_2_1_label tabindex=0> <span class=md-ellipsis> Qwen3.5-35B-A3B </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_2_1_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2_1> <span class="md-nav__icon md-icon"></span> Qwen3.5-35B-A3B </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/qwen3.5-35b-a3b/h200/ class=md-nav__link> <span class=md-ellipsis> H200 </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> 
<input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2_2> <label class=md-nav__link for=__nav_2_2_2 id=__nav_2_2_2_label tabindex=0> <span class=md-ellipsis> Qwen3.5-9B </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_2_2_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2_2> <span class="md-nav__icon md-icon"></span> Qwen3.5-9B </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/qwen3.5-9b/h100/ class=md-nav__link> <span class=md-ellipsis> H100 </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2_3> <label class=md-nav__link for=__nav_2_2_3 id=__nav_2_2_3_label tabindex=0> <span class=md-ellipsis> GLM-4.5-Air </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_2_3_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2_3> <span class="md-nav__icon md-icon"></span> GLM-4.5-Air </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/glm-4.5-air/a100/ class=md-nav__link> <span class=md-ellipsis> A100 </span> </a> </li> <li class=md-nav__item> <a href=/2.1/performance-lab/glm-4.5-air/h100/ class=md-nav__link> <span class=md-ellipsis> H100 </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2_4> <label class=md-nav__link for=__nav_2_2_4 id=__nav_2_2_4_label tabindex=0> <span class=md-ellipsis> GLM-4.x </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_2_4_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2_4> <span class="md-nav__icon md-icon"></span> GLM-4.x </label> <ul class=md-nav__list data-md-scrollfix> <li 
class=md-nav__item> <a href=/2.1/performance-lab/glm-4.x/a100/ class=md-nav__link> <span class=md-ellipsis> A100 </span> </a> </li> <li class=md-nav__item> <a href=/2.1/performance-lab/glm-4.x/h100/ class=md-nav__link> <span class=md-ellipsis> H100 </span> </a> </li> <li class=md-nav__item> <a href=/2.1/performance-lab/glm-4.x/h200/ class=md-nav__link> <span class=md-ellipsis> H200 </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2_5> <label class=md-nav__link for=__nav_2_2_5 id=__nav_2_2_5_label tabindex=0> <span class=md-ellipsis> GPT-OSS-20B </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_2_5_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2_5> <span class="md-nav__icon md-icon"></span> GPT-OSS-20B </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/gpt-oss-20b/a100/ class=md-nav__link> <span class=md-ellipsis> A100 </span> </a> </li> <li class=md-nav__item> <a href=/2.1/performance-lab/gpt-oss-20b/h100/ class=md-nav__link> <span class=md-ellipsis> H100 </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2_6> <label class=md-nav__link for=__nav_2_2_6 id=__nav_2_2_6_label tabindex=0> <span class=md-ellipsis> GPT-OSS-120B </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_2_6_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2_6> <span class="md-nav__icon md-icon"></span> GPT-OSS-120B </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/gpt-oss-120b/a100/ class=md-nav__link> <span class=md-ellipsis> A100 </span> </a> </li> <li class=md-nav__item> <a href=/2.1/performance-lab/gpt-oss-120b/h100/ 
class=md-nav__link> <span class=md-ellipsis> H100 </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2_7> <label class=md-nav__link for=__nav_2_2_7 id=__nav_2_2_7_label tabindex=0> <span class=md-ellipsis> DeepSeek-R1 </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_2_7_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2_7> <span class="md-nav__icon md-icon"></span> DeepSeek-R1 </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/deepseek-r1/h200/ class=md-nav__link> <span class=md-ellipsis> H200 </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2_8> <label class=md-nav__link for=__nav_2_2_8 id=__nav_2_2_8_label tabindex=0> <span class=md-ellipsis> DeepSeek-V3.2 </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_2_8_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2_8> <span class="md-nav__icon md-icon"></span> DeepSeek-V3.2 </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/deepseek-v3.2/h200/ class=md-nav__link> <span class=md-ellipsis> H200 </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2_9> <label class=md-nav__link for=__nav_2_2_9 id=__nav_2_2_9_label tabindex=0> <span class=md-ellipsis> Qwen3-8B </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_2_9_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2_9> <span class="md-nav__icon md-icon"></span> Qwen3-8B </label> <ul class=md-nav__list data-md-scrollfix> <li 
class=md-nav__item> <a href=/2.1/performance-lab/qwen3-8b/910b/ class=md-nav__link> <span class=md-ellipsis> 910B </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2_10> <label class=md-nav__link for=__nav_2_2_10 id=__nav_2_2_10_label tabindex=0> <span class=md-ellipsis> Qwen3-14B </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_2_10_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2_10> <span class="md-nav__icon md-icon"></span> Qwen3-14B </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/qwen3-14b/a100/ class=md-nav__link> <span class=md-ellipsis> A100 </span> </a> </li> <li class=md-nav__item> <a href=/2.1/performance-lab/qwen3-14b/h100/ class=md-nav__link> <span class=md-ellipsis> H100 </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2_11> <label class=md-nav__link for=__nav_2_2_11 id=__nav_2_2_11_label tabindex=0> <span class=md-ellipsis> Qwen3-32B </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_2_11_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2_11> <span class="md-nav__icon md-icon"></span> Qwen3-32B </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/qwen3-32b/a100/ class=md-nav__link> <span class=md-ellipsis> A100 </span> </a> </li> <li class=md-nav__item> <a href=/2.1/performance-lab/qwen3-32b/h100/ class=md-nav__link> <span class=md-ellipsis> H100 </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2_12> <label class=md-nav__link for=__nav_2_2_12 id=__nav_2_2_12_label tabindex=0> 
<span class=md-ellipsis> Qwen3-30B-A3B </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_2_12_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2_12> <span class="md-nav__icon md-icon"></span> Qwen3-30B-A3B </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/qwen3-30b-a3b/910b/ class=md-nav__link> <span class=md-ellipsis> 910B </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_2_13> <label class=md-nav__link for=__nav_2_2_13 id=__nav_2_2_13_label tabindex=0> <span class=md-ellipsis> Qwen3-235B-A22B </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_2_13_label aria-expanded=false> <label class=md-nav__title for=__nav_2_2_13> <span class="md-nav__icon md-icon"></span> Qwen3-235B-A22B </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/qwen3-235b-a22b/a100/ class=md-nav__link> <span class=md-ellipsis> A100 </span> </a> </li> <li class=md-nav__item> <a href=/2.1/performance-lab/qwen3-235b-a22b/h100/ class=md-nav__link> <span class=md-ellipsis> H100 </span> </a> </li> </ul> </nav> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_3> <label class=md-nav__link for=__nav_2_3 id=__nav_2_3_label tabindex=0> <span class=md-ellipsis> Optimizing Latency </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=2 aria-labelledby=__nav_2_3_label aria-expanded=false> <label class=md-nav__title for=__nav_2_3> <span class="md-nav__icon md-icon"></span> Optimizing Latency </label> <ul class=md-nav__list data-md-scrollfix> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " 
type=checkbox id=__nav_2_3_1> <label class=md-nav__link for=__nav_2_3_1 id=__nav_2_3_1_label tabindex=0> <span class=md-ellipsis> Qwen3.5-35B-A3B </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_3_1_label aria-expanded=false> <label class=md-nav__title for=__nav_2_3_1> <span class="md-nav__icon md-icon"></span> Qwen3.5-35B-A3B </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/qwen3.5-35b-a3b/h200-latency/ class=md-nav__link> <span class=md-ellipsis> H200 </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_3_2> <label class=md-nav__link for=__nav_2_3_2 id=__nav_2_3_2_label tabindex=0> <span class=md-ellipsis> Qwen3.5-9B </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_3_2_label aria-expanded=false> <label class=md-nav__title for=__nav_2_3_2> <span class="md-nav__icon md-icon"></span> Qwen3.5-9B </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/qwen3.5-9b/h100-latency/ class=md-nav__link> <span class=md-ellipsis> H100 </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_3_3> <label class=md-nav__link for=__nav_2_3_3 id=__nav_2_3_3_label tabindex=0> <span class=md-ellipsis> Qwen3-8B </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=3 aria-labelledby=__nav_2_3_3_label aria-expanded=false> <label class=md-nav__title for=__nav_2_3_3> <span class="md-nav__icon md-icon"></span> Qwen3-8B </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/qwen3-8b/h100-latency/ class=md-nav__link> <span class=md-ellipsis> H100 </span> </a> </li> </ul> </nav> </li> 
</ul> </nav> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type=checkbox id=__nav_2_4> <label class=md-nav__link for=__nav_2_4 id=__nav_2_4_label tabindex=0> <span class=md-ellipsis> References </span> <span class="md-nav__icon md-icon"></span> </label> <nav class=md-nav data-md-level=2 aria-labelledby=__nav_2_4_label aria-expanded=false> <label class=md-nav__title for=__nav_2_4> <span class="md-nav__icon md-icon"></span> References </label> <ul class=md-nav__list data-md-scrollfix> <li class=md-nav__item> <a href=/2.1/performance-lab/references/the-impact-of-quantization-on-vllm-inference-performance/ class=md-nav__link> <span class=md-ellipsis> The Impact of Quantization on vLLM Inference Performance </span> </a> </li> <li class=md-nav__item> <a href=/2.1/performance-lab/references/evaluating-lmcache-prefill-acceleration-in-vllm/ class=md-nav__link> <span class=md-ellipsis> Evaluating LMCache Prefill Acceleration in vLLM </span> </a> </li> </ul> </nav> </li> </ul> </nav> </li> <li class=md-nav__item> <a href=/2.1/image-selector/ class=md-nav__link> <span class=md-ellipsis> Container Image Selector </span> </a> </li> </ul> </nav> </div> </div> </div> <div class="md-sidebar md-sidebar--secondary" data-md-component=sidebar data-md-type=toc> <div class=md-sidebar__scrollwrap> <div class=md-sidebar__inner> <nav class="md-nav md-nav--secondary" aria-label="Table of contents"> </nav> </div> </div> </div> <div class=md-content data-md-component=content> <article class="md-content__inner md-typeset"> <h1>404 - Not found</h1> </article> </div> <script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var tab,labels=set.querySelector(".tabbed-labels");for(tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script> <script>var 
target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script> </div> <button type=button class="md-top md-icon" data-md-component=top hidden> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12Z"/></svg> Back to top </button> </main> <footer class=md-footer> <div class="md-footer-meta md-typeset"> <div class="md-footer-meta__inner md-grid"> <div class=md-copyright> <div class=md-copyright__highlight> Copyright © 2026 GPUStack.ai </div> </div> <div class=md-social> <a href=https://github.com/gpustack target=_blank rel=noopener title=github.com class=md-social__link> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 496 512"><!-- Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 
1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg> </a> <a href=https://hub.docker.com/r/gpustack target=_blank rel=noopener title=hub.docker.com class=md-social__link> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 640 512"><!-- Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M349.9 236.3h-66.1v-59.4h66.1v59.4zm0-204.3h-66.1v60.7h66.1V32zm78.2 144.8H362v59.4h66.1v-59.4zm-156.3-72.1h-66.1v60.1h66.1v-60.1zm78.1 0h-66.1v60.1h66.1v-60.1zm276.8 100c-14.4-9.7-47.6-13.2-73.1-8.4-3.3-24-16.7-44.9-41.1-63.7l-14-9.3-9.3 14c-18.4 27.8-23.4 73.6-3.7 103.8-8.7 4.7-25.8 11.1-48.4 10.7H2.4c-8.7 50.8 5.8 116.8 44 162.1 37.1 43.9 92.7 66.2 165.4 66.2 157.4 0 273.9-72.5 328.4-204.2 21.4.4 67.6.1 91.3-45.2 1.5-2.5 6.6-13.2 8.5-17.1l-13.3-8.9zm-511.1-27.9h-66v59.4h66.1v-59.4zm78.1 0h-66.1v59.4h66.1v-59.4zm78.1 0h-66.1v59.4h66.1v-59.4zm-78.1-72.1h-66.1v60.1h66.1v-60.1z"/></svg> </a> <a href=https://pypi.org/project/gpustack/ target=_blank rel=noopener title=pypi.org class=md-social__link> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 448 512"><!-- Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.8 200.5c-7.7-30.9-22.3-54.2-53.4-54.2h-40.1v47.4c0 36.8-31.2 67.8-66.8 67.8H172.7c-29.2 0-53.4 25-53.4 54.3v101.8c0 29 25.2 46 53.4 54.3 33.8 9.9 66.3 11.7 106.8 0 26.9-7.8 53.4-23.5 53.4-54.3v-40.7H226.2v-13.6h160.2c31.1 0 42.6-21.7 53.4-54.2 11.2-33.5 10.7-65.7 0-108.6zM286.2 
404c11.1 0 20.1 9.1 20.1 20.3 0 11.3-9 20.4-20.1 20.4-11 0-20.1-9.2-20.1-20.4.1-11.3 9.1-20.3 20.1-20.3zM167.8 248.1h106.8c29.7 0 53.4-24.5 53.4-54.3V91.9c0-29-24.4-50.7-53.4-55.6-35.8-5.9-74.7-5.6-106.8.1-45.2 8-53.4 24.7-53.4 55.6v40.7h106.9v13.6h-147c-31.1 0-58.3 18.7-66.8 54.2-9.8 40.7-10.2 66.1 0 108.6 7.6 31.6 25.7 54.2 56.8 54.2H101v-48.8c0-35.3 30.5-66.4 66.8-66.4zm-6.7-142.6c-11.1 0-20.1-9.1-20.1-20.3.1-11.3 9-20.4 20.1-20.4 11 0 20.1 9.2 20.1 20.4s-9 20.3-20.1 20.3z"/></svg> </a> <a href=https://discord.gg/VXYJzuaqwD target=_blank rel=noopener title=discord.gg class=md-social__link> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 640 512"><!-- Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M524.531 69.836a1.5 1.5 0 0 0-.764-.7A485.065 485.065 0 0 0 404.081 32.03a1.816 1.816 0 0 0-1.923.91 337.461 337.461 0 0 0-14.9 30.6 447.848 447.848 0 0 0-134.426 0 309.541 309.541 0 0 0-15.135-30.6 1.89 1.89 0 0 0-1.924-.91 483.689 483.689 0 0 0-119.688 37.107 1.712 1.712 0 0 0-.788.676C39.068 183.651 18.186 294.69 28.43 404.354a2.016 2.016 0 0 0 .765 1.375 487.666 487.666 0 0 0 146.825 74.189 1.9 1.9 0 0 0 2.063-.676A348.2 348.2 0 0 0 208.12 430.4a1.86 1.86 0 0 0-1.019-2.588 321.173 321.173 0 0 1-45.868-21.853 1.885 1.885 0 0 1-.185-3.126 251.047 251.047 0 0 0 9.109-7.137 1.819 1.819 0 0 1 1.9-.256c96.229 43.917 200.41 43.917 295.5 0a1.812 1.812 0 0 1 1.924.233 234.533 234.533 0 0 0 9.132 7.16 1.884 1.884 0 0 1-.162 3.126 301.407 301.407 0 0 1-45.89 21.83 1.875 1.875 0 0 0-1 2.611 391.055 391.055 0 0 0 30.014 48.815 1.864 1.864 0 0 0 2.063.7A486.048 486.048 0 0 0 610.7 405.729a1.882 1.882 0 0 0 .765-1.352c12.264-126.783-20.532-236.912-86.934-334.541ZM222.491 337.58c-28.972 0-52.844-26.587-52.844-59.239s23.409-59.241 52.844-59.241c29.665 0 53.306 26.82 52.843 59.239 0 32.654-23.41 59.241-52.843 
59.241Zm195.38 0c-28.971 0-52.843-26.587-52.843-59.239s23.409-59.241 52.843-59.241c29.667 0 53.307 26.82 52.844 59.239 0 32.654-23.177 59.241-52.844 59.241Z"/></svg> </a>
<!-- WeChat community link. The href is a QR code image that happens to be hosted on GitHub, so the title names the destination explicitly; a host-derived title would repeat the GitHub link's accessible name. The icon has no text, so the title is the only label this link exposes. -->
<a href=https://github.com/gpustack/gpustack/blob/main/docs/assets/wechat-group-qrcode.jpg target=_blank rel=noopener title="WeChat group QR code" class=md-social__link> <svg xmlns=http://www.w3.org/2000/svg viewbox="0 0 576 512"><!-- Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M385.2 167.6c6.4 0 12.6.3 18.8 1.1C387.4 90.3 303.3 32 207.7 32 100.5 32 13 104.8 13 197.4c0 53.4 29.3 97.5 77.9 131.6l-19.3 58.6 68-34.1c24.4 4.8 43.8 9.7 68.2 9.7 6.2 0 12.1-.3 18.3-.8-4-12.9-6.2-26.6-6.2-40.8-.1-84.9 72.9-154 165.3-154zm-104.5-52.9c14.5 0 24.2 9.7 24.2 24.4 0 14.5-9.7 24.2-24.2 24.2-14.8 0-29.3-9.7-29.3-24.2.1-14.7 14.6-24.4 29.3-24.4zm-136.4 48.6c-14.5 0-29.3-9.7-29.3-24.2 0-14.8 14.8-24.4 29.3-24.4 14.8 0 24.4 9.7 24.4 24.4 0 14.6-9.6 24.2-24.4 24.2zM563 319.4c0-77.9-77.9-141.3-165.4-141.3-92.7 0-165.4 63.4-165.4 141.3S305 460.7 397.6 460.7c19.3 0 38.9-5.1 58.6-9.9l53.4 29.3-14.8-48.6C534 402.1 563 363.2 563 319.4zm-219.1-24.5c-9.7 0-19.3-9.7-19.3-19.6 0-9.7 9.7-19.3 19.3-19.3 14.8 0 24.4 9.7 24.4 19.3 0 10-9.7 19.6-24.4 19.6zm107.1 0c-9.7 0-19.3-9.7-19.3-19.6 0-9.7 9.7-19.3 19.3-19.3 14.5 0 24.4 9.7 24.4 19.3.1 10-9.9 19.6-24.4 19.6z"/></svg> </a>
</div> </div> </div> </footer> </div>
<!-- Dialog container; empty in the static markup. -->
<div class=md-dialog data-md-component=dialog> <div class="md-dialog__inner md-typeset"></div> </div>
<!-- Site configuration as a data block: type=application/json means the browser does not execute it. -->
<script id=__config type=application/json>{"base": "/2.1", "features": ["search.suggest", "search.highlight", "content.tabs.link", "navigation.indexes", "content.tooltips", "navigation.path", "navigation.tabs", "content.code.annotate", "content.code.copy", "content.code.select", "content.action.view", "content.action.edit", "navigation.top", "navigation.footer", "navigation.tracking", "search.share", "toc.follow"], "search": "/2.1/assets/javascripts/workers/search.b8dbb3d2.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": {"provider": "mike"}}</script>
<!-- Classic scripts with no async/defer attribute, so they are fetched and run in the order listed; keep this order when editing. -->
<script src=/2.1/assets/javascripts/bundle.fe8b6f2b.min.js></script>
<script src=/2.1/image-selector/assets/extra.js></script>
<script src=https://unpkg.com/katex@0/dist/katex.min.js></script>
<script src=/2.1/javascripts/katex.js></script>
<script src=https://unpkg.com/katex@0/dist/contrib/auto-render.min.js></script>
<script src=https://unpkg.com/tablesort@5.3.0/dist/tablesort.min.js></script>
<script src=/2.1/javascripts/tablesort.js></script>
</body> </html>