-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsearchindex.js
1 lines (1 loc) · 110 KB
/
searchindex.js
1
Search.setIndex({"alltitles": {"/generate Endpoint": [[10, "generate-endpoint"]], "API configuration": [[13, "api-configuration"]], "Achieving Peak Throughput": [[3, "achieving-peak-throughput"]], "Add a Runner": [[18, "add-a-runner"]], "Add the model to the test suite": [[38, "add-the-model-to-the-test-suite"]], "Advanced Features": [[21, null]], "Advanced Usage": [[5, "Advanced-Usage"]], "Apply SGLang on NVIDIA Jetson Orin": [[35, null]], "Avoid out-of-memory by Tuning --chunked-prefill-size, --mem-fraction-static, --max-running-requests": [[3, "avoid-out-of-memory-by-tuning-chunked-prefill-size-mem-fraction-static-max-running-requests"]], "Backend Tutorial": [[21, null]], "Basic Usage": [[20, "Basic-Usage"]], "Basic example": [[28, "basic-example"]], "Batches": [[6, "Batches"]], "Batching": [[20, "Batching"]], "Benchmark": [[25, "benchmark"]], "Benchmark and Profiling": [[25, null]], "Benchmarking Model Accuracy": [[22, "benchmarking-model-accuracy"]], "Block-wise FP8": [[27, "block-wise-fp8"]], "CUDA error: an illegal memory access was encountered": [[39, "cuda-error-an-illegal-memory-access-was-encountered"]], "CUDA out of memory": [[39, "cuda-out-of-memory"]], "Cache-Aware Load-Balancing Router": [[40, "cache-aware-load-balancing-router"]], "Caching torch.compile": [[27, "caching-torch-compile"]], "Chat Completions": [[6, "Chat-Completions"]], "Chat Template": [[8, "Chat-Template"]], "Check if the metrics are being collected": [[37, "check-if-the-metrics-are-being-collected"]], "Check if the variables are created": [[37, "check-if-the-variables-are-created"]], "Choices Methods in SGLang": [[19, null]], "Classify (reward model)": [[4, "Classify-(reward-model)"]], "Co-launch Router and Runtimes": [[40, "co-launch-router-and-runtimes"]], "Code Formatting with Pre-Commit": [[26, "code-formatting-with-pre-commit"]], "Common Notes": [[41, "common-notes"]], "Common launch commands": [[13, "common-launch-commands"]], "Completions": [[6, "Completions"]], "Complex Prompts": [[20, "Complex-Prompts"]], "Configuration Parameters": [[40, "configuration-parameters"]], "Constrained Decoding": [[13, "constrained-decoding"], [20, "Constrained-Decoding"]], "Constrained decoding": [[10, "constrained-decoding"]], "Contribution Guide": [[26, null]], "Control flow": [[20, "Control-flow"]], "Core Parameters": [[10, "core-parameters"]], "Custom Chat Template": [[1, null]], "Custom Logit Processor": [[10, "custom-logit-processor"]], "Customizing Benchmark Scripts": [[22, "customizing-benchmark-scripts"]], "Data Parallelism Attention": [[27, "data-parallelism-attention"]], "Data parallelism": [[13, "data-parallelism"]], "Debug": [[28, "debug"]], "Debug options": [[13, "debug-options"]], "DeepSeek Usage": [[27, null]], "DeepSeek V3/R1": [[34, "deepseek-v3-r1"]], "Define Messages": [[2, "Define-Messages"]], "Define Tools for Function Call": [[2, "Define-Tools-for-Function-Call"]], "Define a Tool Function": [[2, "Define-a-Tool-Function"]], "Deploy On Kubernetes": [[28, null]], "Development Guide Using Docker": [[16, null]], "Disable NUMA Auto-Balancing": [[24, "disable-numa-auto-balancing"]], "Docs Workflow": [[0, "docs-workflow"]], "Double Sparsity": [[13, "double-sparsity"]], "Download Weights": [[27, "download-weights"]], "Dynamic Scaling APIs": [[40, "dynamic-scaling-apis"]], "EAGLE Decoding": [[14, "EAGLE-Decoding"]], "EAGLE Decoding via Frequency-Ranked Speculative Sampling": [[14, "EAGLE-Decoding-via-Frequency-Ranked-Speculative-Sampling"]], "EAGLE Decoding with torch.compile": [[14, "EAGLE-Decoding-with-torch.compile"]], "EAGLE-3 Decoding": [[14, "EAGLE-3-Decoding"]], "EBNF": [[15, "EBNF"], [15, "id2"], [15, "id6"]], "Embedding Models": [[38, "embedding-models"]], "Enabling cache for torch.compile": [[3, "enabling-cache-for-torch-compile"]], "Encode (embedding model)": [[4, "Encode-(embedding-model)"]], "Evaluation": [[16, "evaluation"]], "Examples": [[10, "examples"], [24, "examples"]], "Examples of Offline Model Quantization": [[9, "examples-of-offline-model-quantization"]], "Execute the Tool": [[2, "Execute-the-Tool"]], "Expert parallelism": [[13, "expert-parallelism"]], "Extending Evaluation Capabilities": [[22, "extending-evaluation-capabilities"]], "FAQ": [[27, "faq"]], "Fault Tolerance": [[40, "fault-tolerance"]], "Flush Cache": [[4, "Flush-Cache"]], "Fork and Clone the Repository": [[26, "fork-and-clone-the-repository"]], "Frequently Asked Questions": [[29, null]], "Frontend Tutorial": [[21, null]], "General Guidance": [[30, null]], "Generate (text generation model)": [[4, "Generate-(text-generation-model)"]], "Generative Models": [[38, "generative-models"]], "Get Model Info": [[4, "Get-Model-Info"]], "Get Server Info": [[4, "Get-Server-Info"]], "Grafana Dashboard": [[37, "grafana-dashboard"]], "Greedy Token Selection": [[19, "greedy-token-selection"]], "H100": [[16, "h100"]], "H200": [[16, "h200"]], "HTTP Server configuration": [[13, "http-server-configuration"]], "Handle Tools": [[2, "Handle-Tools"], [2, "id1"]], "Hardware Supports": [[31, null]], "Health Check": [[4, "Health-Check"]], "How to Support a New Language Model": [[38, "how-to-support-a-new-language-model"]], "How to Support a New vLM": [[38, "how-to-support-a-new-vlm"]], "How to support a new model?": [[2, "How-to-support-a-new-model?"]], "Hyperparameter Tuning": [[3, null]], "Initialize the Client": [[2, "Initialize-the-Client"]], "Install Dependencies & Build": [[26, "install-dependencies-build"]], "Install Dependency": [[0, "install-dependency"]], "Install SGLang": [[41, null]], "Install Using Docker (Recommended)": [[24, "install-using-docker-recommended"]], "Install from Source": [[24, "install-from-source"]], "Installation": [[21, null], [40, "installation"]], "Installing SGLang": [[24, "installing-sglang"]], "Installing and running SGLang with Jetson Containers": [[35, "installing-and-running-sglang-with-jetson-containers"]], "Interactive debugging": [[38, "interactive-debugging"]], "JSON": [[15, "JSON"], [15, "id1"], [15, "id5"]], "JSON Format": [[1, "json-format"]], "Jinja Format": [[1, "jinja-format"]], "Kernel backend": [[13, "kernel-backend"]], "Keys to success": [[28, "keys-to-success"]], "Launch A Server": [[4, "Launch-A-Server"], [6, "Launch-A-Server"], [7, "Launch-A-Server"], [8, "Launch-A-Server"], [11, "Launch-A-Server"], [20, "Launch-A-Server"]], "Launch DeepSeek V3 with SGLang": [[27, "launch-deepseek-v3-with-sglang"]], "Launch Runtimes and Router Separately": [[40, "launch-runtimes-and-router-separately"]], "Launch with One node of 8 H200": [[27, "launch-with-one-node-of-8-h200"]], "Launching the Server": [[2, "Launching-the-Server"], [12, "Launching-the-Server"]], "Learn more": [[32, null]], "Llama 3.1 405B": [[34, "llama-3-1-405b"]], "LoRA": [[13, "lora"]], "Logging": [[13, "logging"]], "Make a release in GitHub": [[17, "make-a-release-in-github"]], "Measuring Model Accuracy in SGLang": [[22, null]], "Memory and scheduling": [[13, "memory-and-scheduling"]], "Method 1: With pip or uv": [[41, "method-1-with-pip-or-uv"]], "Method 2: From source": [[41, "method-2-from-source"]], "Method 3: Using docker": [[41, "method-3-using-docker"]], "Method 4: Using docker compose": [[41, "method-4-using-docker-compose"]], "Method 5: Using Kubernetes": [[41, "method-5-using-kubernetes"]], "Method 6: Run on Kubernetes or Clouds with SkyPilot": [[41, "method-6-run-on-kubernetes-or-clouds-with-skypilot"]], "Methods": [[19, "methods"]], "Model Selection": [[0, "model-selection"]], "Model and tokenizer": [[13, "model-and-tokenizer"]], "Multi Node Tensor Parallelism": [[27, "multi-node-tensor-parallelism"]], "Multi modal": [[10, "multi-modal"]], "Multi-Node Deployment": [[23, null], [34, null]], "Multi-Node Inference on SLURM": [[34, "multi-node-inference-on-slurm"]], "Multi-head Latent Attention (MLA) Throughput Optimizations": [[27, "multi-head-latent-attention-mla-throughput-optimizations"]], "Multi-modal Generation": [[20, "Multi-modal-Generation"]], "Multi-node distributed serving": [[13, "multi-node-distributed-serving"]], "Multi-token Prediction": [[27, "multi-token-prediction"]], "Multi-turn Dialog": [[20, "Multi-turn-Dialog"]], "Multiple-Image Inputs": [[8, "Multiple-Image-Inputs"]], "Native API and SGLang Runtime (SRT)": [[2, "Native-API-and-SGLang-Runtime-(SRT)"], [15, "Native-API-and-SGLang-Runtime-(SRT)"]], "Non-Streaming Request": [[2, "Non-Streaming-Request"], [12, "Non-Streaming-Request"]], "Non-streaming Asynchronous Generation": [[5, "Non-streaming-Asynchronous-Generation"]], "Non-streaming Synchronous Generation": [[5, "Non-streaming-Synchronous-Generation"]], "Normal": [[10, "normal"]], "Offline Batch Inference": [[5, "Offline-Batch-Inference"]], "Offline Engine API": [[2, "Offline-Engine-API"], [5, null], [12, "Offline-Engine-API"], [15, "Offline-Engine-API"]], "Offline Quantization": [[9, "offline-quantization"]], "Online Quantization": [[9, "online-quantization"]], "OpenAI APIs - Completions": [[6, null]], "OpenAI APIs - Embedding": [[7, null]], "OpenAI APIs - Vision": [[8, null]], "OpenAI Compatible API": [[2, "OpenAI-Compatible-API"], [12, "OpenAI-Compatible-API"], [15, "OpenAI-Compatible-API"]], "Optimization": [[13, "optimization"]], "Optimizations": [[27, "optimizations"]], "Other options": [[10, "other-options"]], "Other runtime options": [[13, "other-runtime-options"]], "Other tips": [[25, "other-tips"]], "Parallelism": [[13, "parallelism"], [20, "Parallelism"]], "Parameters": [[6, "Parameters"], [6, "id2"]], "Penalizers": [[10, "penalizers"]], "Performance Highlights": [[14, "Performance-Highlights"]], "Performance Tuning": [[36, null]], "Port Allocation and CI Efficiency": [[0, "port-allocation-and-ci-efficiency"]], "Port a model from vLLM to SGLang": [[38, "port-a-model-from-vllm-to-sglang"]], "Prerequisites": [[28, "prerequisites"], [35, "prerequisites"]], "Production Metrics": [[37, null]], "Profile": [[16, "profile"]], "Profile with Nsight": [[25, "profile-with-nsight"]], "Profile with PyTorch Profiler": [[25, "profile-with-pytorch-profiler"]], "Prompt Alignment Example": [[0, "prompt-alignment-example"]], "PyPI Package Release Process": [[17, null]], "Quantization": [[9, null]], "RDMA RoCE case": [[28, "rdma-roce-case"]], "Reasoning Content for DeepSeek R1": [[27, "reasoning-content-for-deepseek-r1"]], "Reasoning Parser": [[12, null]], "Reference": [[9, "reference"]], "References": [[14, "References"], [21, null], [35, "references"]], "Registering an external model implementation": [[38, "registering-an-external-model-implementation"]], "Regular expression": [[15, "Regular-expression"], [15, "id3"], [15, "id7"]], "Remaining issues": [[28, "remaining-issues"]], "Reward Models": [[38, "reward-models"]], "RoCE scenario": [[28, "roce-scenario"]], "Router for Data Parallelism": [[40, null]], "Routing Strategies": [[40, "routing-strategies"]], "Running DeepSeek-V3": [[24, "running-deepseek-v3"]], "Running Inference": [[35, "running-inference"]], "Running Llama3.1": [[24, "running-llama3-1"]], "Running Unit Tests & Adding to CI": [[26, "running-unit-tests-adding-to-ci"]], "Running examples on Multi-node": [[27, "running-examples-on-multi-node"]], "Running quantization with TorchAO": [[35, "running-quantization-with-torchao"]], "SGLang Documentation": [[0, null], [21, null]], "SGLang Frontend Language": [[20, null]], "SGLang Native API": [[12, "SGLang-Native-API"]], "SGLang Native APIs": [[4, null]], "SGLang Router": [[21, null]], "SGLang on AMD": [[24, null]], "Sampling Parameters": [[10, null]], "Sampling params": [[10, "sampling-params"]], "Send Results Back to Model": [[2, "Send-Results-Back-to-Model"]], "Sending Requests": [[11, null]], "Server Arguments": [[13, null]], "Serving: HTTP & API": [[13, "serving-http-api"]], "Set Up Self-Hosted Runners for GitHub Action": [[18, null]], "Setting Up & Building from Source": [[26, "setting-up-building-from-source"]], "Setup Docker Container": [[16, "setup-docker-container"]], "Setup Guide": [[37, "setup-guide"]], "Setup VSCode": [[16, "setup-vscode"]], "Skip Tokenizer and Detokenizer": [[4, "Skip-Tokenizer-and-Detokenizer"]], "Speculative Decoding": [[14, null]], "Speculative decoding": [[13, "speculative-decoding"]], "Step 1: Start a docker container.": [[18, "step-1-start-a-docker-container"]], "Step 2: Configure the runner by config.sh": [[18, "step-2-configure-the-runner-by-config-sh"]], "Step 3: Run the runner by run.sh": [[18, "step-3-run-the-runner-by-run-sh"]], "Streaming": [[10, "streaming"], [11, "Streaming"], [11, "id1"], [20, "Streaming"]], "Streaming Asynchronous Generation": [[5, "Streaming-Asynchronous-Generation"]], "Streaming Request": [[2, "Streaming-Request"], [12, "Streaming-Request"]], "Streaming Synchronous Generation": [[5, "Streaming-Synchronous-Generation"]], "Structural Tag": [[15, "Structural-Tag"], [15, "id4"], [15, "id8"]], "Structured Outputs": [[15, null]], "Structured Outputs (JSON, Regex, EBNF)": [[6, "Structured-Outputs-(JSON,-Regex,-EBNF)"], [10, "structured-outputs-json-regex-ebnf"]], "Structured output with XGrammar": [[35, "structured-output-with-xgrammar"]], "Supported Models": [[12, "Supported-Models"], [38, null]], "Supporting New Reasoning Model Schemas": [[12, "Supporting-New-Reasoning-Model-Schemas"]], "System Configuration": [[24, "system-configuration"]], "TODO": [[28, "todo"]], "Tensor parallelism": [[13, "tensor-parallelism"]], "Test the correctness": [[38, "test-the-correctness"]], "The results are not deterministic, even with a temperature of 0": [[29, "the-results-are-not-deterministic-even-with-a-temperature-of-0"]], "Tips for Newcomers": [[26, "tips-for-newcomers"]], "Token Length Normalized": [[19, "token-length-normalized"]], "Tool and Function Calling": [[2, null]], "Troubleshooting": [[37, "troubleshooting"], [39, null]], "Tune --dp-size and --tp-size": [[3, "tune-dp-size-and-tp-size"]], "Tune --schedule-conservativeness": [[3, "tune-schedule-conservativeness"]], "Tune --schedule-policy": [[3, "tune-schedule-policy"]], "Tune Your Request Submission Speed": [[3, "tune-your-request-submission-speed"]], "Unconditional Likelihood Normalized": [[19, "unconditional-likelihood-normalized"]], "Update Documentation": [[0, "update-documentation"]], "Update GRUB Settings": [[24, "update-grub-settings"]], "Update Weights From Disk": [[4, "Update-Weights-From-Disk"]], "Update the version in code": [[17, "update-the-version-in-code"]], "Upload the PyPI package": [[17, "upload-the-pypi-package"]], "Usage": [[6, "Usage"], [6, "id1"], [12, "Usage"]], "Use Models From ModelScope": [[33, null]], "Using GPTQModel": [[9, "using-gptqmodel"]], "Using Input IDs": [[7, "Using-Input-IDs"]], "Using LLM Compressor": [[9, "using-llm-compressor"]], "Using Native Generation APIs": [[11, "Using-Native-Generation-APIs"]], "Using OpenAI Python Client": [[7, "Using-OpenAI-Python-Client"], [8, "Using-OpenAI-Python-Client"], [11, "Using-OpenAI-Python-Client"]], "Using Python Requests": [[7, "Using-Python-Requests"], [8, "Using-Python-Requests"], [11, "Using-Python-Requests"]], "Using cURL": [[7, "Using-cURL"], [8, "Using-cURL"], [11, "Using-cURL"]], "Warmup Step": [[24, "warmup-step"]], "Why this approach?": [[0, "why-this-approach"]], "Writing Documentation & Running Docs CI": [[26, "writing-documentation-running-docs-ci"]]}, "docnames": ["README", "backend/custom_chat_template", "backend/function_calling", "backend/hyperparameter_tuning", "backend/native_api", "backend/offline_engine_api", "backend/openai_api_completions", "backend/openai_api_embeddings", "backend/openai_api_vision", "backend/quantization", "backend/sampling_params", "backend/send_request", "backend/separate_reasoning", "backend/server_arguments", "backend/speculative_decoding", "backend/structured_outputs", "developer/development_guide_using_docker", "developer/release_process", "developer/setup_github_runner", "frontend/choices_methods", "frontend/frontend", "index", "references/accuracy_evaluation", "references/advanced_deploy", "references/amd", "references/benchmark_and_profiling", "references/contribution_guide", "references/deepseek", "references/deploy_on_k8s", "references/faq", "references/general", "references/hardware", "references/learn_more", "references/modelscope", "references/multi_node", "references/nvidia_jetson", "references/performance_tuning", "references/production_metrics", "references/supported_models", "references/troubleshooting", "router/router", "start/install"], "envversion": {"nbsphinx": 4, "sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1}, "filenames": ["README.md", "backend/custom_chat_template.md", "backend/function_calling.ipynb", "backend/hyperparameter_tuning.md", "backend/native_api.ipynb", "backend/offline_engine_api.ipynb", "backend/openai_api_completions.ipynb", "backend/openai_api_embeddings.ipynb", "backend/openai_api_vision.ipynb", "backend/quantization.md", "backend/sampling_params.md", "backend/send_request.ipynb", "backend/separate_reasoning.ipynb", "backend/server_arguments.md", "backend/speculative_decoding.ipynb", "backend/structured_outputs.ipynb", "developer/development_guide_using_docker.md", "developer/release_process.md", "developer/setup_github_runner.md", "frontend/choices_methods.md", "frontend/frontend.ipynb", "index.rst", "references/accuracy_evaluation.md", "references/advanced_deploy.rst", "references/amd.md", "references/benchmark_and_profiling.md", "references/contribution_guide.md", "references/deepseek.md", "references/deploy_on_k8s.md", "references/faq.md", "references/general.rst", "references/hardware.rst", "references/learn_more.md", "references/modelscope.md", "references/multi_node.md", "references/nvidia_jetson.md", "references/performance_tuning.rst", "references/production_metrics.md", "references/supported_models.md", "references/troubleshooting.md", "router/router.md", "start/install.md"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"": [0, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 19, 20, 22, 24, 26, 27, 28, 35, 37, 38, 41], "0": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 20, 22, 24, 25, 28, 33, 34, 35, 37, 40, 41], "00": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20, 28, 34], "000": 6, "0000": [3, 6, 28], "00001": 9, "0001": [28, 40], "0006895065307617188": [4, 7], "0006909370422363281": 7, "0006928443908691406": [4, 7], "0007052421569824219": 7, "001": 37, "002086639404296875": 7, "0020923614501953125": [4, 7], "0030384063720703125": [4, 7], "003055572509765625": 7, "005": 37, "006198883056640625": [4, 7], "007507552643049313": 37, "008087158203125": [4, 7], "00809478759765625": 7, "00830841064453125": [4, 7], "00897216796875": 7, "0089874267578125": [4, 7], "00it": 5, "01": [2, 3, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20, 28, 37], "01024": 9, "0123748779296875": 7, "01238250732421875": [4, 7], "01438140869140625": 7, "0143890380859375": [4, 7], "015": 37, "02": [2, 4, 5, 6, 7, 8, 11, 14, 15, 20, 28, 37], "0200": 28, "0225": 28, "0229d89dd6fb42e7aa7945cc27239fc1": 6, "025": 37, "03": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20, 37], "037291765213012695": 15, "039435e": 37, "03ca73f6": 6, "04": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 18, 20, 28, 37], "05": [4, 6, 7, 11, 14, 15, 20, 28, 37], "0546875": 4, "05it": [8, 11], "06": [6, 7, 11, 12, 14, 15, 20, 37], "06935faad9484797a329a8b53086f1b2": 6, "06it": 4, "07": [2, 4, 6, 11, 14, 15, 28], "075": 37, "07500000000000001": 37, "0767ed5581e6": 6, "08": [2, 4, 6, 7, 12, 28, 37], "081984d34d6b4f0184a6291cffb072f1": 15, "08it": 20, "09": [4, 7, 11, 12, 14, 20], "09it": 4, "0dd7": 28, "0ecbe7047cc1450eb243c90e981ef839": 2, "0x0": 28, "1": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 22, 25, 27, 28, 35, 37, 38, 40], "10": [2, 4, 6, 7, 12, 14, 15, 20, 22, 25, 28, 37], "100": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20, 25], "1000": 25, "100mb": 25, "101": [2, 6, 20], "102": 2, "10239": 6, "1024": [2, 9, 12, 25, 37, 41], "103": [2, 12, 20], "1035": 37, "104": [2, 12], "105": 20, "106": [6, 20], "108": 12, "10it": 2, "11": [2, 4, 5, 6, 8, 11, 12, 14, 15, 20, 28, 37], "11008": 37, "1101": 4, "1102": 4, "11219": 6, "11228": 37, "1125": 37, "11293": 6, "11346": 6, "11368": 6, "11389": 6, "11401": 4, "11463": 6, "11488": 6, "11495": 4, "115": 20, "116": 6, "116093850019932e": 37, "1165": 6, "11685": 37, "118": 6, "119": 6, "11b": 8, "12": [2, 4, 6, 8, 11, 12, 14, 15, 18, 20, 34, 35, 41], "120": [6, 12], "1202": 4, "121": 20, "12109": 6, "12138247489929199": 4, "122": 6, "123": 6, "12366": 4, "123859": 37, "124": 12, "12430": 6, "125": 12, "127": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 28, 40], "128": [6, 9, 10, 13, 15, 16, 24, 27, 35, 41], "128000": [4, 20], "128009": [4, 6, 8, 11, 14, 15], "128902e": 37, "12895": 8, "128g": 18, "128x128": 27, "129": 6, "12916": 8, "12956": 8, "12it": 6, "13": [2, 4, 6, 7, 8, 11, 14, 15, 20], "130": 6, "13020": 4, "131": 6, "131072": [4, 6, 7, 8, 11, 12, 14, 15], "132": 6, "133": 6, "1330": 37, "1335": 29, "13496": 6, "13fb4eed2acf471da48fba89a60805bd": 15, "14": [2, 4, 6, 7, 14, 15, 20, 37], "140": 37, "14006": 37, "14007": 37, "14183": 4, "1455": 4, "146": 6, "148": 2, "14it": 5, "15": [2, 4, 6, 11, 12, 14, 15, 20, 28, 37], "150": 6, "15054": 6, "1513": 37, "15360": 4, "156": 14, "158": [6, 20], "15b3": 28, "15it": [4, 11], "16": [4, 6, 12, 14, 15, 20, 27, 28, 34], "160": [2, 4, 6, 7, 8, 11, 12, 15, 20], "161": 20, "162": 37, "16384": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "163840": 28, "165": 12, "16586": 6, "16730": 4, "16777216": 40, "168": 13, "16825f0684034f218b7b011ed5ea54c7": 6, "16875": 37, "1690": 4, "16g": [24, 41], "16it": 2, "17": [4, 6, 8, 11, 12, 14, 15, 20, 28], "171662e": 37, "172": 34, "1729": 29, "1742792163": 14, "1742792179": 2, "1742792182": 2, "1742792215": 14, "1742792246": 8, "1742792252": 8, "1742792253": 8, "1742792285": 14, "1742792291": 11, "1742792319": 6, "1742792321": 6, "1742792322": 6, "1742792325": 6, "1742792347": 14, "17563796043395996": 15, "1756422519683838": 15, "175645112991333": 15, "17630": 6, "1778": 4, "178": 6, "17it": 8, "18": [2, 4, 6, 8, 12, 14, 15, 20, 28, 34], "1815": 37, "183638762": 4, "186": 6, "18656": 6, "188": 20, "18it": 2, "19": [2, 4, 6, 8, 12, 14, 15, 20], "190": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20], "192": 13, "194": 20, "197": 4, "1970": 37, "1980": 6, "1989": 4, "19it": 5, "1b": [2, 4, 9, 38], "1e45a3b14e07496cb6180eb382933a92": 14, "1e63f651d4534985b9d3f3bd2e4273dc": 11, "2": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 22, 25, 27, 28, 34, 37, 38], "20": [2, 5, 6, 8, 12, 14, 20, 27, 28, 37], "200": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "2000": 16, "20000": [28, 34], "2023todai": 15, "2024": 15, "2025": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "20374": 6, "20474": 4, "2048": [3, 13, 14, 22, 39], "20480": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "20628142356872559": 15, "208": 20, "21": [2, 4, 8, 12, 15, 37], "2141000": 15, "2147000": 15, "21500000": 15, "215182126": 14, "2162": 4, "21743083000183105": 15, "21e331f3898946df8ab27c856cae5a6c": 8, "21it": [11, 15], "22": [2, 4, 6, 12, 14, 15], "22176458073a43c9b3253d73d8b0fcd9": 14, "222": 20, "224gb": 34, "22703": 4, "22910": 4, "2295": 6, "22it": 15, "23": [4, 6, 7, 12, 15, 20, 28], "231": 2, "2326": 37, "233": 3, "238": 20, "23it": 2, "24": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20, 28], "2407": 2, "2417": 37, "247": 37, "24c9ccd8918143b3aec11735b0b4e3d7": 15, "24h": 6, "24it": 6, "25": [2, 4, 5, 6, 11, 14, 15, 20, 28, 37], "250": 6, "25000": 13, "2513": 37, "253125": 37, "256": [2, 4, 6, 7, 8, 9, 11, 12, 14, 15, 16, 20, 25], "2574440": 20, "2574441": 7, "2576651": 20, "2576805": 14, "2577005": 20, "2577265": 2, "2578527": 14, "2579888": 8, "2580263": 8, "2580719": 14, "2581617": 11, "2583196": 6, "2583892": 14, "2584811": 12, "2586276": 15, "2586701": 4, "2588477": 4, "2589506": 4, "2590243": 4, "25it": 20, "26": [4, 6, 7, 12, 14, 15, 20], "26306266784668": 37, "264": 4, "266055e": 37, "27": [4, 7, 14, 15, 28, 37], "274": 2, "275": [2, 6], "276": 2, "278774516": 2, "279": 4, "27b": 38, "27it": [8, 20], "28": [2, 6, 7, 12, 20, 37], "2814453125": 37, "282": 2, "2826": 37, "289": 2, "28it": [2, 20], "29": [2, 4, 5, 7, 12, 14, 20], "2950": 6, "296752e": 37, "297": 14, "2975": 6, "298": 20, "2987": 6, "29a7": 6, "29it": [4, 20], "2a": 0, "2b": 38, "2x": 28, "3": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 20, 22, 24, 25, 28, 35, 37, 38, 40], "30": [2, 4, 6, 11, 12, 14, 15, 20, 37], "300": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "3000": [6, 37], "30000": [1, 9, 10, 13, 22, 24, 33, 34, 37, 38, 40, 41], "30001": 40, "301": 4, "304": 4, "3050": 6, "307": 20, "30it": [2, 5, 6], "31": [2, 4, 7, 8, 12, 20, 41], "310": 2, "31070": 4, "311": 4, "312226e": 37, "314": 37, "315": 4, "316": 14, "3168": 4, "317": 3, "318": 20, "31it": [4, 11, 15, 20], "32": [2, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 20, 25, 27, 28, 37, 40, 41], "3202": 12, "322": 2, "323": [2, 4], "32454": 14, "3248779296875": 37, "32618": 20, "32768": [2, 20], "329": 2, "32988": 14, "32b": [2, 12], "32g": [16, 41], "32it": 15, "33": [4, 6, 8, 14, 20], "330": [4, 6], "33133": 8, "33451": 14, "33599": 4, "3363": 4, "33846": 20, "3393": 6, "339675e": 37, "33it": 20, "34": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20], "340": 20, "34188": 4, "34437": 7, "3467566967010498": 11, "34996": 4, "35": [6, 8, 14, 20], "350": 2, "35004": 4, "35020": 4, "3518979474117756e": 37, "353": 2, "3540": 6, "35592": 4, "3570": 6, "357747e": 37, "36": [4, 7, 8, 11, 14, 15], "3600": [6, 27], "36066": 11, "36105": 4, "362": 2, "36278": 12, "3629": 4, "3660": 6, "369": [2, 4], "37": [4, 6, 8, 12, 14, 15, 20], "370959": 3, "37144": 20, "37152": 20, "37154": 20, "37162": 20, "37170": 20, "37186": 20, "37192": 20, "37200": 20, "37210": 20, "37216": 20, "37232": 20, "37244": 20, "372acb2dae944ce49a66b64afccb0379": 11, "373": 20, "374": 4, "375": 6, "37611": 14, "37754": 4, "37758": 4, "37760": 4, "37925": 6, "3796875": 37, "37972": 2, "379956953": 11, "37it": [2, 4, 8], "38": [2, 4, 6, 20], "380": 20, "382": 37, "38212": 4, "38214": 4, "38222": 4, "38224": 14, "38236": 14, "38238": [4, 14], "389414e": 37, "38it": [2, 15], "39": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20], "390": 2, "39054": 4, "39068": 2, "39082": 2, "39096": 2, "39110": 2, "392132406": 14, "3923": 4, "3925": 4, "39496": 15, "3967": 4, "39928": 11, "39938": 11, "39944": 11, "39954": 11, "39958": 11, "39968": 11, "39it": 15, "3rd": 5, "4": [0, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 18, 20, 22, 27, 28, 34, 37, 38, 40], "40": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 37], "400": 4, "4000": 24, "40000": 28, "400757e": 37, "405b": 13, "40656": 8, "40668": 8, "40672": 8, "40686": 8, "40696": 6, "40704": 6, "40708": 6, "40710": 6, "40746": 14, "40756": 14, "40768": 14, "40834": 20, "40840": 20, "409": 2, "4096": [2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 39], "40it": [6, 20], "41": [4, 6, 7, 11, 14, 20], "41154": 20, "41162": 20, "41178": 20, "413": 20, "41652": 14, "41798": 4, "41it": [8, 20], "42": [2, 6, 7, 11, 12, 14, 20], "424": 2, "4245": 4, "425": 6, "42530": 14, "4257": 6, "42it": 20, "43": [4, 6, 7, 11, 12, 14], "433413": 37, "4360": 6, "439": 4, "439e": 6, "44": [2, 4, 12, 14], "4414": 6, "44564": 4, "4482": 6, "449": 4, "4492": 6, "45": [2, 6, 8, 12, 14, 34], "450": 6, "45076": 8, "45092": 8, "45098": 8, "45112": 8, "453": 20, "456928683": 6, "45764": 15, "45766": 15, "45772": 15, "45782": 15, "45786": 15, "4588": 6, "4594": 3, "45f28078ce4c438f9c42b640302f41c0": 15, "46": [2, 4, 6, 14, 15, 20], "4612": 6, "4623": 6, "46230": 7, "46894": 14, "469": 4, "469339879": 14, "46it": [2, 15, 20], "47": [4, 14, 20], "47304": 7, "47312": 7, "47320": 7, "47336": 7, "47352": 7, "47360": 7, "4742": 6, "4753": 6, "476": 15, "47670": 14, "47678": 14, "47684": 14, "47e8": 6, "48": [4, 6, 14, 15, 20, 41], "486": 37, "4867": 6, "4871": 6, "487316894531251": 37, "48908": 28, "48924": 28, "49": [2, 4, 6, 7, 8, 20, 37], "4959": 6, "49606": 4, "49996": 6, "49b9": 6, "4a0d": 6, "4b": 8, "4bit": 9, "4cd5": 6, "4de5": 6, "5": [2, 4, 5, 6, 7, 8, 10, 11, 12, 14, 15, 20, 22, 28, 33, 37, 38, 40], "50": [2, 3, 4, 5, 6, 7, 11, 12, 14, 15, 20, 28, 37], "500": 3, "5000": 6, "50000": 13, "50002": 6, "501": 15, "502": 15, "50634": 4, "50814177726902": 37, "50870": 6, "509": 15, "50it": 15, "51": [2, 4, 6, 8, 11, 12, 14, 20], "510": 6, "512": [20, 25], "5131": 6, "514771912145079": 37, "51508": 20, "51518": 20, "51524": 20, "51532": 20, "51544": 20, "51550": 20, "51712": 4, "51716": 4, "51726": 4, "51986": 20, "51996": 20, "51it": 4, "52": [4, 6, 12, 14, 15, 20, 34], "52012": 20, "527617": 15, "53": [2, 7, 12, 14], "5342435836791992": 15, "535": 15, "54": [4, 5, 6, 7, 12, 14, 20], "54286": 12, "54302": 12, "545552634": 4, "54870": 15, "55": [2, 4, 6, 7, 8, 12, 14, 15, 20], "55162": 4, "55178": 4, "55186": 4, "55188": 4, "55204": 4, "55208": 4, "55224": 4, "55232": 4, "5526": 4, "55350": 12, "55360": 12, "55370": 12, "55380": 12, "557572e": 37, "56": [2, 4, 8, 14, 20], "5690": 6, "56953125": 37, "57": [4, 7, 8, 11, 12, 14, 20], "57120": 11, "57132": 11, "57140": 11, "57296": 4, "5766": 6, "57829": 4, "5791549598": 37, "58": [2, 4, 6, 8, 11, 12, 14, 20], "589": 14, "58916": 14, "58920": 14, "58932": 14, "58942": 14, "58it": 20, "59": [4, 6, 7, 11, 12, 14, 15, 20], "593": 37, "596463012695313": 37, "59678": 2, "59692": 2, "5b": [2, 22, 38], "5x": 27, "6": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20, 35, 37, 38, 40], "60": [2, 6, 8, 11, 12, 14, 15, 20, 25, 28, 37, 40], "60494": 15, "60510": 15, "60526": 15, "60726": 6, "60888": 6, "60a3ac5eb9204ba286a0950278bdd585": 11, "61": [4, 6, 12, 14, 15, 20], "61024": 4, "6106": 6, "610689785": 20, "62": [4, 14, 20], "627": 4, "62it": 5, "63": [4, 6, 8, 14, 15], "6334": 6, "64": [4, 6, 8, 9, 10, 11, 12, 14, 15, 20, 25], "6452": 8, "6462": 8, "6463": 8, "646765918": 20, "6469": 8, "6480": 8, "6481": 8, "6496": 8, "65": [6, 8, 14], "6536": 8, "6538": 8, "655488413": 15, "66": [6, 7, 14], "6608": 6, "66it": 11, "67": [2, 14, 15, 20], "6777": 6, "67it": 14, "68": [4, 12], "68134bd8ab2b": 6, "6864": 4, "69": [6, 12], "6e8d71e7": 6, "7": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 28, 37], "70": [4, 6, 7, 12, 14, 15, 25], "7040631a4da2461d9e1d7e8dfee0353b": 8, "71": [4, 6, 7, 15], "712400": 28, "72": 14, "7230": 6, "72b": [8, 38], "73": [6, 7, 12, 14, 37], "730975341796876": 37, "731c2b28eda1": 6, "7393223a309540c08bc21f2e65ce97ad": 4, "73it": 6, "74": [4, 6, 7, 12, 14], "75": [2, 4, 5, 6, 8, 11, 14, 15, 20, 37], "7584": 6, "75it": [14, 20], "76": [4, 6, 14, 22], "77": [11, 14], "78": [4, 6, 8, 14, 20], "79": [4, 15], "791": 4, "7b": [1, 2, 4, 7, 8, 10, 12, 14, 20, 33, 38], "7f": 28, "7fa2af80": 25, "7x": 27, "8": [2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 20, 22, 24, 25, 28, 34, 35, 37, 38, 41], "80": [8, 12, 20], "800": 27, "8000": 34, "8080": 0, "80928667": 14, "81": [2, 4, 6, 8, 14, 20], "8192": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28, 35], "82": [3, 4, 6], "82it": 15, "83": [6, 15], "8336": 6, "836": [8, 20], "838741688": 12, "83it": 15, "84": [4, 12, 14, 20, 28], "841345594": 8, "84204177856446": 37, "845": 37, "85": [2, 6, 12, 20], "8542968750000001": 37, "855": 37, "857674824": 4, "8588": 6, "86": [4, 6, 7, 11, 14, 37], "866964": 37, "87": [6, 12], "8703": 6, "8753": 4, "88": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "8828": 4, "8832519531250003": 37, "89": [2, 4, 6, 7, 15, 28], "89469451904297": 37, "8998": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "89a2": 6, "8b": [0, 2, 3, 4, 5, 6, 9, 10, 11, 13, 14, 15, 24, 25, 35, 37, 38, 40, 41], "8x": 27, "9": [2, 3, 4, 5, 6, 8, 12, 13, 14, 15, 20, 28, 37], "90": [2, 6, 8, 12, 14], "9090": 37, "91": [2, 6, 8, 12, 14, 15, 20], "915a": 6, "918": 6, "92": [2, 11, 20], "9221679687500002": 37, "9257": 4, "9284": 4, "92931": 4, "93": [2, 6, 14, 28], "93it": 14, "94": [6, 11, 20], "941": 37, "95": [2, 5, 6, 12, 15, 29], "950195e": 37, "95551": 4, "95it": 4, "96": [2, 6, 14, 20], "967b": 6, "97": [6, 11, 12, 20], "9734": 6, "98": [6, 12, 20], "9822": 4, "988861749": 4, "99": [2, 6], "9928": 4, "994310165": 7, "9998": 3, "9b1d": 6, "9d03": 6, "9dff": 28, "9x": 27, "A": [3, 5, 10, 25, 28, 35, 40, 41], "As": [5, 8], "At": 28, "By": [1, 2, 3, 13, 14, 20, 27], "For": [0, 2, 6, 9, 10, 11, 12, 13, 14, 15, 19, 22, 24, 25, 26, 27, 28, 38, 40, 41], "If": [0, 1, 2, 3, 9, 10, 12, 13, 15, 16, 24, 25, 26, 27, 28, 37, 39, 40, 41], "In": [2, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20, 24, 25, 28, 29, 37, 38], "It": [1, 2, 3, 4, 5, 6, 10, 13, 14, 15, 19, 20, 21, 27, 41], "NOT": 1, "No": 4, "OFED": 28, "Of": 20, "On": [3, 23], "One": 10, "Or": [14, 33], "THE": 6, "The": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 24, 25, 27, 28, 35, 37, 38, 40, 41], "Then": [9, 12, 18, 28, 34, 37], "There": [1, 8], "These": [22, 26, 38], "To": [0, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 20, 22, 24, 25, 27, 29, 33, 37, 38, 41], "Will": 13, "With": [0, 5, 24, 27], "_": 20, "__call__": 10, "__init__": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 20], "_work": 14, "a10": 41, "a100": [27, 41], "a2": 28, "a2dc": 28, "a3ec": 6, "a505d13eeefb": 6, "a738ec4b3862": 6, "a800": 27, "abbrevi": [2, 15, 41], "abc4e855af9f": 6, "abil": 5, "abl": [20, 37, 38], "abnorm": 9, "about": [1, 2, 3, 5, 6, 13, 15, 20, 29, 32], "abov": [10, 19, 20, 25, 28, 38, 39, 41], "abs_threshold": 40, "absolut": 40, "absorpt": 27, "ac37e883": 6, "acceler": [3, 13, 14, 27, 41], "accept": [6, 10, 13, 14, 34], "access": [5, 13, 15, 27, 28, 37, 41], "accord": [2, 3, 9, 25, 28], "accordingli": [0, 2, 12, 40], "account": [5, 26, 29], "accumul": [12, 29], "accur": [0, 2], "accuraci": [5, 14, 36], "achiev": [6, 20, 27, 28, 29], "across": [0, 5, 13, 19, 27, 29, 34, 40], "act": 2, "action": [5, 9], "activ": [9, 20, 21, 27, 28, 34], "actual": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "ad": [12, 27, 40, 41], "ad04316fbac64505bd499d55035ca195": 15, "adapt": [5, 13, 28], "add": [0, 2, 3, 5, 7, 8, 9, 10, 12, 13, 15, 16, 22, 24, 25, 27, 29, 37, 40, 41], "add_generation_prompt": [2, 12, 15], "add_safe_glob": 14, "add_work": 40, "addit": [2, 5, 12, 19, 38], "addition": [5, 14, 20, 27, 28], "additionali": 14, "addr": [13, 28, 34], "address": [13, 20, 28, 29, 34], "adjust": [5, 13, 14, 22, 27, 40], "administr": 5, "adopt": [5, 21], "adv": 25, "advanc": [10, 25], "advertis": 20, "advis": 15, "aerob": 20, "aflah02": 34, "after": [0, 5, 11, 12, 13, 14, 25, 27, 35, 40], "afterward": [14, 24], "ag": 28, "again": [6, 9, 14, 24], "against": [9, 19, 38], "agx": 35, "ahrenheit": 2, "ai": [0, 5, 6, 8, 12, 13, 16, 24, 27, 28, 35, 41], "aim": [14, 27, 28], "airport": 5, "algorithm": [9, 13, 14, 27, 40], "alia": [24, 41], "alibaba": [4, 7, 38], "alien": 6, "aliv": 20, "all": [0, 2, 3, 8, 10, 12, 13, 14, 16, 18, 19, 20, 25, 26, 27, 33, 38, 40, 41], "all_hip": [24, 41], "all_other_model": 38, "allenai": 9, "alloc": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "allow": [13, 14, 25, 27], "allow_auto_trunc": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "allowlist": 14, "almost": 38, "along": [5, 20], "alreadi": [5, 9, 27], "also": [1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 20, 24, 26, 27, 28, 29, 34, 35, 38, 39, 40], "altern": [0, 8, 10, 19, 22], "although": 5, "alwai": [0, 3, 5, 10, 13, 15, 26], "am": 5, "amd": [18, 31, 41], "america": 15, "among": [13, 14], "amount": 13, "an": [0, 2, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 16, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 33, 37, 40, 41], "analysi": [6, 16], "analyt": 5, "analyz": 5, "ancient": [5, 6], "andj": 20, "ani": [6, 10, 13, 14, 20, 25, 26, 27, 28, 38, 41], "annot": 25, "anoth": [8, 20, 24, 38], "answer": [0, 2, 12, 15, 19, 20, 22, 27], "antidisestablishmentarian": 19, "anyon": 11, "anyth": 6, "apart": 4, "api": [1, 10, 19, 20, 21, 25, 28, 38, 41], "api_kei": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "apivers": 28, "app": 5, "appalachian": 5, "appear": 10, "append": [2, 6, 22, 24], "appli": [0, 6, 9, 13, 26, 27, 28, 31, 41], "applic": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20, 21, 28], "apply_chat_templ": [2, 4, 12, 15], "appreci": 26, "approach": [5, 27, 38, 41], "appropri": [0, 2, 9], "approxim": [5, 40], "apt": [18, 25], "aqueduct": 6, "ar": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 19, 20, 24, 25, 26, 27, 28, 35, 38, 40, 41], "arbitrari": 14, "arch": [6, 18], "archiev": 40, "architectur": [4, 6, 9, 11, 22, 25], "area": 5, "arg": [12, 19, 25, 41], "argument": [2, 4, 9, 15, 21, 25, 27, 35, 40, 41], "arguments_non_stream": 2, "aris": 29, "arm": 5, "arriv": 12, "art": [4, 5, 11], "articl": 5, "artifici": [5, 6], "artist": 5, "ask": [12, 20, 26, 30], "aspect": [5, 20], "assert": [4, 6, 10], "asset": 8, "assign": 10, "assist": [0, 1, 2, 4, 5, 6, 8, 10, 11, 14, 15, 19, 20, 22], "assistant_begin": 20, "assistant_end": 20, "assistantgener": 15, "associ": [4, 5, 27], "assum": [2, 12, 37], "async": 5, "async_gener": 5, "async_stream_and_merg": 5, "asyncio": [0, 5], "atmospher": 5, "attach": 8, "attain": 3, "attent": [2, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20, 21, 28, 35, 38, 41], "attention_backend": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "attract": [5, 6, 19], "audio": [2, 6, 11, 14], "auror": 20, "australia": [6, 11, 14, 20], "auto": [0, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 20], "autom": 24, "automat": [0, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 20], "autoregress": 14, "autosc": 41, "autotoken": [2, 4, 7, 9, 12, 15], "autotun": 3, "avail": [0, 2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 27, 28, 37, 40, 41], "available_tool": 2, "avali": 37, "averag": 19, "avoid": [0, 6, 28, 33, 39, 40, 41], "await": 5, "awar": 5, "awq": [9, 21, 27], "awq_marlin": 9, "b": [14, 27, 28, 41], "b1a7": 6, "b3b424a24f03432abcc7c8a6628feda6": 14, "b498": 6, "b5317254a5a4450c80b2d41105f8a3e1": 8, "b76b": 6, "ba207039d2284261b243c10ec55b4013": 15, "back": [5, 8, 21], "backend": [4, 10, 15, 19, 20, 24, 25, 34, 35, 37, 41], "backend_input_fil": 6, "backend_result_fil": 6, "background": [8, 40], "backtrac": 25, "bad": [4, 19], "baichuan2": 38, "balanc": [6, 13, 20, 27], "balance_abs_threshold": 40, "balance_rel_threshold": 40, "banner": 20, "base": [3, 9, 10, 13, 14, 19, 24, 25, 27, 28, 34, 40], "base64": 10, "base_gpu_id": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "base_url": [2, 6, 7, 8, 11, 12, 14, 15], "baseformatdetector": 2, "baseimageprocessor": 38, "basemodel": 15, "basereasoningformatdetector": 12, "bash": [0, 17, 18, 22, 25, 34, 35], "bashrc": 25, "basic": [13, 25], "basic_qa": 20, "batch": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16, 21, 25, 27, 28, 29, 41], "batch_04420212": 6, "batch_2058b675": 6, "batch_88376548": 6, "batch_detail": 6, "batch_id": 6, "batch_job": 6, "batch_request": 6, "batch_respons": 6, "batch_siz": 9, "batchrequestcount": 6, "batchsiz": [13, 40], "batteri": 5, "bc": 5, "bce": 6, "becaus": [3, 8, 20], "becom": 5, "been": [5, 6, 9, 25, 28], "befor": [0, 2, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20, 25, 26, 27, 35, 38], "beforehand": 27, "begin": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "behavior": [0, 5, 10, 13, 20], "beij": [6, 15], "being": [3, 5, 20], "below": [2, 10, 13, 14, 16, 18, 20, 24, 40, 41], "bench_offline_throughput": 25, "bench_one_batch": [16, 25, 38, 41], "bench_serv": [24, 25, 37], "bench_sglang": [16, 22], "bench_specul": 27, "benchmark": [9, 13, 14, 16, 24, 36], "berlin": [15, 19], "berlin3": 20, "besid": 20, "bespok": 19, "best": [13, 27], "better": [0, 3, 6, 9, 10, 13, 15, 27, 38, 41], "between": [4, 10, 13, 20, 40], "beyond": 27, "bf16": 27, "bfloat16": [2, 4, 6, 8, 11, 12, 13, 14, 15, 20, 35], "bin": [13, 14, 16, 18, 34], "binari": 5, "bind": 13, "bit": [5, 9], "bitsandbyt": 9, "black": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "blackwood": 6, "blob": [8, 10, 14, 20], "block": 20, "blog": [27, 32, 34], "blogpost": [13, 19], "blood": 20, "blue": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "bluefield": 28, "bmm": 27, "bnf": [10, 15], "board": 8, "bodi": 6, "bogart": 20, "bool": [10, 12], "boost": [13, 27], "born": 20, "boston": 2, "bot": 15, "both": [3, 5, 10, 12, 20, 25, 27, 38, 39, 40], "bottleneck": 3, "bound": 4, "box": [12, 22], "bra": 6, "branch": [0, 5, 14, 26, 41], "bras\u00edlia": [6, 11, 14, 20], "brave_search": 15, "brave_searchy": 15, "brazil": [6, 11, 14, 20], "break": [4, 10, 11], "brightli": 6, "brightston": 5, "brooklyn": 5, "browser": [0, 25], "budget": 13, "buffer": 12, "bug": [6, 9, 26], "build": [0, 5, 8, 9, 13, 17, 20, 24, 35, 41], "built": [6, 22, 26, 41], "busi": [5, 40], "c": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 24, 28, 41], "c3ec": 28, "c4": 9, "c43fe09311bb454989a985f3203d60d1": 2, "c7": 28, "c79a": 28, "ca": [2, 15], "cab": 8, "cach": [2, 6, 7, 8, 11, 12, 13, 14, 15, 16, 18, 20, 21, 24, 25, 28, 29, 33, 37, 39, 41], "cache_hit_r": 37, "cache_threshold": 40, "cached_token": [4, 11, 15], "cafe": 5, "calcul": [9, 12, 20], "calibr": 9, "calibration_dataset": 9, "california": [2, 15], "call": [5, 12, 13, 15, 19, 20, 21, 25, 41], "call_data": 2, "can": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 24, 25, 27, 28, 29, 32, 34, 35, 37, 38, 39, 40, 41], "canada": 20, "canberra": [6, 11, 14], "canberra3": 20, "cancel": 6, "cancelled_job": 6, "candid": [13, 14], "cannot": [25, 28, 41], "cap": [24, 41], "capabl": 24, "capac": [14, 40], "capit": [0, 4, 5, 6, 10, 11, 14, 15, 19, 20, 40], "capital_info": 15, "capitalinfo": 15, "captain": 6, "captur": [9, 13, 27, 28], "car": 8, "care": 5, "carolina": 5, "carri": 5, "cascad": 14, "case": [3, 4, 5, 13, 14], "cast": [13, 14, 27], "cat": 22, "catch": 20, "cathedr": [4, 5], "caus": [22, 25, 29], "cave": 4, "caveat": 25, "cd": [17, 24, 37, 41], "celesti": 6, "celsiu": [2, 15], "center": 6, "central": 5, "centuri": 5, "certain": [14, 24], "chain": 21, "challeng": 5, "chang": [0, 10, 16, 18, 24, 26, 27, 28, 29, 38], "channel": [9, 13, 26, 27], "charact": [5, 13, 20, 40], "character_gen": 20, "character_regex": 20, "charl": 5, "charm": 5, "chat": [2, 10, 11, 12, 13, 14, 15, 21, 35, 38], "chat_exampl": 20, "chat_templ": [1, 2, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20], "chatbot": 5, "chatcomplet": [2, 6, 11, 14], "chatcompletionmessag": [2, 6, 11, 14], "chatcompletionmessagetoolcal": 2, "chatglm": 38, "chatml": [1, 8, 10, 38], "check": [0, 2, 6, 10, 13, 26, 27, 28, 41], "check_output": [7, 8, 11], "checkout": 14, "checkpoint": [2, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20, 25], "chief": 5, "child": [8, 20], "china": [6, 15], "choic": [2, 6, 8, 11, 12, 13, 14, 15, 20, 21], "choicedeltatoolcal": 2, "choicedeltatoolcallfunct": 2, "choices_method": 19, "choos": [5, 13, 15, 20], "chore": 20, "chosen": 13, "chrome": 25, "chunk": [2, 6, 8, 10, 11, 12, 13, 14, 20, 21, 39], "chunk_text": 12, "chunked_prefill_s": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 28], "ci": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "citi": [2, 4, 5, 11, 15], "civil": 6, "clariti": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "class": [2, 10, 12, 15, 38], "classifi": 11, "clean": [0, 6], "cleaned_chunk": 5, "clear": [3, 5], "cli": [4, 25], "click": 37, "client": [6, 12, 14, 15, 25, 28], "climat": 5, "climb": 5, "clone": [24, 35, 41], "cloth": [8, 20], "clothespin": 20, "cloudi": 2, "cloudli": 2, "cluster": [13, 28, 34, 41], "clusterfirstwithhostnet": 28, "cm1": 28, "co": [16, 21], "code": [0, 2, 4, 6, 7, 8, 11, 12, 14, 15, 16, 20, 24, 25, 27, 28, 29, 38], "codebas": [0, 26], "coher": 14, "collect": [5, 9], "colleg": 5, "collis": 0, "color": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 25], "colosseum": 6, "com": [8, 10, 12, 14, 16, 17, 18, 20, 22, 24, 25, 26, 28, 35, 41], "combin": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 27, 40], "come": 3, "command": [5, 7, 9, 11, 16, 18, 24, 25, 27, 28, 38, 40, 41], "commit": 0, "common": [5, 28, 39], "commonli": [2, 20], "commun": [5, 6, 21, 28], "compar": [27, 38], "comparison": [19, 38], "compat": [1, 4, 6, 7, 8, 10, 13, 14, 27, 28], "compil": [0, 13, 28], "complet": [2, 4, 5, 7, 8, 11, 12, 14, 15, 20, 21, 28, 35], "completion_templ": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "completion_token": [2, 4, 6, 8, 11, 14, 15], "completion_tokens_detail": [2, 6, 11, 14], "completion_window": 6, "completionchoic": 6, "completionusag": [2, 6, 11, 14], "complex": [5, 26], "compon": 38, "compos": 37, "compressedtensorsconfig": 9, "comput": [6, 8, 9, 13, 14, 20, 25, 27, 29, 35], "concis": [0, 5, 6, 26], "conclud": 12, "concurr": [0, 13], "conda": 41, "condit": [2, 40], "confid": 19, "config": [9, 13, 25, 35], "configur": [0, 9, 22, 26, 27, 28], "configuratin": 27, "confirm": [2, 24], "conflict": 0, "congress": 5, "conjunct": 40, "connect": [6, 14, 34, 37, 40], "connectx": 28, "conquest": 6, "conserv": 13, "consid": [10, 14, 40], "consist": 26, "constant": 10, "constitut": 5, "constrain": 15, "constrained_json_whitespace_pattern": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "constraint": [0, 10, 13, 15, 20], "construct": 14, "consult": 13, "consumpt": [0, 5, 13, 14], "contain": [13, 19, 25, 28, 41], "container": 28, "containerd": 28, "containerport": 28, "content": [2, 4, 6, 8, 11, 12, 14, 15], "context": [14, 35], "context_len": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "context_length": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "continu": [4, 5, 14, 21], "contract": 12, "contribut": [1, 14, 22, 30], "contributor": [0, 26], "control": [10, 13, 14, 21, 25, 28], "conv": 4, "conveni": [9, 20, 24, 25], "convers": [1, 5], "convert": [9, 15, 38], "convert_dict_to_tool": 2, "copi": [3, 13, 41], "core": [20, 21], "corner": 8, "correct": [13, 25], "correctli": [25, 28], "correspond": [2, 9, 10, 13], "cosmo": 6, "cost": 6, "cot": 12, "could": [5, 14, 24, 28], "count": [6, 40], "counter": 37, "countri": [5, 6, 11, 14, 15, 20], "cours": 20, "cover": [6, 7, 8, 28], "coverag": 38, "cpu": [13, 21, 34], "cpu_offload_gb": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "creat": [0, 2, 5, 6, 7, 8, 11, 12, 14, 15, 24, 26, 38, 41], "created_at": 6, "creativ": [5, 6], "critic": 25, "crucial": [22, 28], "ctrl": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "cu124": 41, "cubla": 29, "cuda": [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 25, 27, 28, 29, 35, 41], "cuda_graph": 13, "cuda_graph_b": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "cuda_graph_max_b": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "cuda_hom": 41, "cuda_vers": 35, "cuda_visible_devic": 18, "cudagraph": 3, "cudagraphrunn": 13, "cudnn": 35, "cuisin": [4, 11], "cultur": [5, 11], "cumul": 10, "curl": [4, 10, 18, 37, 40], "curl_command": [8, 11], "curl_id": 7, "curl_text": 7, "current": [2, 3, 5, 6, 9, 10, 12, 13, 14, 15, 27, 29, 41], "custom": [0, 5, 6, 13, 21, 27], "custom_id": 6, "custom_logit_processor": 10, "custom_param": 10, "custom_param_list": 10, "custom_serv": 5, "customlogitprocessor": 10, "cutlass": 9, "cycl": 40, "d": [6, 7, 8, 11, 20, 22, 25, 34, 41], "d028": 6, "d9bf6ac94fdf4317b8ca5071f25c7ca": 15, "daili": 5, "dalgeti": 20, "dame": [4, 5], "dark": 6, "data": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 21, 24, 37, 41], "data1": 28, "data_fil": 9, "dataset": [9, 22, 24, 25, 37], "datasourc": 37, "date": [5, 15], "dbazur": 16, "dbrx": 38, "dca5": 6, "de": 5, "deactiv": 41, "deadlock": 13, "death": 20, "deb": 25, "debat": 5, "debug_tensor_dump_inject": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "debug_tensor_dump_input_fil": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "debug_tensor_dump_output_fold": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "decad": 6, "deceas": 20, "decemb": 15, "decis": 5, "decod": [2, 3, 4, 6, 8, 11, 12, 15, 21, 27, 39], "decode_attent": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20], "decode_log_interv": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "decode_unicod": [10, 11], "decreas": [3, 13, 39], "dedic": [10, 38], "deepep": 13, "deeper": [14, 26], "deepseek": [8, 9, 12, 13, 16, 21, 28, 35, 38, 41], "deepseek_v3": 16, "deepseek_v3_mo": 28, "deepseekr1detector": 12, "def": [2, 5, 10, 12, 15, 19, 20, 22, 38], "default": [1, 2, 3, 8, 10, 12, 13, 14, 15, 16, 19, 20, 22, 24, 27, 28, 40, 41], "defin": [1, 10, 12, 15, 20, 34], "degrad": [8, 13, 14, 28], "degre": 2, "del_respons": 6, "delai": [9, 25], "delet": [3, 6, 13, 41], "delete_ckpt_after_load": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "delta": [2, 6, 11, 12], "demonstr": [0, 2, 5, 38], "dep": [24, 41], "depend": 41, "depict": [8, 20], "deploi": [3, 23, 27, 41], "deploy": [21, 28, 41], "deprec": [4, 13], "depth": 14, "deriv": 14, "describ": [8, 10, 19, 22, 24, 38, 41], "descript": [2, 15, 25, 27], "design": [0, 5, 12, 21, 27], "desir": [3, 15, 22], "destin": [4, 11], "detail": [2, 4, 5, 6, 10, 13, 14, 15, 22, 25, 26, 27, 28, 35, 38, 40], "detailed_tip": 20, "detect": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20, 40], "detect_and_pars": 12, "detector": [2, 12], "detector_class": 12, "detectormap": 12, "determin": [2, 19, 40], "determinist": 13, "deterministiclogitprocessor": 10, "detoken": 10, "dev": [13, 16, 18, 24, 25, 28, 41], "devel": 18, "develop": [5, 6, 25, 26, 27, 41], "deviat": 13, "devic": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 18, 20, 24, 28, 35, 41], "device_map": 9, "devkit": 35, "devtool": 25, "diagnos": [5, 28], "diagnosi": 5, "dict": [2, 10, 12, 15], "dictionari": 2, "didn": 2, "diet": 20, "differ": [0, 3, 4, 5, 8, 13, 16, 20, 22, 24, 27, 28, 29, 33, 38, 40], "dir": 28, "direct": [5, 40], "directli": [0, 5, 9, 13, 14, 15, 20, 26, 40], "directori": [3, 13, 16, 33, 38], "disabl": [4, 7, 8, 9, 12, 13, 16, 20, 25, 27, 29], "disable_cuda_graph": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "disable_cuda_graph_pad": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "disable_custom_all_reduc": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "disable_mla": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "disable_outlines_disk_cach": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "disable_overlap_schedul": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "disable_radix_cach": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "disaggregation_bootstrap_port": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "disaggregation_mod": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "discourag": 10, "discov": 5, "discrep": 22, "discuss": [13, 26, 27], "discussion_r1950153599": 12, "diseas": 5, "disk": [13, 33], "dismal": 5, "dispatch": 29, "displai": [0, 2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 24], "dist": [13, 27, 28, 34], "dist_init_addr": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "dist_timeout": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "distil": [12, 35], "distrib_releas": 25, "distribut": [2, 4, 6, 7, 8, 10, 11, 12, 14, 15, 20, 28, 40, 41], "distro": 24, "divers": [5, 6, 10, 14], "dlami": 16, "dn": 20, "dnspolici": 28, "do": [0, 3, 5, 6, 8, 9, 18, 20, 25, 26, 27, 34, 38, 41], "doc": [10, 16, 19, 24, 25, 35, 41], "docker": [25, 27, 28, 33, 35, 37], "dockerfil": [24, 41], "dockerx": [24, 41], "document": [1, 3, 5, 13, 24, 27, 28, 34, 35, 41], "doe": [3, 13, 16, 25, 38], "doesn": 28, "dome": 6, "don": [10, 13, 14, 16, 24, 26, 37], "donald": 19, "done": [10, 11, 13, 18, 34], "down": [13, 19], "downcast": [4, 7], "download": [9, 10, 14, 16, 22, 25], "download_data": 22, "download_dir": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "dp": [13, 27, 40], "dp_size": [2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "dpkg": 25, "dr": 5, "draft": [13, 14, 27], "dri": [18, 24, 41], "drink": 20, "driver": 28, "drop": 40, "drun": [24, 41], "dry": 20, "ds_channel_config_path": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "ds_heavy_channel_num": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "ds_heavy_channel_typ": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "ds_heavy_token_num": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "ds_sparse_decode_threshold": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "dshm": 28, "dtype": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 27, 35], "dublin": 15, "duck": 19, "due": [0, 3, 4, 19, 25, 35, 39], "dummi": [10, 25], "dump": [6, 7, 10, 15], "durat": [20, 25], "dure": [3, 4, 5, 6, 9, 10, 13, 14, 39, 40], "dusti": 35, "duti": 5, "dv": 16, "dye": 5, "dynam": [0, 9, 14, 25, 29], "e": [0, 2, 6, 12, 13, 14, 15, 16, 18, 24, 25, 26, 34, 37, 38, 41], "e2e_lat": [4, 11, 15], "e2e_request_latency_second": 37, "e2e_request_latency_seconds_bucket": 37, "e2e_request_latency_seconds_count": 37, "e2e_request_latency_seconds_sum": 37, "e4m3": 27, "e5": [7, 21], "each": [5, 10, 13, 22, 27, 28, 34, 40], "eager": 5, "eagl": [13, 27], "eagle2": 14, "eagle3": 14, "eagle_work": 14, "earli": [3, 5], "earlier": 19, "easi": [20, 21, 38, 39], "easier": 16, "easili": 5, "eater": 20, "ebnf_grammar": 15, "ecc": 13, "echo": [18, 24, 25, 34], "economi": 5, "ecosystem": 5, "edit": 18, "educ": 5, "eec95a79e569": 6, "effect": [5, 14, 28], "effici": [5, 10, 14, 21, 27, 35], "eiffel": [4, 5], "eight": 28, "either": [13, 41], "elabor": 9, "elaps": 28, "elect": 5, "elector": 5, "element": [6, 13], "eleutherai": 19, "elif": 20, "elimin": 40, "els": [0, 2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "embed": [6, 11, 13, 21], "embedding_process": [4, 7], "empir": 6, "emploi": [10, 14], "emptydir": 28, "en": 9, "enabl": [0, 2, 5, 6, 7, 8, 9, 10, 12, 13, 14, 16, 24, 27, 29, 35, 37, 41], "enable_cache_report": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "enable_custom_logit_processor": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "enable_deepep_mo": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "enable_double_spars": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "enable_dp_attent": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "enable_ep_mo": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "enable_flashinfer_mla": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "enable_flashmla": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "enable_hierarchical_cach": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "enable_memory_sav": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "enable_metr": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "enable_mixed_chunk": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "enable_nan_detect": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "enable_nccl_nvl": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "enable_p2p_check": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "enable_torch_compil": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "encod": [7, 10, 11, 13], "encount": [13, 24, 27, 28, 41], "encourag": [6, 10, 26], "end": [2, 4, 5, 6, 7, 8, 10, 11, 12, 14, 15, 20, 28, 37, 38], "end_tag": 15, "endless": 6, "endoftext": 22, "endpoint": [4, 6, 11, 15, 20, 41], "enforc": 13, "engin": [0, 4, 6, 8, 9, 13, 14, 20, 21, 24, 25, 27, 29, 35], "england": [10, 15], "enhanc": [5, 14, 20, 27], "enjoi": [5, 20], "enlighten": 4, "enough": [3, 13], "ensur": [0, 14, 20, 24, 26, 27, 34, 35, 37, 38], "entir": [15, 24], "entryclass": 38, "entrypoint": 38, "enum": [2, 15], "enumer": [10, 20], "env": [24, 28, 33, 41], "env_fold": 34, "environ": [0, 2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 18, 20, 24, 26, 28, 33, 34, 41], "eo": [3, 10], "eos_token_id": 4, "ep": 13, "ep_siz": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "equal": 12, "equival": [11, 24, 29], "era": 5, "err": 34, "error": [0, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20, 26, 27, 28, 34, 40], "especi": [2, 3, 5, 16], "essenti": [5, 28], "establish": [6, 12], "etc": [2, 21, 24, 25], "eth": 28, "ethernet": 28, "europ": 5, "evalu": [4, 14], "even": [5, 19, 20], "event": [5, 25], "everi": [5, 6], "evict": 40, "eviction_interv": 40, "evolv": 5, "exampl": [2, 3, 4, 5, 6, 7, 13, 15, 16, 18, 19, 22, 25, 33, 34, 37, 38, 40, 41], "example_function_nam": 15, "example_imag": [8, 10, 20], "example_nam": 15, "example_valu": 15, "exaon": 38, "exce": [10, 13, 40], "exceed": 40, "excel": 26, "except": 6, "excit": 5, "exclud": 22, "exec": [0, 16, 25], "execut": [0, 5, 9, 14, 15, 25, 28, 41], "exercis": 20, "exist": [6, 38, 40], "exit": [8, 20, 34], "expand": [20, 38], "expans": [6, 14], "expect": [0, 2, 5], "experi": 5, "experiment": [13, 14], "expert": [5, 27], "explain": 5, "explan": [5, 35], "explicitli": [14, 15], "explor": [5, 6], "export": [3, 13, 16, 18, 24, 25, 27, 33, 41], "export_deepseek_nextn": 27, "expos": [25, 37], "express": [10, 20], "extend": [13, 14, 19, 38], "extens": [6, 21, 38], "extern": 21, "extra_bodi": [12, 15], "extract": [5, 12, 22], "ey": 20, "f": [0, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20, 24, 28, 37, 41], "f5717ff5": 6, "f7ff": 28, "f849a967a5fc48569f1fe52c8ac02692": 14, "f_": 14, "f_1": 14, "f_k": 14, "fabdb6a30b49f79a7aba0f2ad9df9b399473380f": 16, "face": [1, 6, 8, 9, 13], "facilit": [6, 27], "factor": [9, 14, 29], "factual": 5, "fahrenheit": [2, 15], "fail": [0, 4, 6, 8, 13, 19, 20, 26, 34, 40], "failur": [40, 41], "fair": 6, "fallback": 15, "fals": [2, 4, 6, 7, 8, 10, 11, 12, 14, 15, 20, 22], "famou": [4, 5], "far": [5, 10], "fashion": [4, 5, 11], "fast": 21, "faster": [21, 41], "fastest": 14, "fauna": 5, "favor": 3, "fcf": [2, 3, 4, 6, 7, 8, 11, 12, 14, 15, 20], "fe3469df28ee4e18a833a2d2ce5d2aa5": 11, "fe36": 28, "fe64": 28, "fe6e": 28, "fe73": 28, "fe80": 28, "feather": 20, "featur": [0, 9, 13, 14, 26, 27], "feder": 5, "feel": [22, 26], "felt": [5, 6], "femi": 5, "fetch": [2, 15, 25], "few": [5, 22, 24], "few_shot_exampl": 22, "few_shot_gsm8k": 22, "fi": 34, "fiction": [5, 6], "field": [5, 15], "fieldpath": 28, "fieldref": 28, "file": [0, 1, 6, 10, 13, 14, 25, 26, 28, 37, 38, 39], "file_respons": 6, "file_storage_path": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "fill": 20, "fillmor": 19, "final": [2, 6, 12, 14, 22, 29, 40], "final_respons": 2, "financ": 5, "find": [0, 2, 12, 15, 28, 32, 38, 40, 41], "finish": 27, "finish_reason": [2, 4, 6, 8, 11, 14, 15], "fire": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 24, 28], "first": [0, 2, 3, 4, 7, 8, 12, 13, 25, 26, 27, 34, 37, 40, 41], "first_answ": 20, "firstli": 25, "fit": [20, 41], "fix": [0, 26, 39, 41], "flag": [10, 16, 27, 37], "flase": 12, "flashinf": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 27, 35, 41], "flashinfer_mla_disable_rag": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "flexibl": [11, 21], "flexibli": 27, "flip": 14, "float": [10, 40], "float16": [4, 7, 14], "float32": [4, 7, 13], "flora": 5, "flow": [5, 21], "fluenci": 6, "flush": [5, 10, 11, 20], "flush_cach": 4, "fly": 9, "focu": 5, "focus": [5, 6], "folder": [0, 3, 18, 22, 25], "follow": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 25, 26, 27, 28, 34, 35, 37, 38, 39, 40, 41], "foo": 15, "food": 20, "forc": [5, 10, 24, 41], "forev": 18, "fork": [20, 25], "form": [10, 25], "format": [0, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 25, 27], "forum": 6, "forward": [5, 13, 38], "forward_batch": 38, "fossil": 5, "found": [2, 4, 5, 13, 28, 35, 40], "four": [5, 27], "fp16": [13, 34], "fp8": [3, 9, 13, 21, 34, 41], "fp8_dynam": 9, "fp8_e5m2": [13, 27], "fp8_kernel": 9, "fp8dq": [9, 13], "fp8wo": [9, 13], "fr": [13, 14], "frac": 25, "fraction": [8, 13, 14, 20, 22, 28, 35, 39], "fragment": 2, "framework": [21, 26], "franc": [0, 4, 5, 10, 11, 14, 15, 19, 20, 40], "francisco": [2, 15], "free": [13, 22, 26], "freelanc": 5, "french": [4, 5], "freq_32768": 14, "frequenc": [10, 13], "frequency_penalti": [6, 10], "frequent": [3, 30], "from": [0, 1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 22, 25, 27, 29, 30, 35, 40], "from_pretrain": [2, 4, 7, 9, 12, 15], "frontend": [1, 41], "fruit": 20, "full": [3, 14, 27, 28], "full_argu": 2, "full_text": 12, "fulli": [0, 6, 24, 26], "func_latency_second": 37, "func_latency_seconds_bucket": 37, "func_latency_seconds_count": 37, "func_latency_seconds_sum": 37, "funcion": 2, "function": [14, 15, 19, 20, 21, 22, 37, 38], "function_cal": [2, 6, 11, 14], "function_call_input": 2, "function_call_pars": 2, "function_call_respons": 2, "function_call_response_json": 2, "function_dict": 2, "function_nam": 15, "functioncallpars": 2, "functool": 38, "further": [5, 14, 41], "furthermor": 14, "futur": [4, 5, 12, 13, 14, 38], "futurewarn": 14, "fx": 3, "g": [0, 2, 8, 12, 13, 15, 16, 18, 25, 26, 37, 38, 41], "gaug": 37, "gaull": 5, "gaze": 6, "gb": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 28], "gemm": 13, "gemma": [8, 21, 38], "gemma2forsequenceclassif": 38, "gemma3": [8, 38], "gen": [2, 3, 4, 6, 8, 11, 12, 15, 19, 20, 22], "gen_data": [2, 12], "gen_respons": [2, 12], "gen_throughput": 37, "gen_url": [2, 12], "gener": [2, 3, 6, 8, 12, 13, 14, 15, 21, 24, 25, 28, 37, 40], "generate_request": 37, "generated_text": [2, 12], "generation_tokens_tot": 37, "geographi": 15, "geologist": 5, "germani": [15, 20], "get": [2, 6, 7, 8, 11, 12, 13, 14, 15, 20, 25, 28, 38, 41], "get_answer_valu": 22, "get_current_d": 15, "get_current_weath": [2, 15], "get_max_total_num_token": 4, "get_memory_pool_s": 4, "get_messag": [2, 15], "get_model_info": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "get_one_exampl": 22, "get_server_arg": 4, "get_server_info": 4, "ggml": [10, 15], "gguf": 9, "gid": 28, "git": [18, 24, 26, 35, 41], "github": [8, 10, 12, 14, 20, 22, 24, 26, 35, 39, 41], "githubusercont": 8, "give": [15, 18, 20, 38], "given": [2, 10, 14, 15, 22, 27, 40], "glaciat": 5, "glm": 38, "global": [5, 13, 15], "gloo_socket_ifnam": 28, "gme": 38, "gnupg": 25, "go": [24, 37], "goal": 20, "good": [3, 5, 24, 26], "googl": [8, 20], "govern": [5, 6], "gptq": [9, 21], "gptq_marlin": 9, "gpu": [0, 3, 13, 14, 16, 18, 24, 27, 28, 33, 34, 40, 41], "gpu_id_step": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "gradual": 20, "grain": 20, "grammar": [10, 13, 15, 34], "grammar_backend": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "granit": 38, "graph": [3, 9, 10, 13, 14, 16, 25, 27, 28], "gre": 34, "great": [5, 13], "greater": 14, "greedi": 10, "greedy_token_select": 19, "greet": 10, "grep": [22, 28], "grok": 38, "group": [18, 24, 27, 35, 41], "group_siz": [9, 13], "grow": [5, 10], "grub_cmdline_linux": 24, "gryffindor": 20, "gsm8k": [16, 22], "gt": [2, 6, 20], "gte": [4, 7, 21, 38], "guarante": [10, 15], "guard": 9, "guid": [2, 10, 11, 13, 15, 22, 24, 25, 27, 28, 30, 41], "guidanc": [21, 41], "guidelin": 22, "gz": [9, 16], "h": 11, "h100": [14, 27, 41], "h20": [27, 28], "ha": [3, 5, 9, 25, 27, 28, 38, 40], "had": 6, "half": [20, 35], "hand": 3, "handl": [10, 11, 12, 25, 27, 28, 38], "hang": 20, "happen": [3, 6, 39], "happi": 26, "har": 22, "hard": 28, "hardwar": [21, 24], "harm": 16, "harri": 20, "hasattr": 6, "hash": 38, "hasn": 28, "have": [3, 5, 8, 12, 13, 14, 15, 19, 24, 26, 27, 28, 29, 37], "he": 5, "head": [5, 13, 25, 34, 38], "head_nod": 34, "health": 20, "health_gener": 4, "healthcar": 5, "healthi": [3, 20], "heavi": 13, "hei": 10, "hello": [5, 10], "help": [0, 2, 3, 5, 6, 10, 13, 15, 20, 24, 26, 37, 38, 39, 40], "her": 6, "here": [4, 5, 6, 9, 10, 11, 13, 14, 15, 20, 22, 24, 27, 28, 37, 38], "heritag": 5, "hf": [1, 9, 14, 38], "hf_home": 18, "hf_token": [18, 24, 41], "hf_xxx": 18, "hi": [10, 34], "hicache_ratio": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "hidden": [5, 10, 14], "high": [2, 3, 5, 6, 9, 10, 13, 14, 19, 20, 27, 35], "higher": [6, 10, 14], "highest": [10, 19, 40], "highest_token_prob": 10, "highli": [10, 13], "highlight": [2, 4, 6, 7, 8, 11, 12, 15, 20], "hike": 5, "histogram": 37, "histor": 5, "histori": [2, 4, 5, 11, 40], "historian": 6, "hit": [10, 28, 37], "holm": 20, "home": [4, 5, 24, 41], "hood": [29, 40], "horizon": 6, "host": [0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 24, 28, 33, 35, 37, 40, 41], "hostipc": 28, "hostnam": [13, 34], "hostnetwork": 28, "hostpath": 28, "hot_token_id": 14, "hour": 27, "hous": [5, 20], "household": 20, "how": [5, 9, 13, 14, 19, 22, 24, 26, 34], "howev": [20, 28], "html": 25, "http": [0, 2, 4, 5, 6, 7, 8, 10, 11, 12, 14, 15, 16, 17, 18, 20, 22, 24, 25, 26, 28, 35, 37, 40, 41], "http_server": 38, "hub": [5, 24, 41], "hufflepuff": 20, "hug": [1, 6, 8, 9, 13], "huge": 27, "huggingfac": [13, 16, 18, 24, 27, 33, 38, 41], "humor": 20, "hydrat": 20, "hyperparamet": [13, 21], "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 24, 25, 27, 28, 29, 34, 35, 37, 38, 39, 40, 41], "ib": 28, "ibdev2netdev": 28, "ibm": 38, "ibstatu": 28, "ibv_devic": 28, "ibv_devinfo": 28, "icon": [6, 8], "id": [2, 4, 6, 8, 10, 11, 12, 13, 14, 15, 25, 40], "idea": 26, "ident": [24, 38], "identifi": 12, "idl": 27, "ignor": [2, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 20], "ignore_eo": 10, "im_end": [1, 10, 20, 22], "im_start": [1, 10, 20], "imag": [5, 10, 13, 20, 24, 28, 38, 41], "image_byt": 20, "image_data": 10, "image_fil": 20, "image_id": 41, "image_nam": 35, "image_processing_llama": 41, "image_processor": 38, "image_qa": 20, "image_url": [8, 20], "imageprocessor": 38, "imbal": 40, "imbalanc": 40, "imit": 38, "immers": 5, "impact": 5, "implement": [6, 12, 14, 19, 22, 27, 29], "implicitli": 14, "import": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 20, 25, 37, 38, 40, 41], "import_model_class": 38, "import_new_model_class": 38, "importerror": 41, "impress": 6, "improv": [0, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20, 27], "inaccur": 25, "includ": [2, 4, 5, 6, 9, 12, 13, 15, 20, 21, 22, 27], "incorpor": [14, 20], "incorrect": [14, 19], "increas": [3, 5, 13, 14, 20, 27], "increasingli": 5, "incred": 5, "increment": 12, "incur": 19, "independ": [27, 40, 41], "indetermin": 29, "index": [0, 2, 4, 6, 8, 11, 14, 25, 28], "indic": [3, 13, 28], "inductor_root_cach": 3, "industri": [5, 21], "inf": [10, 37], "infer": [0, 9, 24, 27, 28], "infiniband": 28, "influenc": 6, "info": [2, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20, 28, 34], "inform": [2, 4, 5, 10, 15, 20], "infra": 41, "inherit": [2, 38], "init": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 28, 34], "initi": [6, 7, 8, 10, 12, 13, 19, 20, 29, 34], "initialdelaysecond": 28, "innov": [5, 27], "input": [2, 4, 6, 10, 12, 13, 14, 16, 20, 21, 24, 25, 27, 37, 38, 41], "input_file_id": 6, "input_file_path": 6, "input_id": [2, 4, 7, 10], "input_ids_embed": 7, "input_text": 4, "input_token": 4, "insid": [18, 25], "inspect": 25, "instabl": 0, "instal": [9, 11, 17, 18, 25, 28], "installationguid": 25, "instanc": [0, 9, 19, 37], "instantli": 25, "instead": [10, 13, 14, 39, 40, 41], "instinct": [24, 41], "instruct": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 20, 22, 24, 25, 26, 28, 33, 34, 35, 37, 38, 40, 41], "int": [5, 10, 13, 15], "int4": [9, 21, 35], "int4wo": [9, 13, 35], "int8": [9, 13, 27], "int8_kernel": 9, "int8dq": [9, 13], "int8wo": [9, 13], "integ": [10, 15, 40], "integr": [4, 5, 21, 28, 38], "intellig": [5, 6], "intens": 20, "interact": [5, 21], "interest": [5, 26], "interfac": [21, 38], "interleav": 8, "intermedi": 13, "intern": [5, 13, 16, 28], "internlm": 38, "internlm2": 38, "internlm2forrewardmodel": 38, "interpret": [2, 5, 6, 12], "intersect": 5, "interv": [13, 40], "intfloat": 7, "introduc": [4, 27, 29], "introduct": [5, 26], "introvert": 5, "intuit": 21, "investig": [5, 29], "invok": [2, 9], "involv": [5, 12, 20, 27], "io": [5, 28], "io_struct": 2, "iommu": 24, "ip": [20, 34], "ipc": [16, 24, 33, 41], "ipynb": 0, "ireland": 15, "iron": 8, "is_embed": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "is_gener": 4, "is_in_ci": [0, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20], "is_multimodal_model": 38, "is_valid_list_of_imag": 41, "isn": 28, "isol": [9, 28], "issu": [9, 13, 14, 20, 22, 24, 26, 29, 39, 41], "itali": [6, 15, 20], "itd": 16, "iter": 4, "iter_lin": [10, 11], "its": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 19, 20, 25, 27, 40], "j": 34, "j_master": 34, "j_node": 34, "jamesliu1": 14, "janu": 38, "japan": [6, 11, 14, 20], "jetpack": 35, "jetson": 31, "job": [6, 34], "jog": 20, "join": 2, "joke": 6, "json": [2, 4, 7, 8, 9, 11, 12, 13, 20, 25, 37, 40], "json_model_override_arg": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "json_output": 20, "json_schema": [10, 15], "jsonl": [6, 22], "jul": 15, "julian": 6, "jupyt": 0, "just": [1, 6, 18, 41], "k": [2, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 20, 22], "k8": [28, 41], "kaida": 5, "keep": [0, 6, 13, 16, 34], "kei": [4, 10, 13, 15, 20, 24, 25, 27, 38], "kernel": [9, 22, 24, 25, 27, 29, 39, 41], "kfd": [18, 24, 41], "kill": [0, 13, 25], "kind": 28, "kingdom": 20, "kit": 35, "know": 2, "knowledg": [5, 6, 15], "known": [4, 5], "kubectl": [28, 41], "kubernet": 23, "kv": [2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 25, 27, 39, 41], "kv_cache_dtyp": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "l": [8, 10, 34], "l4": 41, "l40": [27, 41], "lab": [8, 10, 38], "label": [18, 22, 26, 28, 37], "landmark": [4, 5], "lang": [8, 10, 20], "languag": [1, 6, 8, 11, 21, 41], "larg": [3, 8, 21, 25, 27, 41], "larger": [0, 10, 13, 27], "largest": 5, "last": [12, 14, 18, 41], "last_gen_throughput": 4, "late": 6, "latenc": [25, 27, 37], "latent": 13, "later": [2, 18, 19, 35], "latest": [28, 33, 41], "launch": [0, 1, 3, 5, 9, 10, 15, 22, 24, 25, 33, 34, 35, 38, 41], "launch_rout": 40, "launch_serv": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 22, 24, 25, 27, 28, 33, 34, 35, 37, 38, 40, 41], "launch_server_cmd": [0, 2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "law": [6, 27], "layer": [2, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20, 25, 27, 29, 38], "layer_id": 38, "le": 37, "lead": [0, 8, 10, 13, 14, 29], "leader": 28, "leadertempl": 28, "leaderworkerset": 28, "leaderworkertempl": 28, "leaf": 40, "lean": 20, "learn": [5, 6, 13, 24, 30, 38], "least": [10, 28, 40], "left": 8, "legisl": 5, "len": [10, 11, 22, 25], "length": [6, 10, 11, 13, 20, 25, 35], "less": [6, 25], "let": [2, 12, 13, 20], "letter": [2, 15], "level": [6, 10, 13, 24, 25], "leverag": 24, "lid": 28, "light": [4, 6], "like": [2, 3, 4, 5, 6, 9, 12, 13, 22, 27, 28, 40], "lilith": 5, "limit": [4, 5, 9, 13, 14, 19, 20, 25, 27, 28, 35], "line": [5, 6, 9, 15, 22], "linear": 9, "linearli": 10, "link": [28, 41], "link_lay": 28, "link_up": 28, "linkup": 28, "lint": [0, 26], "linux": 18, "linux64cli": 16, "list": [2, 6, 8, 10, 11, 12, 13, 14, 20, 25, 27, 38, 39], "live": 5, "ll": [2, 5, 12, 20, 28], "llama": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 21, 24, 25, 35, 37, 38, 40, 41], "llama2": 14, "llama3": [2, 8, 14, 38], "llama_3_vis": 8, "llamaembeddingmodel": 38, "llamaforcausallm": [4, 6, 11, 14, 15], "llamaforcausallmeagl": 14, "llamaforcausallmeagle3": 14, "llamaforsequenceclassif": [4, 38], "llava": [8, 10, 21, 38], "llava_llama_3": [8, 38], "llguidanc": 15, "llm": [0, 2, 5, 12, 14, 15, 19, 21, 24, 38, 41], "llmcompressor": 9, "lm": 22, "lm_head": [9, 14], "lmhead": 14, "lmm": [8, 10, 38], "lmsy": [14, 27, 34], "lmsysorg": [16, 18, 33, 41], "load": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 25, 27], "load_balance_method": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "load_dataset": 9, "load_format": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "load_imag": [10, 20], "local": [0, 5, 6, 7, 8, 13, 20, 26, 38, 41], "localhost": [0, 2, 4, 6, 7, 8, 10, 11, 12, 14, 15, 20, 37, 40], "locat": [0, 2, 5, 13, 15, 20], "log": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 20, 28], "log_level": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "log_level_http": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "log_request": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "log_requests_level": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "logic": 2, "logit": [13, 38], "logitsprocessor": 38, "logo": 8, "logprob": [2, 6, 8, 10, 11, 14, 19], "logprob_start_len": 10, "london": [15, 19, 20], "long": [5, 6, 13, 25, 27], "longer": [6, 14, 19], "longest": 3, "look": [1, 3, 5, 15, 26], "loop": 20, "lora": 10, "lora_backend": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "lora_path": [2, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 20], "loss": 9, "louvr": [4, 5], "love": [5, 11], "low": [10, 13, 14, 25], "lower": [3, 6, 9, 12, 26], "lpm": 3, "lru": 40, "lru_cach": 38, "lsb": 25, "lspci": 28, "lt": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20], "lw": 28, "lws_group_siz": 28, "lws_leader_address": 28, "lws_worker_index": 28, "m": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 22, 24, 25, 27, 28, 33, 34, 35, 37, 38, 40, 41], "ma": 2, "machin": [3, 5, 24, 41], "madrid2": 20, "magic": 20, "mai": [13, 14, 16, 20, 22, 24, 25, 28, 29, 33, 39], "main": [0, 5, 8, 10, 14, 20, 26, 40], "mainli": [4, 13], "maintain": [26, 38, 40], "mainten": 40, "major": [5, 6, 15, 26, 28, 38], "make": [0, 3, 5, 6, 13, 15, 21, 22, 25, 26, 28, 38], "malici": 14, "man": [8, 20], "manag": [0, 2, 13], "mani": [3, 4, 5, 13, 19, 29, 38], "manner": [10, 14], "manual": [0, 13, 25, 26, 33, 37], "map": [14, 16], "markdown": 0, "marlin": 9, "marn": 5, "mask": 10, "massachusett": 2, "match": [3, 4, 10, 15, 22, 40], "matched_stop": [2, 6, 8, 11, 14], "materi": [5, 32], "math": [20, 22, 38], "math_ev": 22, "mathemat": 29, "matrix": 27, "max": [13, 14, 27, 28, 39], "max_check": 6, "max_load": 40, "max_loras_per_batch": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "max_new_token": [2, 3, 4, 10, 11, 12, 15], "max_prefill_token": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 28], "max_req_input_len": 4, "max_running_request": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 28], "max_seq_len": 13, "max_token": [2, 6, 8, 11, 14, 15, 20, 22], "max_total_num_token": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "max_total_retri": 40, "max_total_token": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "max_tree_s": 40, "max_worker_retri": 40, "maxim": 14, "maximum": [9, 10, 13, 14, 40], "mcdse": 21, "md": [14, 26], "me": [5, 6, 15, 20], "mean": [2, 3, 5, 15, 24, 27], "measur": [10, 36], "mechan": [9, 27], "media": 5, "medic": 5, "medicin": 5, "medium": 28, "meet": [5, 13], "mellanox": 28, "mem": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 22, 25, 28, 34, 35, 39], "mem_fraction_stat": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "memori": [0, 2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 22, 25, 27, 28, 35, 40], "mention": [6, 8], "merg": 4, "merged_output": 5, "messag": [4, 5, 6, 7, 8, 11, 12, 14, 15, 20], "met": 40, "meta": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 24, 25, 34, 35, 37, 40, 41], "meta_info": [4, 11, 15], "metadata": [13, 28], "method": [6, 9, 21, 27, 38], "metric": [13, 30], "metropolitan": 5, "mi": 41, "mi300x": [24, 27], "microsoft": 16, "mid": 14, "middl": 8, "might": [3, 10, 13], "mild": 6, "mile": 6, "militari": 6, "millard": 19, "million": 5, "min": 10, "min_load": 40, "min_new_token": 10, "min_p": 10, "min_seq_len": 13, "mind": 26, "minicpm": [8, 38], "minicpmv": [8, 38], "minim": 14, "minimum": [9, 27, 40], "ministri": 20, "minor": 20, "mislead": 19, "miss": 1, "mistral": [2, 7, 21, 38], "mistralai": 2, "mix": [5, 13], "mixtral": 38, "mixtur": 27, "ml": 28, "mla": 13, "mllama": 8, "mllamaforconditionalgener": 8, "mlx5_bond_0": 28, "mlx5_bond_1": 28, "mlx5_bond_2": 28, "mlx5_bond_3": 28, "mmlu": 22, "mmmu": 38, "mnt": 16, "mobil": 5, "modal": [13, 21, 38], "mode": [2, 3, 5, 6, 13, 14, 28, 29, 35, 40], "model": [1, 3, 6, 7, 8, 10, 11, 14, 15, 16, 18, 19, 20, 21, 24, 25, 27, 28, 30, 34, 35, 36, 37, 40, 41], "model_arch_name_to_cl": 38, "model_class": 38, "model_config": 38, "model_dump_json": 15, "model_id": 9, "model_json_schema": 15, "model_nam": [2, 12, 37, 38], "model_path": [0, 2, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20, 34], "model_typ": 12, "model_valid": 15, "model_validate_json": 15, "modelcloud": 9, "modelregistri": 38, "modelscop": 30, "moder": 6, "modern": 6, "modif": [22, 28], "modifi": [9, 16, 25, 28, 38, 41], "modul": [13, 14], "moe": [13, 27, 38], "monitor": [6, 37], "mood": 5, "more": [4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 20, 21, 22, 25, 26, 27, 28, 29, 30, 38, 41], "most": [0, 1, 2, 3, 4, 5, 20, 28, 38, 40], "mostli": 29, "mount": 18, "mountain": 5, "mountpath": 28, "move": [5, 40], "mt43244": 28, "mtp": 27, "much": 5, "muggl": 20, "multi": [21, 28, 38, 40, 41], "multi_turn_qa": 20, "multifacet": 5, "multiformatpars": 2, "multimod": [8, 20, 38], "multipl": [0, 6, 10, 13, 14, 20, 27, 34, 38, 40], "museum": [4, 5], "must": [9, 10, 11, 12, 15], "my": [0, 5, 26], "my_model": 1, "my_model_templ": 1, "myself": 5, "n": [2, 5, 6, 10, 11, 14, 15, 20, 34], "n1": [6, 11, 14], "n2": [6, 11, 14], "n3": [6, 11, 14], "naiv": 13, "name": [0, 1, 2, 4, 5, 9, 10, 15, 16, 18, 19, 20, 24, 25, 28, 37, 41], "name_non_stream": 2, "namespac": 37, "nan": 13, "nativ": [10, 21, 40], "natur": [5, 6], "nbecaus": 6, "nbstripout": 0, "nc": 34, "ncapit": 14, "nccl": [27, 28, 34], "nccl_debug": 28, "nccl_ib_gid_index": [16, 24, 28], "nccl_init_addr": 34, "nccl_socket_ifnam": 28, "ndescrib": 10, "ndr": 28, "ndv5": 24, "nearli": [6, 24], "necessari": [20, 26, 28, 41], "need": [1, 2, 5, 8, 9, 12, 13, 16, 18, 20, 22, 24, 25, 28, 33, 38, 40, 41], "neg": 10, "nemo": [2, 38], "netdev": 28, "network": [4, 6, 16, 22, 24, 28, 35, 41], "neural": 4, "neuralmag": 9, "neutral": 5, "new": [0, 3, 4, 5, 6, 7, 8, 10, 11, 13, 14, 15, 17, 18, 20, 21, 22, 26, 28, 37, 40], "new_token_ratio": [3, 6], "new_york": 15, "newlin": 13, "newmodeldetector": 2, "next": [8, 10, 12, 14, 27, 38, 40], "nextn": 27, "ngener": [5, 15], "nic": 28, "nice": 5, "nlist": 6, "nlocat": 4, "nlp": [4, 7, 38], "nn": 28, "nnode": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 28, 34], "no_stop_trim": 10, "node": [4, 14, 21, 25, 28, 40, 41], "node1": 28, "node_rank": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "non": 20, "nondeterminist": 29, "none": [2, 4, 6, 7, 8, 10, 11, 12, 14, 15, 20], "normal": [2, 12], "normal_text": [2, 12], "north": 5, "northern": 5, "northwest": 5, "note": [1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 24, 25, 26, 27, 33, 34, 38, 40], "notebook": [0, 2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "notic": [28, 29], "notr": [4, 5], "nousresearch": 24, "novel": [5, 6], "now": [4, 14, 20, 24], "npari": 4, "nprompt": 5, "npython": 6, "nreason": 2, "nsub": 22, "nsy": [16, 25], "ntask": 34, "nthe": [4, 6], "null": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 41], "num": [14, 16, 24, 25, 27, 37], "num_continuous_decode_step": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "num_hidden_lay": 25, "num_key_value_head": 25, "num_paused_request": 4, "num_quest": 22, "num_queue_req": 37, "num_running_req": 37, "num_shot": 22, "num_used_token": 37, "numa_balanc": 24, "number": [3, 10, 12, 13, 25, 27, 37, 40], "numer": [4, 27, 29], "nutrient": 20, "nv": 35, "nvidia": [9, 18, 25, 28, 31], "nvme": 16, "nvpmodel": 35, "nvtx": 25, "ny": 15, "nyou": 10, "o": [5, 6, 7, 10, 15, 16, 18, 20, 25, 34], "oak": 20, "object": [2, 6, 8, 10, 11, 14, 15], "obtain": [2, 14, 19], "occasion": 3, "occup": [20, 25], "occur": [10, 13, 27], "ofed_info": 28, "off": [8, 13, 20, 24, 27], "offer": [10, 21, 40], "offic": 5, "offici": [1, 3, 13, 14, 24, 26, 27], "offlin": [8, 21, 25], "offload": 13, "often": [4, 5, 13], "ok": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "okai": 3, "old": 5, "older": 28, "olmo": 38, "omit": 19, "onc": [3, 4, 7, 8, 19, 28], "one": [2, 4, 5, 6, 8, 10, 12, 13, 15, 19, 27, 29, 41], "oneshot": 9, "onevis": [8, 10, 38], "onli": [2, 4, 6, 8, 10, 12, 13, 14, 15, 19, 20, 24, 25, 28, 29, 35, 38, 41], "onlin": [4, 25, 27, 34], "only_run": 38, "onto": 13, "oom": [3, 39], "op": 22, "open": [0, 5, 6, 14, 21, 22, 24, 25, 26, 41], "openai": [1, 4, 10, 13, 14, 19, 20, 21, 38, 40, 41], "openai_api_kei": 18, "openbmb": 8, "opencompass": 22, "oper": [2, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20, 27, 41], "operand": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20], "opportun": 5, "opt": [9, 16, 24, 41], "optim": [0, 9, 14, 20, 22, 24, 40], "option": [2, 12, 14, 19, 41], "orang": 8, "order": [13, 20], "org": 34, "organ": 6, "origin": [2, 4, 6, 7, 8, 9, 11, 12, 14, 15, 20, 27], "orin": 31, "orion": 6, "orli": 5, "oserror": 41, "other": [3, 5, 6, 8, 19, 20, 22, 24, 34, 38, 41], "otherwis": [8, 13, 25], "ottawa3": 20, "our": [0, 5, 9, 10, 12, 13, 14, 22, 26, 29], "out": [5, 6, 13, 14, 20, 25, 26, 33, 34, 41], "outcom": 5, "outdoor": 20, "outlin": [10, 13, 15, 27], "output": [0, 2, 4, 5, 7, 8, 11, 12, 13, 14, 16, 20, 21, 24, 25, 27, 28, 29, 34, 37, 38, 41], "output_file_id": 6, "output_id": 4, "output_text": 4, "output_token": 4, "outsid": 5, "ov": [8, 10, 38], "over": [0, 5, 6, 9, 13], "overal": [20, 27], "overflow": 40, "overhead": [3, 5, 13, 14, 21, 40], "overlap": [4, 5, 7, 8, 13, 19, 20, 25], "overrid": [1, 5, 13, 25], "overridden": 13, "overse": 5, "overview": [15, 26], "own": [34, 41], "p": [10, 24, 33, 37, 41], "p2p": 13, "p_": 14, "pace": 5, "pacif": 5, "packag": [40, 41], "pad": [13, 29], "pad_input_id": 38, "page": [21, 39], "page_s": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "pai": 28, "pairwis": 4, "paleoecologi": 5, "pantheon": 6, "paper": 14, "paragraph": 20, "parallel": [0, 2, 3, 4, 6, 7, 8, 11, 12, 14, 15, 16, 21, 28], "param_dict": 10, "paramet": [2, 3, 4, 9, 11, 13, 14, 15, 21, 27, 34, 39], "pari": [0, 4, 5, 10, 11, 14, 15, 19, 20], "paris2": 20, "park": [5, 8], "pars": [2, 9, 12], "parse_function_cal": 2, "parse_non_stream": [2, 12], "parse_stream_chunk": 12, "parse_streaming_incr": 12, "parse_url": [2, 12], "parser": [2, 21], "part": [5, 27, 38], "partial": 2, "particular": 2, "partit": [27, 34], "partli": 2, "pass": [0, 8, 13, 14, 26, 38], "passion": 5, "patch": [0, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20], "path": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 19, 20, 22, 24, 25, 27, 28, 33, 34, 35, 37, 38, 40, 41], "patient": 5, "patronu": 20, "pattern": [10, 15, 28], "payload": 15, "pci": 24, "peer": 13, "penalti": 6, "pend": 40, "peopl": 5, "per": [9, 10, 14, 27, 29, 34, 37, 40], "per_row": [9, 13], "per_tensor": [9, 13], "perfer": 13, "perfetto": 25, "perform": [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 19, 20, 21, 24, 27, 28, 35], "period": [5, 40], "periodsecond": 28, "permiss": 26, "person": 5, "pervas": 5, "phase": [13, 14, 27], "phi": 38, "phoenix": 20, "phrase": 6, "phy": 28, "physical_st": 28, "pick": 26, "pickl": 14, "pid": [8, 20], "pil": 5, "pip": [0, 9, 17, 18, 24, 25, 40], "pip3": 26, "pipelin": [0, 14], "place": 8, "plai": 5, "plan": [5, 41], "plant": 5, "platform": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20], "playground": 38, "pleas": [4, 5, 8, 9, 10, 13, 14, 15, 20, 22, 24, 25, 26, 27, 28, 34, 35, 38, 39, 40, 41], "pleistocen": 5, "plenti": 20, "plu": 12, "plugin": [2, 28], "png": [8, 10, 20], "point": 5, "polici": [13, 14], "pool": [2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 39], "poorli": 19, "popul": [4, 5, 10, 15], "popular": [4, 6, 8, 9], "port": [1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 22, 24, 28, 33, 34, 37, 40, 41], "portion": [2, 29], "posit": 10, "possibl": [0, 5, 13, 14, 26], "post": [2, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 20, 28, 34, 40], "post1": [4, 18, 41], "potenti": [5, 14, 29, 40], "potter": 20, "power": [5, 24], "pr": [0, 13, 22, 26], "practic": 25, "pre": [0, 9], "precis": [9, 27], "predict": [5, 14, 19], "prefer": [0, 2, 5, 6], "prefil": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 21, 25, 27, 28, 37, 38, 39], "prefix": [3, 13, 21, 29, 40], "prepar": 28, "prerequisit": 25, "presenc": 20, "presence_penalti": [6, 10], "presid": [5, 19], "press": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "prev": [10, 11], "preval": 5, "prevent": [0, 16, 28, 40], "preview": [0, 37], "previou": 27, "primit": 19, "print": [2, 5, 6, 10, 11, 13, 20, 25, 40], "print_highlight": [0, 2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "privileg": [16, 24, 28], "pro": 38, "probabl": [10, 14], "problem": [5, 12, 28, 41], "proc": 24, "proce": 2, "process": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 25, 26, 27, 28, 37, 38, 40], "processor": [20, 38], "product": 30, "profil": 36, "profile_log": 25, "profound": 5, "program": [6, 21], "programm": 6, "progress": [13, 29], "progress_bar": 20, "project": [1, 5, 8, 10, 12, 17, 18, 20, 24, 29, 32, 41], "prometheu": [13, 37], "promis": 5, "prompt": [2, 4, 5, 6, 10, 12, 13, 15, 21, 22, 24, 25, 37, 38], "prompt_token": [2, 4, 6, 8, 11, 14, 15], "prompt_tokens_detail": [2, 6, 8, 11, 14], "prompt_tokens_tot": 37, "proper": 28, "properli": [24, 28, 38], "properti": [2, 10, 15], "propos": 13, "protein": 20, "protocol": 28, "provid": [2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 20, 21, 24, 25, 26, 27, 34, 40, 41], "prowess": 6, "proxim": 20, "prss": 16, "pt": [14, 24], "ptq": 9, "pub": 25, "public_sglang_ci": 14, "pull": [0, 12, 18, 26], "pure": 20, "purpos": 6, "push": [0, 26], "put": [13, 15, 22], "pwd": 9, "py": [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 14, 15, 16, 17, 20, 22, 25, 27, 38, 41], "pydant": 15, "pypi": 41, "pyproject": [17, 41], "python": [0, 1, 2, 4, 5, 6, 10, 12, 13, 14, 15, 17, 20, 24, 25, 26, 33, 34, 35, 37, 38, 40, 41], "python3": [2, 3, 4, 8, 9, 10, 12, 14, 16, 18, 22, 24, 25, 27, 28, 33, 34, 37, 38, 41], "python_tag": 2, "pytorch": [3, 9, 13, 14, 29, 41], "q": 13, "qk": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "qp": 27, "qperf": 28, "quad": 12, "qualiti": [4, 9, 14, 15], "quant": 9, "quant_config": 9, "quant_path": 9, "quantiz": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 21, 27, 41], "quantization_param_path": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "quantizationmodifi": 9, "quantizeconfig": 9, "queri": [2, 15, 27, 38, 40], "queryy": 15, "question": [12, 16, 20, 22, 24, 26, 27, 30], "queue": [2, 3, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28, 37, 40], "quick": [11, 25, 41], "quickli": [0, 26], "quit": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "qwen": [2, 8, 12, 20, 21, 22, 33, 38], "qwen2": [2, 4, 7, 8, 10, 20, 22, 33, 38], "qwen25": 2, "qwen2_5_vlforconditionalgener": 20, "qwen2forcausallm": [2, 4, 7, 12, 20], "qwen2forrewardmodel": 38, "qwen2vl": 38, "qwenlm": 22, "qwq": [2, 12], "r": [0, 15, 20, 38], "r1": [9, 12, 24, 28, 35, 41], "radix": [4, 13, 14, 20, 25, 29, 40], "radix_attent": 13, "radixattent": [21, 38], "rag": 13, "rais": [4, 6, 12], "ram": 13, "random": [14, 24, 25, 29, 37], "random_se": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "randomli": 10, "rang": [3, 4, 5, 6, 9, 13, 14, 21, 22, 37], "rank": [13, 28, 34], "rapidli": 5, "rate": [14, 28, 37, 40], "rather": [3, 9, 13], "ratio": [37, 40], "ravenclaw": 20, "raw": [8, 10, 20, 40], "raw_tool": 2, "rb": 6, "rc": 25, "rc_rdma_write_bw": 28, "rdma": [16, 24], "re": [0, 26, 28], "reach": 5, "read": [5, 6, 24], "readi": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 24, 28, 34, 40], "readinessprob": 28, "readm": [22, 26], "real": [2, 15, 25], "realist": 20, "realloc": 24, "reason": [2, 6, 21, 22], "reasoning_cont": [2, 6, 8, 11, 12, 14], "reasoning_pars": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "reasoning_text": 12, "reasoningpars": 12, "reboot": 24, "rebuild": 0, "recaptur": 10, "recent": [5, 40], "recip": [5, 9], "recogn": [12, 27, 38], "recommend": [0, 9, 13, 14, 25, 26, 27, 41], "record": [4, 5], "recoveri": 41, "recreategrouponpodrestart": 28, "reduc": [0, 3, 5, 6, 8, 10, 13, 14, 20, 27, 39], "reduct": 27, "redund": 9, "refer": [3, 4, 6, 7, 8, 10, 11, 13, 22, 24, 26, 27, 28, 34, 38, 39, 41], "reference_hf": 38, "refus": [2, 6, 11, 14], "regex": [15, 20], "region": [5, 25], "regist": 25, "registri": 38, "regress": 9, "regular": [10, 14, 20], "regular_expression_gen": 20, "reinstal": [24, 41], "reject": 14, "rel": 40, "rel_threshold": 40, "relat": [1, 8, 14, 28, 41], "releas": [12, 14, 25, 41], "relev": [0, 14, 15, 40], "reli": 38, "reliabl": 0, "remain": 29, "rememb": [0, 7, 8], "remind": 15, "remot": [4, 5, 13, 16, 24, 27, 28, 38], "remov": [0, 5, 6, 10, 14, 16, 24, 38, 40], "remove_work": 40, "renderd176": 18, "renderd184": 18, "reorder": 27, "rep": 25, "repeat": [5, 10, 16], "repetit": 6, "repetition_penalti": 10, "replac": [5, 24, 34, 38, 40, 41], "repli": 15, "replica": 28, "repo": [13, 14, 22, 25, 26], "repons": 2, "report": [13, 22, 39], "repositori": [4, 24, 35], "repres": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20], "reproduc": 6, "req": [2, 3, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "req_0": 6, "req_1": 6, "request": [0, 4, 5, 6, 10, 13, 15, 20, 21, 22, 24, 25, 26, 27, 28, 29, 34, 37, 39, 40], "request_count": 6, "request_id": 6, "requir": [0, 2, 9, 10, 14, 15, 16, 22, 24, 27, 28, 41], "rerank": 14, "research": [5, 16], "reserv": 13, "resolv": [0, 26, 28], "resourc": [28, 35, 38, 41], "respect": [20, 27], "respons": [0, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 19, 40], "response1": 4, "response2": 4, "response_cont": 15, "response_data": 15, "response_format": 15, "response_json": 4, "response_non_stream": [2, 12], "response_stream": [2, 12], "rest": 12, "restart": [4, 18, 27, 28], "restartpolici": 28, "result": [6, 12, 13, 15, 19, 20, 22, 27], "result_cont": 6, "result_file_id": 6, "ret": 12, "reth0": 28, "reth2": 28, "reth4": 28, "reth6": 28, "retracted_req": [3, 6], "retri": 40, "retriev": [2, 6], "return": [2, 10, 12, 15, 20, 22, 38, 40], "return_hidden_st": 10, "return_logprob": [10, 15], "reus": 38, "rev": 28, "revis": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "revolut": 4, "revolution": 5, "reward": [11, 13, 21], "reward_process": 4, "rf": 41, "rich": [4, 5], "right": 8, "rise": 5, "risk": [5, 14], "river": 5, "rlhf": [4, 13], "rm": [18, 24, 35, 38, 41], "rmsnorm": 38, "road": 6, "robot": 5, "roce": [16, 24], "rock": 5, "rocm": [24, 41], "rocm630": [18, 41], "role": [2, 4, 5, 6, 8, 11, 12, 14, 15, 28], "roll": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 24, 28], "roman": 6, "romanc": 11, "romant": 5, "rome": [6, 15, 20], "root": [3, 10, 15, 16, 24, 25, 28, 29, 33, 41], "roughli": 29, "round": 20, "round_robin": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "router": [3, 13], "rst": 0, "run": [0, 2, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 20, 22, 25, 28, 29, 33, 34, 37, 38, 39, 40], "run_batch": 20, "runner": 14, "runner_allow_runasroot": 18, "runtim": [0, 4, 9, 10, 21, 25, 35, 41], "runtimeendpoint": [19, 20], "safeailab": 14, "safetensor": [2, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20], "sai": [6, 20], "same": [4, 9, 10, 20, 25, 29, 38, 40], "sampl": [4, 11, 13, 21, 38, 41], "sampler": 13, "sampling_backend": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "sampling_param": [2, 4, 5, 10, 11, 12, 15], "san": [2, 15], "save": [3, 4, 9, 13, 20, 37], "save_dir": 9, "save_pretrain": 9, "sbatch": 34, "scale": [9, 10, 27, 41], "scenario": [12, 27], "schedul": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20, 21], "schedule_conserv": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "schedule_polici": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "schema": [10, 15, 20], "schema_get_current_d": 15, "schema_get_current_weath": 15, "scheme": 9, "scienc": 6, "scientif": 6, "scontrol": 34, "script": [5, 24, 25, 27, 34, 35, 38], "sculptur": 5, "search": [15, 20, 27], "sec": 28, "seccomp": [24, 41], "second": [6, 8, 12, 27, 34, 37, 40], "second_answ": 20, "secret": [24, 41], "section": [10, 28, 39], "sector": 5, "secur": [14, 24, 25, 28, 41], "securitycontext": 28, "see": [3, 5, 10, 11, 13, 14, 15, 20, 24, 25, 27, 28, 37, 38, 39, 40, 41], "seed": [6, 29], "sein": 5, "select": [9, 10, 14, 22, 37, 41], "selector": 28, "self": [5, 6, 7, 8, 10, 12], "senat": 5, "send": [3, 8, 10, 15, 21, 24, 25, 29, 34, 40], "sens": 6, "sentenc": [8, 10], "sep": 1, "sep_styl": 1, "separ": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 27, 41], "separate_reason": 12, "separate_reasoning_data": 12, "separate_reasoning_response_json": 12, "seq": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "sequenc": [6, 10, 14, 25], "seri": [12, 20, 27], "serial": 14, "serv": [0, 3, 5, 21, 25, 27, 28, 34, 41], "served_model_nam": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "server": [0, 1, 3, 5, 9, 10, 14, 15, 21, 22, 24, 25, 27, 28, 29, 33, 34, 35, 37, 38, 40], "server_arg": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 38], "server_ip": 28, "server_process": [0, 2, 4, 6, 11, 12, 14, 15, 20], "serverarg": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "servic": [5, 6, 7, 8, 28, 41], "service_ti": [2, 6, 11, 14], "session": 25, "set": [1, 3, 5, 9, 10, 12, 13, 14, 16, 20, 25, 27, 28, 29, 33, 34, 35, 37, 41], "set_default_backend": 20, "setup": [13, 28, 41], "setup_rocm": 41, "sever": [5, 10, 25, 27, 38], "sgl": [0, 2, 5, 8, 9, 10, 12, 13, 15, 17, 18, 19, 20, 22, 24, 29, 32, 34, 41], "sgl_branch": 41, "sglang": [1, 3, 5, 6, 7, 8, 9, 10, 11, 13, 14, 16, 17, 18, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 40], "sglang_imag": 24, "sglang_is_in_ci": 18, "sglang_rout": [13, 40], "sglang_storag": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "sglang_torch_profiler_dir": 25, "sglang_use_modelscop": 33, "sglang_zhync": 16, "sh": [0, 17, 22, 24, 35], "shahizat": 35, "shape": [5, 10], "shard": [2, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20], "share": [3, 5, 15, 16, 18, 25, 28], "sharegpt": 25, "she": [5, 6], "shell": [7, 8, 11, 25], "ship": [3, 6], "shirt": 8, "shm": [16, 18, 24, 28, 41], "shone": 6, "short": [5, 6, 10, 29], "shorter": 19, "shortest": 40, "shot": [16, 22], "should": [1, 2, 13, 22, 25, 27, 28, 37, 38], "show": [2, 8, 13, 22, 28, 34], "show_time_cost": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "showcas": [6, 34], "shown": 24, "shutdown": [0, 2, 5, 12, 15], "side": [22, 25], "sidewalk": [8, 20], "sig": 28, "sigmoid": 4, "signific": [5, 27], "significantli": 27, "siluandmul": 38, "similar": [4, 5, 6, 10, 38], "similarli": 29, "simpl": [9, 12, 20, 28], "simpli": [9, 15, 19], "simplifi": 25, "sinc": [2, 6, 27], "singl": [2, 6, 10, 13, 14, 24, 25, 27, 34, 38, 40, 41], "situat": 20, "size": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 16, 18, 20, 24, 25, 27, 28, 29, 34, 35, 38, 39, 40, 41], "sk": 18, "skew": 10, "skip": [3, 13], "skip_special_token": [2, 4, 10, 12], "skip_tokenizer_init": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "sky": [2, 41], "skyserv": 41, "skywork": [4, 21, 38], "slack": 26, "sleep": [6, 18, 34], "slide": [27, 32], "slight": 29, "slightli": [6, 29], "slow": [3, 13, 20], "slower": 29, "slurm_log": 34, "slurm_nodeid": 34, "slurm_nodelist": 34, "slurm_procid": 34, "slytherin": 20, "sm": 28, "sm75": 41, "small": [3, 5, 8, 13, 25, 26, 27, 29, 38], "smaller": [0, 8, 10, 13], "smallest": [10, 40], "smollm": 38, "smooth": [0, 6, 7, 8, 26], "smoother": 13, "smoothli": 25, "snippet": [2, 8, 25], "so": [0, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 25, 27, 38, 41], "societi": 5, "socket": 28, "softwar": 5, "solut": [5, 12, 29, 41], "solv": 12, "some": [5, 9, 13, 18, 22, 25, 27, 28, 37, 38, 39], "sometim": [28, 39], "soon": 9, "sort": 10, "sourc": [14, 15, 21, 25, 34, 37, 38], "space": [6, 10, 13, 33, 40], "spaces_between_special_token": 10, "spain": 20, "spare": 5, "spars": 13, "sparseautomodelforcausallm": 9, "speak": 29, "spec": [13, 14, 28], "special": [5, 10], "specif": [2, 5, 10, 13, 20, 22, 24, 26, 27, 38, 41], "specifi": [1, 2, 6, 8, 9, 10, 12, 13, 14, 15, 18, 19, 20, 24, 27, 41], "specul": [5, 21, 27], "speculative_accept_threshold_acc": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "speculative_accept_threshold_singl": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "speculative_algorithm": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "speculative_draft_model_path": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "speculative_eagle_topk": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "speculative_num_draft_token": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "speculative_num_step": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "speculative_token_map": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "speed": [0, 13, 14, 27, 28], "speedup": 27, "split": [6, 9, 13], "srt": [4, 5, 6, 7, 8, 10, 11, 12, 14, 20, 38, 41], "srun": 34, "st": 41, "stabil": 27, "stabl": [6, 16, 24], "stablelm": 38, "stack": 25, "stag": 20, "stage": 13, "stai": [0, 13, 20], "stand": [3, 8], "standard": [5, 14, 28, 38], "star": 6, "start": [0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 20, 24, 25, 26, 27, 28, 33, 35, 37, 38], "start_tag": 15, "startswith": [10, 11], "startup": [2, 4, 6, 7, 8, 11, 12, 14, 15, 16, 20, 24, 27, 28], "starvat": 13, "state": [2, 5, 6, 10, 14, 15, 20, 28, 40], "statefulset": 41, "statement": 5, "static": [8, 13, 20, 22, 25, 28, 35, 39], "statist": 9, "statu": [4, 6, 15, 20, 28, 41], "status_cod": 6, "step": [3, 9, 12, 13, 14, 22, 26, 27, 28], "still": [6, 13, 20, 29], "stock": 20, "stop": [3, 4, 6, 8, 10, 11, 14, 15, 20, 22, 25], "stop_str": 1, "stop_token_id": [4, 10], "stopword": 22, "store": [13, 40], "stori": [5, 6], "str": [2, 10, 12, 13, 15], "strang": 13, "strategi": 13, "stream": [4, 6, 13, 15], "stream_and_merg": 5, "stream_interv": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "stream_output": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "stream_reason": 12, "streamingparseresult": 12, "street": [5, 8, 20], "strength": 20, "strictli": 20, "strike": 5, "string": [2, 3, 10, 15], "strip": [6, 10, 11], "strong": [5, 6, 19], "strongli": 24, "struct": 6, "structur": [0, 20, 21], "structural_tag": 15, "stuck": 28, "student": 20, "studi": 5, "style": [6, 14, 26], "sub": 27, "subclass": 12, "subdirectori": 0, "subject": 22, "submit": 34, "subprocess": [7, 8, 11], "subset": 19, "succeed": 4, "success": [4, 24], "successfulli": [4, 6, 28, 40], "sudo": [24, 35], "sugar": 20, "suggest": [3, 5, 9, 15, 20, 35], "sum": 12, "summar": [20, 24], "summari": 20, "super": 12, "suppli": 19, "support": [4, 5, 6, 7, 8, 9, 10, 13, 15, 19, 20, 21, 27, 30, 35, 40, 41], "sure": [0, 12, 13, 14, 20, 25, 26, 28, 38], "surgeri": 5, "suv": 20, "swamp": 5, "swim": 20, "switch": [28, 40, 41], "sy": 24, "synchron": 27, "syntat": 13, "sys_ptrac": [24, 41], "system": [1, 3, 5, 6, 9, 10, 15, 20, 22, 25, 28, 40, 41], "system_fingerprint": [2, 6, 11, 14], "systemcut": 15, "t": [2, 8, 10, 13, 14, 16, 24, 26, 28, 37, 41], "t4": 41, "t_": 14, "t_2": 14, "tab": 13, "tabl": 6, "tag": [2, 12], "tailor": 20, "take": [3, 5, 6, 9, 13, 24, 27, 28, 33], "tar": 16, "target": 9, "targetport": 28, "task": [13, 26, 34], "taxi": [8, 20], "tcp": [13, 28], "tcpsocket": 28, "teacher": 20, "team": [9, 16, 27], "technic": 15, "techniqu": 5, "technologi": [5, 28], "tee": 25, "tell": 6, "temperatur": [2, 4, 5, 6, 8, 10, 11, 12, 14, 15, 20], "templat": [6, 10, 13, 21, 38], "tend": 28, "tenni": 5, "tensor": [21, 28], "term": [5, 29], "termin": [0, 2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 24, 41], "terminalt": 0, "terminate_process": [0, 2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "test": [0, 2, 4, 5, 6, 7, 8, 10, 11, 12, 14, 15, 18, 20, 22, 25, 28, 34, 35, 37], "test_generation_model": 38, "test_oth": 38, "test_util": [0, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20], "test_vision_openai_serv": 38, "testgenerationmodel": 38, "text": [2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 38, 40], "text_complet": 6, "text_embed": 7, "text_it": 20, "text_qa": 20, "textual": 38, "than": [3, 9, 10, 13, 14, 20, 33, 41], "thank": [14, 26, 28, 34, 35], "thei": [0, 8, 10, 14, 28], "them": [13, 16, 22, 24, 26, 27, 37, 39, 40], "therapi": 5, "therefor": [4, 12], "thi": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 19, 20, 22, 24, 25, 26, 27, 28, 29, 34, 35, 38, 39, 40, 41], "thing": 3, "think": [5, 12], "third": 5, "thompson": 5, "those": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "thread": [13, 40], "three": [14, 15, 20], "threshold": [13, 40], "through": [2, 5, 13, 24, 26, 27], "throughput": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 37, 40], "thu": 16, "thunlp": 14, "till": 41, "time": [0, 3, 4, 5, 6, 7, 9, 10, 12, 13, 15, 25, 26, 27, 28, 29, 34, 37, 40], "time_per_output_token_second": 37, "time_per_output_token_seconds_bucket": 37, "time_per_output_token_seconds_count": 37, "time_per_output_token_seconds_sum": 37, "time_to_first_token_second": 37, "time_to_first_token_seconds_bucket": 37, "time_to_first_token_seconds_count": 37, "time_to_first_token_seconds_sum": 37, "timeout": [13, 27], "timestep": 14, "timezon": 15, "tip": [20, 39], "tip_suggest": 20, "tmp": [3, 13, 18], "to_str": 10, "todai": 2, "togeth": [2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 20, 27], "token": [1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 14, 15, 20, 21, 24, 28, 37, 38, 40, 41], "token_id": 10, "token_length_norm": 19, "token_map_path": 14, "token_usag": 37, "tokenizer_free_server_process": 4, "tokenizer_manag": 2, "tokenizer_mod": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "tokenizer_path": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "tokenizers_parallel": [7, 15], "tokyo": [6, 11, 14, 20], "toml": [17, 41], "too": [3, 13, 27], "tool": [15, 20, 21, 22, 25], "tool_cal": [2, 6, 8, 11, 14], "tool_call_pars": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "tool_dict": 2, "tool_get_current_d": 15, "tool_get_current_weath": 15, "tool_nam": 2, "tool_to_cal": 2, "tool_us": 20, "toolcallitem": 2, "toolkit": 35, "tools_tag_list": 2, "top": [5, 8, 10, 13, 14, 20, 27], "top_k": 10, "top_logprobs_num": 10, "top_p": [2, 5, 6, 10, 12, 15], "topic": [5, 6], "topk": [14, 27], "torch": [2, 4, 6, 7, 8, 11, 12, 13, 15, 20, 28, 41], "torch2": 41, "torch_compil": 13, "torch_compile_max_b": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "torch_dtyp": 9, "torchao": [9, 13], "torchao_config": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "torchinductor_cache_dir": [3, 13], "torchinductor_root": [3, 13], "total": [6, 12, 13, 40], "total_token": [2, 6, 8, 11, 14], "touch": 5, "tour": 5, "tourism": 5, "tourist": 4, "tournament": 5, "toward": 10, "tower": [4, 5], "town": 5, "tp": [13, 16, 24, 27, 28, 34, 38, 41], "tp0": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "tp1": 28, "tp16": 34, "tp2": 28, "tp3": 28, "tp4": 28, "tp5": 28, "tp6": 28, "tp7": 28, "tp8": 27, "tp_size": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 34], "tr": 25, "trace": [25, 28], "track": [29, 40], "trade": 6, "train": [4, 9, 14, 20, 25], "transform": [1, 2, 4, 5, 7, 9, 12, 15, 38, 41], "transit": [6, 7, 8], "transpar": 5, "treat": 4, "treatment": 5, "tree": [14, 40], "trend": 5, "trigger": [4, 15], "trim": 10, "triton": [2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 27, 41], "triton_attention_num_kv_split": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "triton_attention_reduce_in_fp32": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "triton_op": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 20], "troubleshoot": [28, 30], "true": [2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 18, 20, 25, 28, 33, 38], "truncat": [13, 14, 25], "trunk": 20, "trust": [5, 16, 24, 27, 28, 38], "trust_remote_cod": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "try": [3, 5, 6, 13, 27, 28, 39, 41], "tune": [13, 21, 24, 39], "tunnel": 16, "turn": [8, 13, 27], "tutori": [6, 7, 8], "twelv": 6, "twenti": 5, "twice": 29, "twine": 17, "two": [1, 2, 5, 6, 8, 10, 12, 13, 14, 15, 16, 20, 27, 28, 29, 34, 38, 40, 41], "txt": 0, "type": [2, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 20, 27, 37, 38], "typic": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 26, 28, 41], "u": 19, "ubiquit": 5, "ubuntu": [25, 28], "ubuntu1804": 25, "ubuntu22": 18, "ui": 25, "unbalanc": 40, "uncertain": 5, "unconditional_likelihood_norm": 19, "unconfin": [24, 41], "uncorrect": 13, "under": [0, 22, 25, 26, 27, 29, 38, 40], "understand": [0, 5, 26, 38], "unexpect": 13, "unexpectedli": [8, 20], "unintend": 10, "union": 10, "uniqu": 40, "unit": [2, 5, 6, 15, 20], "unittest": [26, 38], "unless": 14, "unlik": 14, "unnecessari": 5, "unpickl": 14, "unrestrict": 10, "unset": [13, 20], "unsupport": 12, "until": [10, 12, 27, 34], "untrust": 14, "unwant": 3, "up": [0, 2, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20, 24, 25, 27, 28, 35, 37, 41], "updat": [2, 18, 25, 38], "update_weight": 4, "update_weights_from_disk": 4, "upgrad": [24, 41], "upload": [6, 13, 37], "upload_pypi": 17, "uploaded_fil": 6, "upon": [4, 6, 7], "urban": 20, "url": [4, 6, 8, 10, 11, 37, 40], "us": [0, 1, 2, 3, 4, 5, 6, 10, 12, 13, 14, 15, 18, 19, 20, 22, 25, 26, 27, 28, 29, 30, 34, 37, 38, 40], "us_president_exampl": 19, "usabl": [9, 35], "usag": [0, 2, 3, 4, 7, 8, 10, 11, 13, 14, 15, 19, 21, 25, 28, 35, 37, 39, 40, 41], "use_fast": 20, "user": [0, 1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 14, 15, 19, 20, 25, 27], "usery": 15, "usr": 41, "usual": [6, 41], "utf": [6, 10, 11], "util": [0, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 14, 15, 20, 24, 28, 40], "uvicorn": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "v": [2, 4, 6, 7, 8, 9, 11, 12, 14, 15, 16, 18, 20, 24, 33, 41], "v0": [2, 4, 18, 27, 38, 41], "v1": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20, 28], "v3": [9, 13, 16, 41], "v4": 20, "valid": [0, 6, 9, 15, 24], "valu": [2, 3, 9, 13, 14, 15, 25, 27, 28, 37], "valuabl": 38, "valueerror": 12, "valuefrom": 28, "variabl": [3, 13, 18, 28, 33, 41], "varianc": 29, "variant": 25, "varieti": 20, "variou": [5, 6, 8, 9], "vast": [5, 6], "ve": [5, 28], "vector": [14, 27], "veget": 20, "verbos": 13, "veri": [3, 6, 8, 10, 38], "verif": [13, 14], "verifi": [6, 13, 24, 35], "version": [0, 4, 5, 13, 27, 28, 41], "veto": 5, "via": [9, 37], "vice": 5, "vicuna_v1": 8, "video": [18, 24, 32, 41], "view": [0, 25, 28], "viewport": 6, "virtual": 5, "vision": [6, 10, 11, 21, 38], "vision_process": 8, "visionattent": 38, "visit": [9, 11], "visual": [25, 38], "visualstudio": 16, "vit": 38, "vl": [8, 20, 38], "vl2": [8, 38], "vllm": 9, "vlm": 5, "vm": 24, "vocabulari": 14, "volum": 28, "volumemount": 28, "vote": 5, "vram": 13, "vscode_cli_alpine_x64_cli": 16, "w": [6, 10, 14, 15, 20], "w8a8": 27, "w8a8_fp8": 9, "w8a8_int8": 9, "w8a8fp8config": 9, "wa": [6, 20], "wai": [0, 5, 20, 25, 38, 40], "wait": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20, 28, 34, 37, 40], "wait_for_serv": [0, 2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "walk": [20, 26], "wand": 20, "want": [2, 3, 9, 10, 13, 25, 26, 28, 38, 41], "warmup": [2, 4, 6, 7, 8, 11, 12, 14, 15, 20], "warn": [2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 20], "washington": 6, "watchdog": 13, "watchdog_timeout": [2, 4, 6, 7, 8, 11, 12, 13, 14, 15, 20], "water": 20, "we": [0, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 20, 22, 24, 25, 26, 27, 28, 29, 33, 40], "weather": [2, 15], "web": [5, 6], "weight": [2, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 25, 35], "weights_onli": 14, "weilin": 14, "welcom": [1, 26], "well": [5, 13, 20, 38], "were": [6, 14], "western": 6, "wget": 16, "what": [0, 2, 4, 5, 6, 8, 11, 12, 15, 19, 20, 27, 40], "when": [0, 1, 2, 3, 4, 5, 6, 9, 10, 12, 13, 15, 24, 25, 27, 29, 40, 41], "whenev": 25, "where": [3, 5, 10, 14, 15, 19], "wherestart_tag": 15, "whether": [4, 10, 26], "which": [0, 1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 25, 26, 27, 28, 29, 38, 39], "while": [2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 18, 20, 25, 29, 34, 41], "white": 8, "whl": 41, "who": [5, 6, 11], "whole": 20, "whose": 10, "why": 6, "wide": [5, 6, 21], "widespread": 5, "within": [14, 20, 22, 28], "without": [4, 5, 6, 13, 14, 25, 38, 41], "won": 8, "wonder": 6, "wood": 20, "word": [6, 10, 20], "work": [1, 3, 5, 9, 10, 13, 18, 28, 40], "worker": [27, 28, 40], "worker_url_1": 40, "worker_url_2": 40, "workertempl": 28, "workflow": [4, 22, 26], "workload": [3, 24, 28], "workout": 20, "world": 5, "would": [2, 15, 20], "wrap": 12, "wrapper": [13, 27], "write": [0, 5, 6, 10], "writer": 5, "wrong": 4, "x": [27, 28, 40], "x64": 18, "x86_64": 25, "x_": 34, "xai": 5, "xf": 16, "xgrammar": [2, 4, 6, 7, 8, 10, 11, 12, 14, 15, 20, 34], "xvers": 38, "xxx": 18, "xxxxx": 25, "y": [18, 25], "yaml": [28, 37, 41], "year": 5, "yellow": [8, 20], "yet": [6, 28], "yi": 38, "yml": 41, "yoga": 20, "york": [5, 15], "you": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 25, 26, 27, 28, 29, 32, 33, 34, 37, 38, 39, 40, 41], "your": [0, 2, 6, 7, 8, 9, 11, 13, 15, 16, 20, 21, 22, 24, 26, 28, 34, 38, 41], "your_user_nam": 26, "yum": 28, "z": 34, "zara": 5, "zee": 5, "zero": 21, "zhao": 14, "zhousx": 14, "zip": [5, 15], "zsh": 16, "\u00eele": 5}, "titles": ["SGLang Documentation", "Custom Chat Template", "Tool and Function Calling", "Hyperparameter Tuning", "SGLang Native APIs", "Offline Engine API", "OpenAI APIs - Completions", "OpenAI APIs - Embedding", "OpenAI APIs - Vision", "Quantization", "Sampling Parameters", "Sending Requests", "Reasoning Parser", "Server Arguments", "Speculative Decoding", "Structured Outputs", "Development Guide Using Docker", "PyPI Package Release Process", "Set Up Self-Hosted Runners for GitHub Action", "Choices Methods in SGLang", "SGLang Frontend Language", "SGLang Documentation", "Measuring Model Accuracy in SGLang", "Multi-Node Deployment", "SGLang on AMD", "Benchmark and Profiling", "Contribution Guide", "DeepSeek Usage", "Deploy On Kubernetes", "Frequently Asked Questions", "General Guidance", "Hardware Supports", "Learn more", "Use Models From ModelScope", "Multi-Node Deployment", "Apply SGLang on NVIDIA Jetson Orin", "Performance Tuning", "Production Metrics", "Supported Models", "Troubleshooting", "Router for Data Parallelism", "Install SGLang"], "titleterms": {"0": 29, "1": [18, 24, 34, 41], "2": [18, 41], "3": [14, 18, 34, 41], "4": 41, "405b": 34, "5": 41, "6": 41, "8": 27, "A": [4, 6, 7, 8, 11, 20], "On": 28, "One": 27, "The": 29, "With": 41, "access": 39, "accuraci": 22, "achiev": 3, "action": 18, "ad": 26, "add": [18, 38], "advanc": [5, 21], "align": 0, "alloc": 0, "amd": 24, "an": [38, 39], "api": [2, 4, 5, 6, 7, 8, 11, 12, 13, 15, 40], "appli": 35, "approach": 0, "ar": [29, 37], "argument": 13, "ask": 29, "asynchron": 5, "attent": 27, "auto": 24, "avoid": 3, "awar": 40, "back": 2, "backend": [13, 21], "balanc": [24, 40], "basic": [20, 28], "batch": [5, 6, 20], "being": 37, "benchmark": [22, 25], "block": 27, "build": 26, "cach": [3, 4, 27, 40], "call": 2, "capabl": 22, "case": 28, "chat": [1, 6, 8], "check": [4, 37], "choic": 19, "chunk": 3, "ci": [0, 26], "classifi": 4, "client": [2, 7, 8, 11], "clone": 26, "cloud": 41, "co": 40, "code": [17, 26], "collect": 37, "command": 13, "commit": 26, "common": [13, 41], "compat": [2, 12, 15], "compil": [3, 14, 27], "complet": 6, "complex": 20, "compos": 41, "compressor": 9, "config": 18, "configur": [13, 18, 24, 40], "conserv": 3, "constrain": [10, 13, 20], "contain": [16, 18, 35], "content": 27, "contribut": 26, "control": 20, "core": 10, "correct": 38, "creat": 37, "cuda": 39, "curl": [7, 8, 11], "custom": [1, 10, 22], "dashboard": 37, "data": [13, 27, 40], "debug": [13, 28, 38], "decod": [10, 13, 14, 20], "deepseek": [24, 27, 34], "defin": 2, "depend": [0, 26], "deploi": 28, "deploy": [23, 34], "determinist": 29, "detoken": 4, "develop": 16, "dialog": 20, "disabl": 24, "disk": 4, "distribut": 13, "doc": [0, 26], "docker": [16, 18, 24, 41], "document": [0, 21, 26], "doubl": 13, "download": 27, "dp": 3, "dynam": 40, "eagl": 14, "ebnf": [6, 10, 15], "effici": 0, "embed": [4, 7, 38], "enabl": 3, "encod": 4, "encount": 39, "endpoint": 10, "engin": [2, 5, 12, 15], "error": 39, "evalu": [16, 22], "even": 29, "exampl": [0, 9, 10, 24, 27, 28], "execut": 2, "expert": 13, "express": 15, "extend": 22, "extern": 38, "faq": 27, "fault": 40, "featur": 21, "flow": 20, "flush": 4, "fork": 26, "format": [1, 26], "fp8": 27, "fraction": 3, "frequenc": 14, "frequent": 29, "from": [4, 24, 26, 33, 38, 41], "frontend": [20, 21], "function": 2, "gener": [4, 5, 10, 11, 20, 30, 38], "get": 4, "github": [17, 18], "gptqmodel": 9, "grafana": 37, "greedi": 19, "grub": 24, "guid": [16, 26, 37], "guidanc": 30, "h100": 16, "h200": [16, 27], "handl": 2, "hardwar": 31, "head": 27, "health": 4, "highlight": 14, "host": 18, "how": [2, 38], "http": 13, "hyperparamet": 3, "id": 7, "illeg": 39, "imag": 8, "implement": 38, "infer": [5, 34, 35], "info": 4, "initi": 2, "input": [7, 8], "instal": [0, 21, 24, 26, 35, 40, 41], "interact": 38, "issu": 28, "jetson": 35, "jinja": 1, "json": [1, 6, 10, 15], "kei": 28, "kernel": 13, "kubernet": [28, 41], "languag": [20, 38], "latent": 27, "launch": [2, 4, 6, 7, 8, 11, 12, 13, 20, 27, 40], "learn": 32, "length": 19, "likelihood": 19, "llama": 34, "llama3": 24, "llm": 9, "load": 40, "log": 13, "logit": 10, "lora": 13, "make": 17, "max": 3, "measur": 22, "mem": 3, "memori": [3, 13, 39], "messag": 2, "method": [19, 41], "metric": 37, "mla": 27, "modal": [10, 20], "model": [0, 2, 4, 9, 12, 13, 22, 33, 38], "modelscop": 33, "more": 32, "multi": [10, 13, 20, 23, 27, 34], "multipl": 8, "nativ": [2, 4, 11, 12, 15], "new": [2, 12, 38], "newcom": 26, "node": [13, 23, 27, 34], "non": [2, 5, 12], "normal": [10, 19], "note": 41, "nsight": 25, "numa": 24, "nvidia": 35, "offlin": [2, 5, 9, 12, 15], "onlin": 9, "openai": [2, 6, 7, 8, 11, 12, 15], "optim": [13, 27], "option": [10, 13], "orin": 35, "other": [10, 13, 25], "out": [3, 39], "output": [6, 10, 15, 35], "packag": 17, "parallel": [13, 20, 27, 40], "param": 10, "paramet": [6, 10, 40], "parser": 12, "peak": 3, "penal": 10, "perform": [14, 36], "pip": 41, "polici": 3, "port": [0, 38], "pre": 26, "predict": 27, "prefil": 3, "prerequisit": [28, 35], "process": 17, "processor": 10, "product": 37, "profil": [16, 25], "prompt": [0, 20], "pypi": 17, "python": [7, 8, 11], "pytorch": 25, "quantiz": [9, 35], "question": 29, "r1": [27, 34], "rank": 14, "rdma": 28, "reason": [12, 27], "recommend": 24, "refer": [9, 14, 21, 35], "regex": [6, 10], "regist": 38, "regular": 15, "releas": 17, "remain": 28, "repositori": 26, "request": [2, 3, 7, 8, 11, 12], "result": [2, 29], "reward": [4, 38], "roce": 28, "rout": 40, "router": [21, 40], "run": [3, 18, 24, 26, 27, 35, 41], "runner": 18, "runtim": [2, 13, 15, 40], "sampl": [10, 14], "scale": 40, "scenario": 28, "schedul": [3, 13], "schema": 12, "script": 22, "select": [0, 19], "self": 18, "send": [2, 11], "separ": 40, "serv": 13, "server": [2, 4, 6, 7, 8, 11, 12, 13, 20], "set": [18, 24, 26], "setup": [16, 37], "sglang": [0, 2, 4, 12, 15, 19, 20, 21, 22, 24, 27, 35, 38, 41], "sh": 18, "size": 3, "skip": 4, "skypilot": 41, "slurm": 34, "sourc": [24, 26, 41], "sparsiti": 13, "specul": [13, 14], "speed": 3, "srt": [2, 15], "start": 18, "static": 3, "step": [18, 24], "strategi": 40, "stream": [2, 5, 10, 11, 12, 20], "structur": [6, 10, 15, 35], "submiss": 3, "success": 28, "suit": 38, "support": [2, 12, 31, 38], "synchron": 5, "system": 24, "tag": 15, "temperatur": 29, "templat": [1, 8], "tensor": [13, 27], "test": [26, 38], "text": 4, "thi": 0, "throughput": [3, 27], "tip": [25, 26], "todo": 28, "token": [4, 13, 19, 27], "toler": 40, "tool": 2, "torch": [3, 14, 27], "torchao": 35, "tp": 3, "troubleshoot": [37, 39], "tune": [3, 36], "turn": 20, "tutori": 21, "uncondit": 19, "unit": 26, "up": [18, 26], "updat": [0, 4, 17, 24], "upload": 17, "us": [7, 8, 9, 11, 16, 24, 33, 41], "usag": [5, 6, 12, 20, 27], "uv": 41, "v3": [24, 27, 34], "variabl": 37, "version": 17, "via": 14, "vision": 8, "vllm": 38, "vlm": 38, "vscode": 16, "wa": 39, "warmup": 24, "weight": [4, 27], "why": 0, "wise": 27, "workflow": 0, "write": 26, "xgrammar": 35, "your": 3}})