From f110780ef077ed86ddf38c1f26d5381215021b41 Mon Sep 17 00:00:00 2001
From: fpagny
Date: Tue, 11 Feb 2025 10:06:21 +0100
Subject: [PATCH] Add L40S to Managed Inference models (#4378)

* Add L40S to Managed Inference models
* Update llama-3-70b-instruct.mdx
* Update llama-3.1-8b-instruct.mdx
* Update deepseek-r1-distill-llama-8b.mdx
* Update pixtral-12b-2409.mdx
* Update mistral-7b-instruct-v0.3.mdx
* Update mistral-nemo-instruct-2407.mdx
* Update pixtral-12b-2409.mdx
* Update llama-3.1-8b-instruct.mdx
* Update deepseek-r1-distill-llama-8b.mdx
* Update bge-multilingual-gemma2.mdx
---
 .../reference-content/bge-multilingual-gemma2.mdx | 3 ++-
 .../deepseek-r1-distill-llama-8b.mdx | 3 ++-
 .../reference-content/llama-3-70b-instruct.mdx | 5 +++--
 .../reference-content/llama-3-8b-instruct.mdx | 6 ++++--
 .../reference-content/llama-3.1-8b-instruct.mdx | 9 +++++----
 .../reference-content/mistral-7b-instruct-v0.3.mdx | 11 +++++++----
 .../reference-content/mistral-nemo-instruct-2407.mdx | 12 +++++++-----
 .../reference-content/pixtral-12b-2409.mdx | 9 +++++----
 8 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/pages/managed-inference/reference-content/bge-multilingual-gemma2.mdx b/pages/managed-inference/reference-content/bge-multilingual-gemma2.mdx
index 3e51e1b013..885eaeef94 100644
--- a/pages/managed-inference/reference-content/bge-multilingual-gemma2.mdx
+++ b/pages/managed-inference/reference-content/bge-multilingual-gemma2.mdx
@@ -18,7 +18,7 @@ dates:
 | Attribute | Details |
 |-----------------|------------------------------------|
 | Provider | [baai](https://huggingface.co/BAAI) |
-| Compatible Instances | L4 (FP32) |
+| Compatible Instances | L4, L40S (FP32) |
 | Context size | 4096 tokens |

 ## Model name
@@ -32,6 +32,7 @@ baai/bge-multilingual-gemma2:fp32
 | Instance type | Max context length |
 | ------------- |-------------|
 | L4 | 4096 (FP32) |
+| L40S | 4096 (FP32) |

 ## Model introduction
diff --git a/pages/managed-inference/reference-content/deepseek-r1-distill-llama-8b.mdx b/pages/managed-inference/reference-content/deepseek-r1-distill-llama-8b.mdx
index bde51b9140..dd9919bc93 100644
--- a/pages/managed-inference/reference-content/deepseek-r1-distill-llama-8b.mdx
+++ b/pages/managed-inference/reference-content/deepseek-r1-distill-llama-8b.mdx
@@ -19,7 +19,7 @@ categories:
 |-----------------|------------------------------------|
 | Provider | [Deepseek](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) |
 | License | [MIT](https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/mit.md) |
-| Compatible Instances | L4, H100 (BF16) |
+| Compatible Instances | L4, L40S, H100 (BF16) |
 | Context Length | up to 131k tokens |

 ## Model names
@@ -33,6 +33,7 @@ deepseek/deepseek-r1-distill-llama-8b:bf16
 | Instance type | Max context length |
 | ------------- |-------------|
 | L4 | 39k (BF16) |
+| L40S | 131k (BF16) |
 | H100 | 131k (BF16) |

 ## Model introduction
diff --git a/pages/managed-inference/reference-content/llama-3-70b-instruct.mdx b/pages/managed-inference/reference-content/llama-3-70b-instruct.mdx
index 1844efb25e..a0c5c9ce68 100644
--- a/pages/managed-inference/reference-content/llama-3-70b-instruct.mdx
+++ b/pages/managed-inference/reference-content/llama-3-70b-instruct.mdx
@@ -18,7 +18,7 @@ categories:
 | Attribute | Details |
 |-----------------|------------------------------------|
 | Provider | [Meta](https://llama.meta.com/llama3/) |
-| Compatible Instances | H100 (FP8) |
+| Compatible Instances | H100, H100-2 (FP8) |
 | Context size | 8192 tokens |

 ## Model names
@@ -30,6 +30,7 @@ meta/llama-3-70b-instruct:fp8
 ## Compatible Instances

 - [H100 (FP8)](https://www.scaleway.com/en/h100-pcie-try-it-now/)
+- H100-2 (FP8)

 ## Model introduction
@@ -82,4 +83,4 @@ Process the output data according to your application's needs. The response will
 Despite efforts for accuracy, the possibility of generated text containing inaccuracies or [hallucinations](/managed-inference/concepts/#hallucinations) exists. Always verify the content generated independently.

-
\ No newline at end of file
+
diff --git a/pages/managed-inference/reference-content/llama-3-8b-instruct.mdx b/pages/managed-inference/reference-content/llama-3-8b-instruct.mdx
index d018be991c..a43b272906 100644
--- a/pages/managed-inference/reference-content/llama-3-8b-instruct.mdx
+++ b/pages/managed-inference/reference-content/llama-3-8b-instruct.mdx
@@ -18,7 +18,7 @@ categories:
 | Attribute | Details |
 |-----------------|------------------------------------|
 | Provider | [Meta](https://llama.meta.com/llama3/) |
-| Compatible Instances | L4, H100 (FP8, BF16) |
+| Compatible Instances | L4, L40S, H100, H100-2 (FP8, BF16) |
 | Context size | 8192 tokens |

 ## Model names
@@ -33,7 +33,9 @@ meta/llama-3-8b-instruct:fp8
 | Instance type | Max context length |
 | ------------- |-------------|
 | L4 | 8192 (FP8, BF16) |
-| H100 | 8192 (FP8, BF16)
+| L40S | 8192 (FP8, BF16) |
+| H100 | 8192 (FP8, BF16) |
+| H100-2 | 8192 (FP8, BF16) |

 ## Model introduction
diff --git a/pages/managed-inference/reference-content/llama-3.1-8b-instruct.mdx b/pages/managed-inference/reference-content/llama-3.1-8b-instruct.mdx
index 0318375220..a6d24ab250 100644
--- a/pages/managed-inference/reference-content/llama-3.1-8b-instruct.mdx
+++ b/pages/managed-inference/reference-content/llama-3.1-8b-instruct.mdx
@@ -19,7 +19,7 @@ categories:
 |-----------------|------------------------------------|
 | Provider | [Meta](https://llama.meta.com/llama3/) |
 | License | [Llama 3.1 community](https://llama.meta.com/llama3_1/license/) |
-| Compatible Instances | L4, H100, H100-2 (FP8, BF16) |
+| Compatible Instances | L4, L40S, H100, H100-2 (FP8, BF16) |
 | Context Length | up to 128k tokens |

 ## Model names
@@ -34,8 +34,9 @@ meta/llama-3.1-8b-instruct:bf16
 | Instance type | Max context length |
 | ------------- |-------------|
 | L4 | 96k (FP8), 27k (BF16) |
-| H100 | 128k (FP8, BF16)
-| H100-2 | 128k (FP8, BF16)
+| L40S | 128k (FP8, BF16) |
+| H100 | 128k (FP8, BF16) |
+| H100-2 | 128k (FP8, BF16) |

 ## Model introduction
@@ -82,4 +83,4 @@ Process the output data according to your application's needs. The response will
 Despite efforts for accuracy, the possibility of generated text containing inaccuracies or [hallucinations](/managed-inference/concepts/#hallucinations) exists. Always verify the content generated independently.

-
\ No newline at end of file
+
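Reviewer note, not part of the diff: the context-length tables extended above only matter to users at request time, so a short illustration may help. The sketch below is an assumption-laden example, not documented API surface: it presumes the deployment exposes an OpenAI-compatible chat endpoint, and the endpoint URL and IAM API key are placeholders. The model name is the BF16 variant from the llama-3.1 page in this patch.

```python
# Minimal sketch: querying a Managed Inference deployment through an
# OpenAI-compatible chat API. Endpoint URL and API key are placeholders.
from openai import OpenAI

client = OpenAI(
    base_url="https://<your-deployment-endpoint>/v1",  # placeholder deployment endpoint
    api_key="<your-iam-api-key>",                      # placeholder IAM key
)

response = client.chat.completions.create(
    # Model name as listed in the "Model names" section of the page.
    model="meta/llama-3.1-8b-instruct:bf16",
    messages=[{"role": "user", "content": "Summarize the L40S launch in one sentence."}],
    # Prompt tokens plus max_tokens must fit the instance's max context,
    # e.g. 128k tokens on L40S/H100 for the BF16 variant per the table above.
    max_tokens=512,
)
print(response.choices[0].message.content)
```

The same pattern applies to the other chat models touched by this patch; only the model name and the per-instance context budget change.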
diff --git a/pages/managed-inference/reference-content/mistral-7b-instruct-v0.3.mdx b/pages/managed-inference/reference-content/mistral-7b-instruct-v0.3.mdx
index 0e299bab0e..be1d2d4f2d 100644
--- a/pages/managed-inference/reference-content/mistral-7b-instruct-v0.3.mdx
+++ b/pages/managed-inference/reference-content/mistral-7b-instruct-v0.3.mdx
@@ -17,8 +17,8 @@ categories:
 | Attribute | Details |
 |-----------------|------------------------------------|
-| Provider | [Mistral](https://mistral.ai/technology/#models) |
-| Compatible Instances | L4 (BF16) |
+| Provider | [Mistral](https://mistral.ai/technology/#models) |
+| Compatible Instances | L4, L40S, H100, H100-2 (BF16) |
 | Context size | 32K tokens |

 ## Model name
@@ -31,7 +31,10 @@ mistral/mistral-7b-instruct-v0.3:bf16
 | Instance type | Max context length |
 | ------------- |-------------|
-| L4 | 32k (BF16)
+| L4 | 32k (BF16) |
+| L40S | 32k (BF16) |
+| H100 | 32k (BF16) |
+| H100-2 | 32k (BF16) |

 ## Model introduction
@@ -75,4 +78,4 @@ Process the output data according to your application's needs. The response will
 Despite efforts for accuracy, the possibility of generated text containing inaccuracies or [hallucinations](/managed-inference/concepts/#hallucinations) exists. Always verify the content generated independently.

-
\ No newline at end of file
+
diff --git a/pages/managed-inference/reference-content/mistral-nemo-instruct-2407.mdx b/pages/managed-inference/reference-content/mistral-nemo-instruct-2407.mdx
index e7d8e3c2a2..ccf76d3d0e 100644
--- a/pages/managed-inference/reference-content/mistral-nemo-instruct-2407.mdx
+++ b/pages/managed-inference/reference-content/mistral-nemo-instruct-2407.mdx
@@ -17,9 +17,9 @@ categories:
 | Attribute | Details |
 |-----------------|------------------------------------|
-| Provider | [Mistral](https://mistral.ai/technology/#models) |
-| Compatible Instances | H100 (FP8) |
-| Context size | 128K tokens |
+| Provider | [Mistral](https://mistral.ai/technology/#models) |
+| Compatible Instances | L40S, H100, H100-2 (FP8) |
+| Context size | 128K tokens |

 ## Model name
@@ -31,7 +31,9 @@ mistral/mistral-nemo-instruct-2407:fp8
 | Instance type | Max context length |
 | ------------- |-------------|
-| H100 | 128k (FP8)
+| L40S | 128k (FP8) |
+| H100 | 128k (FP8) |
+| H100-2 | 128k (FP8) |

 ## Model introduction
@@ -81,4 +83,4 @@ Process the output data according to your application's needs. The response will
 Despite efforts for accuracy, the possibility of generated text containing inaccuracies or [hallucinations](/managed-inference/concepts/#hallucinations) exists. Always verify the content generated independently.

-
\ No newline at end of file
+
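Reviewer note, not part of the diff: the new rows advertise the full 128k window for mistral-nemo-instruct-2407 on L40S as well as H100, so a client-side guard against oversized prompts is the piece most users will want. The sketch below is a rough illustration under stated assumptions: the endpoint and key are placeholders, and the 4-characters-per-token ratio is a crude heuristic, not a tokenizer.

```python
# Rough sketch: refuse to send a prompt that clearly exceeds the context
# window. Plain HTTP call; endpoint and key are placeholders.
import requests

ENDPOINT = "https://<your-deployment-endpoint>/v1/chat/completions"  # placeholder
API_KEY = "<your-iam-api-key>"                                       # placeholder
CONTEXT_WINDOW = 128_000   # mistral-nemo-instruct-2407 on L40S/H100/H100-2 (FP8)
CHARS_PER_TOKEN = 4        # crude heuristic, not an exact token count

def ask(prompt: str, max_tokens: int = 512) -> str:
    # Leave room for the completion inside the context window.
    if len(prompt) / CHARS_PER_TOKEN + max_tokens > CONTEXT_WINDOW:
        raise ValueError("Prompt likely exceeds the 128k context window")
    resp = requests.post(
        ENDPOINT,
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={
            "model": "mistral/mistral-nemo-instruct-2407:fp8",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens,
        },
        timeout=120,
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]
```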
diff --git a/pages/managed-inference/reference-content/pixtral-12b-2409.mdx b/pages/managed-inference/reference-content/pixtral-12b-2409.mdx
index 2c22572c72..4899e8a3d6 100644
--- a/pages/managed-inference/reference-content/pixtral-12b-2409.mdx
+++ b/pages/managed-inference/reference-content/pixtral-12b-2409.mdx
@@ -17,9 +17,9 @@ categories:
 | Attribute | Details |
 |-----------------|------------------------------------|
-| Provider | [Mistral](https://mistral.ai/technology/#models) |
-| Compatible Instances | H100, H100-2 (bf16) |
-| Context size | 128k tokens |
+| Provider | [Mistral](https://mistral.ai/technology/#models) |
+| Compatible Instances | L40S, H100, H100-2 (bf16) |
+| Context size | 128k tokens |

 ## Model name
@@ -31,6 +31,7 @@ mistral/pixtral-12b-2409:bf16
 | Instance type | Max context length |
 | ------------- |-------------|
+| L40S | 50k (BF16)
 | H100 | 128k (BF16)
 | H100-2 | 128k (BF16)
@@ -162,4 +163,4 @@ Only bitmaps can be analyzed by Pixtral, PDFs and videos are not supported.
 The only limitation is in context window (1 token for each 16x16 pixel).

 #### What is the maximum amount of images per conversation?
-One conversation can handle up to 12 images (per request). The 13rd will return a 413 error.
\ No newline at end of file
+One conversation can handle up to 12 images (per request). The 13th image will return a 413 error.
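Reviewer note, not part of the diff: the pixtral FAQ lines touched here state a budget of 1 token per 16x16 pixel block and a cap of 12 images per request. A minimal vision request might look like the sketch below. It assumes the deployment accepts OpenAI-style image_url content parts; the endpoint, key, and image file are placeholders.

```python
# Sketch: one image plus one question to Pixtral. Assumes OpenAI-style
# vision payloads are accepted; endpoint and key are placeholders.
import base64
from openai import OpenAI

client = OpenAI(
    base_url="https://<your-deployment-endpoint>/v1",  # placeholder
    api_key="<your-iam-api-key>",                      # placeholder
)

with open("diagram.png", "rb") as f:  # placeholder image file
    image_b64 = base64.b64encode(f.read()).decode()

# Token budget per the FAQ: 1 token per 16x16 pixel block, so a
# 1024x1024 bitmap costs (1024 / 16) ** 2 = 4096 context tokens.
# At most 12 images per request; a 13th returns HTTP 413.
response = client.chat.completions.create(
    model="mistral/pixtral-12b-2409:bf16",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "What does this diagram show?"},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
        ],
    }],
    max_tokens=256,
)
print(response.choices[0].message.content)
```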