9 changes: 8 additions & 1 deletion src/locales/en-US/models.ts
@@ -212,5 +212,12 @@ export default {
'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.',
'models.form.kvCache.tips2':
'KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
'models.form.scheduling': 'Scheduling'
'models.form.scheduling': 'Scheduling',
'models.form.ramRatio': 'RAM-to-VRAM Ratio',
'models.form.ramSize': 'Maximum RAM Size (GiB)',
'models.form.ramRatio.tips':
'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
'models.form.chunkSize.tips':
'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
};
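For reference, a minimal sketch of how the `{content}` placeholder in the new 'models.form.ramSize.tips' key is expected to be resolved at the call site — it mirrors the intl.formatMessage call added in src/pages/llmodels/forms/kv-cache.tsx further down in this PR. The RamSizeHint component name is illustrative only and not part of the change.

import { useIntl } from '@umijs/max';

// Illustrative only: fills the {content} placeholder with the ramRatio label,
// producing e.g. '... this value overrides "RAM-to-VRAM Ratio".'
const RamSizeHint = () => {
  const intl = useIntl();
  const hint = intl.formatMessage(
    { id: 'models.form.ramSize.tips' },
    { content: intl.formatMessage({ id: 'models.form.ramRatio' }) }
  );
  return <span>{hint}</span>;
};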
16 changes: 14 additions & 2 deletions src/locales/ja-JP/models.ts
@@ -212,7 +212,14 @@ export default {
'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.',
'models.form.kvCache.tips2':
'KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
'models.form.scheduling': 'Scheduling'
'models.form.scheduling': 'Scheduling',
'models.form.ramRatio': 'RAM-to-VRAM Ratio',
'models.form.ramSize': 'Maximum RAM Size (GiB)',
'models.form.ramRatio.tips':
'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
'models.form.chunkSize.tips':
'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
};

// ========== To-Do: Translate Keys (Remove After Translation) ==========
@@ -259,5 +266,10 @@ export default {
// 43. 'models.form.remoteURL.tips': 'Refer to the <a href="https://docs.lmcache.ai/api_reference/configurations.html" target="_blank">configuration documentation</a> for details.',
// 44. 'models.form.kvCache.tips': 'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.'
// 45. 'models.form.kvCache.tips2': 'KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
// 46. 'models.form.scheduling': 'Scheduling'
// 46. 'models.form.scheduling': 'Scheduling',
// 47. 'models.form.ramRatio': 'RAM-to-VRAM Ratio',
// 48. 'models.form.ramSize': 'Maximum RAM Size (GiB)',
// 49. 'models.form.ramRatio.tips': 'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
// 50. 'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
// 51. 'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
// ========== End of To-Do List ==========
16 changes: 14 additions & 2 deletions src/locales/ru-RU/models.ts
@@ -212,7 +212,14 @@ export default {
'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.',
'models.form.kvCache.tips2':
'KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
'models.form.scheduling': 'Scheduling'
'models.form.scheduling': 'Scheduling',
'models.form.ramRatio': 'RAM-to-VRAM Ratio',
'models.form.ramSize': 'Maximum RAM Size (GiB)',
'models.form.ramRatio.tips':
'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
'models.form.chunkSize.tips':
'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
};

// ========== To-Do: Translate Keys (Remove After Translation) ==========
@@ -222,5 +229,10 @@ export default {
// 5. 'models.form.remoteURL.tips': 'Refer to the <a href="https://docs.lmcache.ai/api_reference/configurations.html" target="_blank">configuration documentation</a> for details.',
// 6. 'models.form.kvCache.tips': 'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.'
// 7. 'models.form.kvCache.tips2': 'KV cache is only supported when using built-in inference backends (vLLM or SGLang).';
// 8. 'models.form.scheduling': 'Scheduling'
// 8. 'models.form.scheduling': 'Scheduling',
// 9. 'models.form.ramRatio': 'RAM-to-VRAM Ratio',
// 10. 'models.form.ramSize': 'Maximum RAM Size (GiB)',
// 11. 'models.form.ramRatio.tips': 'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
// 12. 'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
// 13. 'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
// ========== End of To-Do List ==========
9 changes: 8 additions & 1 deletion src/locales/zh-CN/models.ts
@@ -201,5 +201,12 @@ export default {
'仅在内置后端(vLLM / SGLang)可用 —— 请在<span class="bold-text">高级</span>配置中切换后端以启用。',
'models.form.kvCache.tips2':
'仅在使用内置推理后端(vLLM 或 SGLang)时支持 KV 缓存。',
'models.form.scheduling': '调度'
'models.form.scheduling': '调度',
'models.form.ramRatio': '内存与显存比例',
'models.form.ramSize': '内存最大占用 (GiB)',
'models.form.ramRatio.tips':
'KV 缓存在系统内存与 GPU 显存之间的比例。例如设置为 2.0 表示系统内存中可缓存的数据量是显存的两倍。',
'models.form.ramSize.tips': `KV 缓存在系统内存中的最大值。当设置该值时,将覆盖 "{content}" 的配置。`,
'models.form.chunkSize.tips':
'每个 KV 缓存块包含的 token 数量。数值越大可提升吞吐量,但也会增加内存占用。'
};
4 changes: 2 additions & 2 deletions src/pages/llmodels/config/types.ts
@@ -79,8 +79,8 @@ export interface FormData {
extended_kv_cache: {
enabled: boolean;
chunk_size: number;
max_local_cpu_size: number;
remote_url: string;
ram_ratio: number;
ram_size: number;
};
}
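A quick sketch of what an extended_kv_cache value looks like under the new FormData shape; the numbers are illustrative (they echo the defaults and tooltips elsewhere in this PR) and are not mandated by the type.

// Illustrative values only — the type constrains just the field names and types.
const exampleExtendedKVCache: FormData['extended_kv_cache'] = {
  enabled: true,
  chunk_size: 256, // tokens per KV cache chunk
  ram_ratio: 1.2,  // RAM-to-VRAM ratio for the CPU-side cache
  ram_size: 10     // GiB; when set, overrides ram_ratio (see ramSize.tips)
};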

6 changes: 3 additions & 3 deletions src/pages/llmodels/forms/index.tsx
@@ -368,9 +368,9 @@ const DataForm: React.FC<DataFormProps> = forwardRef((props, ref) => {
distributed_inference_across_workers: true,
extended_kv_cache: {
enabled: false,
chunk_size: 256,
max_local_cpu_size: 10,
remote_url: ''
chunk_size: null,
ram_ratio: 1.2,
ram_size: null
},
...initialValues
}}
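For clarity, a minimal sketch of the override semantics the two new tooltips describe — ram_size, when set, wins over ram_ratio; otherwise the CPU-side KV cache budget is ram_ratio times the model's GPU VRAM. effectiveCpuCacheGiB and gpuVRAMGiB are hypothetical names used for illustration; this is not the backend implementation.

// Hypothetical helper, illustrating the tooltip semantics only.
function effectiveCpuCacheGiB(
  cache: { ram_ratio: number; ram_size: number | null },
  gpuVRAMGiB: number
): number {
  if (cache.ram_size != null) {
    return cache.ram_size; // explicit GiB cap overrides the ratio
  }
  return cache.ram_ratio * gpuVRAMGiB;
}

// With the form defaults (ram_ratio: 1.2, ram_size: null) and 24 GiB of VRAM:
// effectiveCpuCacheGiB({ ram_ratio: 1.2, ram_size: null }, 24) === 28.8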
50 changes: 25 additions & 25 deletions src/pages/llmodels/forms/kv-cache.tsx
@@ -1,6 +1,5 @@
import CheckboxField from '@/components/seal-form/checkbox-field';
import SealInputNumber from '@/components/seal-form/input-number';
import SealInput from '@/components/seal-form/seal-input';
import { useIntl } from '@umijs/max';
import { Form } from 'antd';
import { useMemo } from 'react';
@@ -21,9 +20,9 @@ const KVCacheForm = () => {
form.setFieldsValue({
extended_kv_cache: {
enabled: true,
chunk_size: extendedKVCache?.chunk_size || 256,
max_local_cpu_size: extendedKVCache?.max_local_cpu_size || 10,
remote_url: extendedKVCache?.remote_url || ''
chunk_size: extendedKVCache?.chunk_size,
ram_ratio: extendedKVCache?.ram_ratio || 1.2,
ram_size: extendedKVCache?.ram_size
}
});
}
@@ -76,38 +75,39 @@ const KVCacheForm = () => {
</div>
{kvCacheEnabled && (
<>
<Form.Item<FormData>
name={['extended_kv_cache', 'max_local_cpu_size']}
>
<Form.Item<FormData> name={['extended_kv_cache', 'ram_ratio']}>
<SealInputNumber
label={intl.formatMessage({ id: 'models.form.maxCPUSize' })}
label={intl.formatMessage({ id: 'models.form.ramRatio' })}
description={intl.formatMessage({
id: 'models.form.ramRatio.tips'
})}
min={0}
step={1}
precision={0}
step={0.1}
precision={1}
/>
</Form.Item>
<Form.Item<FormData> name={['extended_kv_cache', 'chunk_size']}>
<Form.Item<FormData> name={['extended_kv_cache', 'ram_size']}>
<SealInputNumber
label={intl.formatMessage({ id: 'models.form.chunkSize' })}
label={intl.formatMessage({ id: 'models.form.ramSize' })}
description={intl.formatMessage(
{
id: 'models.form.ramSize.tips'
},
{ content: intl.formatMessage({ id: 'models.form.ramRatio' }) }
)}
min={0}
step={1}
precision={0}
/>
</Form.Item>
<Form.Item<FormData> name={['extended_kv_cache', 'remote_url']}>
<SealInput.Input
description={
<span
dangerouslySetInnerHTML={{
__html: intl.formatMessage({
id: 'models.form.remoteURL.tips'
})
}}
></span>
}
label={intl.formatMessage({ id: 'models.form.remoteURL' })}
<Form.Item<FormData> name={['extended_kv_cache', 'chunk_size']}>
<SealInputNumber
label={intl.formatMessage({ id: 'models.form.chunkSize' })}
description={intl.formatMessage({
id: 'models.form.chunkSize.tips'
})}
min={0}
step={1}
placeholder="protocol://host:port"
/>
</Form.Item>
</>