Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/locales/en-US/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -211,13 +211,12 @@ export default {
'models.form.kvCache.tips':
'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.',
'models.form.kvCache.tips2':
'KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
'Extended KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
'models.form.scheduling': 'Scheduling',
'models.form.ramRatio': 'RAM-to-VRAM Ratio',
'models.form.ramSize': 'Maximum RAM Size (GiB)',
'models.form.ramRatio.tips':
'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
'models.form.chunkSize.tips':
'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk.'
};
9 changes: 4 additions & 5 deletions src/locales/ja-JP/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,14 @@ export default {
'models.form.kvCache.tips':
'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.',
'models.form.kvCache.tips2':
'KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
'Extended KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
'models.form.scheduling': 'Scheduling',
'models.form.ramRatio': 'RAM-to-VRAM Ratio',
'models.form.ramSize': 'Maximum RAM Size (GiB)',
'models.form.ramRatio.tips':
'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
'models.form.chunkSize.tips':
'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk.'
};

// ========== To-Do: Translate Keys (Remove After Translation) ==========
Expand Down Expand Up @@ -265,11 +264,11 @@ export default {
// 42. 'models.mymodels.status.active': 'Active'
// 43. 'models.form.remoteURL.tips': 'Refer to the <a href="https://docs.lmcache.ai/api_reference/configurations.html" target="_blank">configuration documentation</a> for details.',
// 44. 'models.form.kvCache.tips': 'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.'
// 45. 'models.form.kvCache.tips2': 'KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
// 45. 'models.form.kvCache.tips2': 'Extended KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
// 46. 'models.form.scheduling': 'Scheduling',
// 47. 'models.form.ramRatio': 'RAM-to-VRAM Ratio',
// 48. 'models.form.ramSize': 'Maximum RAM Size (GiB)',
// 49. 'models.form.ramRatio.tips': 'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
// 50. 'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
// 51. 'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
// 51. 'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk.'
// ========== End of To-Do List ==========
9 changes: 4 additions & 5 deletions src/locales/ru-RU/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,14 @@ export default {
'models.form.kvCache.tips':
'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.',
'models.form.kvCache.tips2':
'KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
'Extended KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
'models.form.scheduling': 'Scheduling',
'models.form.ramRatio': 'RAM-to-VRAM Ratio',
'models.form.ramSize': 'Maximum RAM Size (GiB)',
'models.form.ramRatio.tips':
'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
'models.form.chunkSize.tips':
'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk.'
};

// ========== To-Do: Translate Keys (Remove After Translation) ==========
Expand All @@ -228,11 +227,11 @@ export default {
// 4. 'models.mymodels.status.active': 'Active'
// 5. 'models.form.remoteURL.tips': 'Refer to the <a href="https://docs.lmcache.ai/api_reference/configurations.html" target="_blank">configuration documentation</a> for details.',
// 6. 'models.form.kvCache.tips': 'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.'
// 7. 'models.form.kvCache.tips2': 'KV cache is only supported when using built-in inference backends (vLLM or SGLang).';
// 7. 'models.form.kvCache.tips2': 'Extended KV cache is only supported when using built-in inference backends (vLLM or SGLang).';
// 8. 'models.form.scheduling': 'Scheduling',
// 9. 'models.form.ramRatio': 'RAM-to-VRAM Ratio',
// 10. 'models.form.ramSize': 'Maximum RAM Size (GiB)',
// 11. 'models.form.ramRatio.tips': 'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
// 12. 'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
// 13. 'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
// 13. 'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk.'
// ========== End of To-Do List ==========
13 changes: 6 additions & 7 deletions src/pages/llmodels/config/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import _ from 'lodash';
import { backendOptionsMap } from '../config/backend-parameters';
import { FormData } from './types';

// generate the gpu_selector field for form initial values when editing a model
export const generateGPUSelector = (data: any, gpuOptions: any[]) => {
const gpu_ids = _.get(data, 'gpu_selector.gpu_ids', []);
if (gpu_ids.length === 0) {
Expand Down Expand Up @@ -34,13 +35,13 @@ export const generateGPUSelector = (data: any, gpuOptions: any[]) => {
};

/**
* before submit the form, generate the gpu_selector field
* Before submitting the form, generate the gpu_selector field and clear worker_selector if needed
* @param data
* @returns
*/
export const generateGPUIds = (data: FormData) => {
const gpu_ids = _.get(data, 'gpu_selector.gpu_ids', []);
console.log('generateGPUIds', gpu_ids);

if (!gpu_ids.length) {
return {
gpu_selector: null
Expand All @@ -63,10 +64,8 @@ export const generateGPUIds = (data: FormData) => {
return {
gpu_selector: {
gpu_ids: result || [],
gpus_per_replica:
data.gpu_selector?.gpus_per_replica === -1
? null
: data.gpu_selector?.gpus_per_replica
}
gpus_per_replica: data.gpu_selector?.gpus_per_replica || null
},
worker_selector: null
};
};
4 changes: 2 additions & 2 deletions src/pages/llmodels/forms/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ const DataForm: React.FC<DataFormProps> = forwardRef((props, ref) => {
return {
gpu_selector: {
gpu_ids: [gpuids[0]],
gpus_per_replica: -1
gpus_per_replica: null
}
};
}
Expand Down Expand Up @@ -352,7 +352,7 @@ const DataForm: React.FC<DataFormProps> = forwardRef((props, ref) => {
name="deployModel"
form={form}
onFinish={handleOk}
preserve={true}
preserve={false}
clearOnDestroy={true}
onValuesChange={handleOnValuesChange}
onFinishFailed={handleOnFinishFailed}
Expand Down
12 changes: 7 additions & 5 deletions src/pages/llmodels/forms/kv-cache.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import CheckboxField from '@/components/seal-form/checkbox-field';
import SealInputNumber from '@/components/seal-form/input-number';
import { useIntl } from '@umijs/max';
import { Form } from 'antd';
import { useMemo } from 'react';
import { useMemo, useRef } from 'react';
import { backendOptionsMap } from '../config/backend-parameters';
import { useFormContext } from '../config/form-context';
import { FormData } from '../config/types';
Expand All @@ -13,18 +13,20 @@ const KVCacheForm = () => {
const { onValuesChange, backendOptions } = useFormContext();
const kvCacheEnabled = Form.useWatch(['extended_kv_cache', 'enabled'], form);
const backend = Form.useWatch('backend', form);
const configCacheRef = useRef<any>({});

const handleOnChange = async (e: any) => {
const extendedKVCache = form.getFieldValue('extended_kv_cache');
if (e.target.checked) {
form.setFieldsValue({
extended_kv_cache: {
enabled: true,
chunk_size: extendedKVCache?.chunk_size,
ram_ratio: extendedKVCache?.ram_ratio || 1.2,
ram_size: extendedKVCache?.ram_size
chunk_size: configCacheRef.current?.chunk_size,
ram_ratio: configCacheRef.current?.ram_ratio || 1.2,
ram_size: configCacheRef.current?.ram_size
}
});
} else {
configCacheRef.current = form.getFieldValue('extended_kv_cache');
}
await new Promise((resolve) => {
setTimeout(resolve, 200);
Expand Down
5 changes: 3 additions & 2 deletions src/pages/llmodels/forms/schedule-type.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ const ScheduleTypeForm: React.FC = () => {
return;
}
if (value === ScheduleValueMap.Manual) {
form.setFieldValue(['gpu_selector', 'gpus_per_replica'], -1);
form.setFieldValue(['gpu_selector', 'gpus_per_replica'], null);
}
};

Expand Down Expand Up @@ -180,10 +180,11 @@ const ScheduleTypeForm: React.FC = () => {
label={intl.formatMessage({
id: 'models.form.gpusperreplica'
})}
allowNull
options={[
{
label: intl.formatMessage({ id: 'common.options.auto' }),
value: -1
value: null
},
{ label: '1', value: 1 },
{ label: '2', value: 2 },
Expand Down