gpustack · hibig · Oct 31, 2025 · Oct 31, 2025
diff --git a/src/locales/en-US/models.ts b/src/locales/en-US/models.ts
@@ -211,13 +211,12 @@ export default {
   'models.form.kvCache.tips':
     'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.',
   'models.form.kvCache.tips2':
-    'KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
+    'Extended KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
   'models.form.scheduling': 'Scheduling',
   'models.form.ramRatio': 'RAM-to-VRAM Ratio',
   'models.form.ramSize': 'Maximum RAM Size (GiB)',
   'models.form.ramRatio.tips':
     'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
   'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
-  'models.form.chunkSize.tips':
-    'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
+  'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk.'
 };
diff --git a/src/locales/ja-JP/models.ts b/src/locales/ja-JP/models.ts
@@ -211,15 +211,14 @@ export default {
   'models.form.kvCache.tips':
     'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.',
   'models.form.kvCache.tips2':
-    'KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
+    'Extended KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
   'models.form.scheduling': 'Scheduling',
   'models.form.ramRatio': 'RAM-to-VRAM Ratio',
   'models.form.ramSize': 'Maximum RAM Size (GiB)',
   'models.form.ramRatio.tips':
     'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
   'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
-  'models.form.chunkSize.tips':
-    'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
+  'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk.'
 };
 
 // ========== To-Do: Translate Keys (Remove After Translation) ==========
@@ -265,11 +264,11 @@ export default {
 // 42.  'models.mymodels.status.active': 'Active'
 // 43. 'models.form.remoteURL.tips': 'Refer to the <a href="https://docs.lmcache.ai/api_reference/configurations.html" target="_blank">configuration documentation</a> for details.',
 // 44. 'models.form.kvCache.tips': 'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.'
-// 45. 'models.form.kvCache.tips2': 'KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
+// 45. 'models.form.kvCache.tips2': 'Extended KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
 // 46. 'models.form.scheduling': 'Scheduling',
 // 47. 'models.form.ramRatio': 'RAM-to-VRAM Ratio',
 // 48. 'models.form.ramSize': 'Maximum RAM Size (GiB)',
 // 49. 'models.form.ramRatio.tips': 'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
 // 50. 'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
-// 51. 'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
+// 51. 'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk.'
 // ========== End of To-Do List ==========
diff --git a/src/locales/ru-RU/models.ts b/src/locales/ru-RU/models.ts
@@ -211,15 +211,14 @@ export default {
   'models.form.kvCache.tips':
     'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.',
   'models.form.kvCache.tips2':
-    'KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
+    'Extended KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
   'models.form.scheduling': 'Scheduling',
   'models.form.ramRatio': 'RAM-to-VRAM Ratio',
   'models.form.ramSize': 'Maximum RAM Size (GiB)',
   'models.form.ramRatio.tips':
     'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
   'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
-  'models.form.chunkSize.tips':
-    'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
+  'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk.'
 };
 
 // ========== To-Do: Translate Keys (Remove After Translation) ==========
@@ -228,11 +227,11 @@ export default {
 // 4. 'models.mymodels.status.active': 'Active'
 // 5. 'models.form.remoteURL.tips': 'Refer to the <a href="https://docs.lmcache.ai/api_reference/configurations.html" target="_blank">configuration documentation</a> for details.',
 // 6. 'models.form.kvCache.tips': 'Available only with built-in backends (vLLM / SGLang) — switch backend in <span class="bold-text">Advanced</span> to enable.'
-// 7. 'models.form.kvCache.tips2': 'KV cache is only supported when using built-in inference backends (vLLM or SGLang).';
+// 7. 'models.form.kvCache.tips2': 'Extended KV cache is only supported when using built-in inference backends (vLLM or SGLang).',
 // 8. 'models.form.scheduling': 'Scheduling',
 // 9. 'models.form.ramRatio': 'RAM-to-VRAM Ratio',
 // 10. 'models.form.ramSize': 'Maximum RAM Size (GiB)',
 // 11. 'models.form.ramRatio.tips': 'Ratio of system RAM to GPU VRAM used for KV cache. For example, 2.0 means the cache in RAM can be twice as large as the GPU VRAM.',
 // 12. 'models.form.ramSize.tips': `Maximum size of the KV cache stored in system memory (GiB). If set, this value overrides "{content}".`,
-// 13. 'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk. A larger chunk size may improve throughput but increase memory usage.'
+// 13. 'models.form.chunkSize.tips': 'Number of tokens per KV cache chunk.'
 // ========== End of To-Do List ==========
diff --git a/src/pages/llmodels/config/utils.ts b/src/pages/llmodels/config/utils.ts
@@ -2,6 +2,7 @@ import _ from 'lodash';
 import { backendOptionsMap } from '../config/backend-parameters';
 import { FormData } from './types';
 
+// Generate the gpu_selector field for the form's initial values when editing a model
 export const generateGPUSelector = (data: any, gpuOptions: any[]) => {
   const gpu_ids = _.get(data, 'gpu_selector.gpu_ids', []);
   if (gpu_ids.length === 0) {
@@ -34,13 +35,13 @@ export const generateGPUSelector = (data: any, gpuOptions: any[]) => {
 };
 
 /**
- * before submit the form, generate the gpu_selector field
+ * Before submitting the form, generate the gpu_selector field and clear worker_selector if needed
  * @param data
  * @returns
  */
 export const generateGPUIds = (data: FormData) => {
   const gpu_ids = _.get(data, 'gpu_selector.gpu_ids', []);
-  console.log('generateGPUIds', gpu_ids);
+
   if (!gpu_ids.length) {
     return {
       gpu_selector: null
@@ -63,10 +64,8 @@ export const generateGPUIds = (data: FormData) => {
   return {
     gpu_selector: {
       gpu_ids: result || [],
-      gpus_per_replica:
-        data.gpu_selector?.gpus_per_replica === -1
-          ? null
-          : data.gpu_selector?.gpus_per_replica
-    }
+      gpus_per_replica: data.gpu_selector?.gpus_per_replica || null
+    },
+    worker_selector: null
   };
 };
diff --git a/src/pages/llmodels/forms/index.tsx b/src/pages/llmodels/forms/index.tsx
@@ -158,7 +158,7 @@ const DataForm: React.FC<DataFormProps> = forwardRef((props, ref) => {
       return {
         gpu_selector: {
           gpu_ids: [gpuids[0]],
-          gpus_per_replica: -1
+          gpus_per_replica: null
         }
       };
     }
@@ -352,7 +352,7 @@ const DataForm: React.FC<DataFormProps> = forwardRef((props, ref) => {
         name="deployModel"
         form={form}
         onFinish={handleOk}
-        preserve={true}
+        preserve={false}
         clearOnDestroy={true}
         onValuesChange={handleOnValuesChange}
         onFinishFailed={handleOnFinishFailed}

diff --git a/src/pages/llmodels/forms/kv-cache.tsx b/src/pages/llmodels/forms/kv-cache.tsx
@@ -2,7 +2,7 @@ import CheckboxField from '@/components/seal-form/checkbox-field';
 import SealInputNumber from '@/components/seal-form/input-number';
 import { useIntl } from '@umijs/max';
 import { Form } from 'antd';
-import { useMemo } from 'react';
+import { useMemo, useRef } from 'react';
 import { backendOptionsMap } from '../config/backend-parameters';
 import { useFormContext } from '../config/form-context';
 import { FormData } from '../config/types';
@@ -13,18 +13,20 @@ const KVCacheForm = () => {
   const { onValuesChange, backendOptions } = useFormContext();
   const kvCacheEnabled = Form.useWatch(['extended_kv_cache', 'enabled'], form);
   const backend = Form.useWatch('backend', form);
+  const configCacheRef = useRef<any>({});
 
   const handleOnChange = async (e: any) => {
-    const extendedKVCache = form.getFieldValue('extended_kv_cache');
     if (e.target.checked) {
       form.setFieldsValue({
         extended_kv_cache: {
           enabled: true,
-          chunk_size: extendedKVCache?.chunk_size,
-          ram_ratio: extendedKVCache?.ram_ratio || 1.2,
-          ram_size: extendedKVCache?.ram_size
+          chunk_size: configCacheRef.current?.chunk_size,
+          ram_ratio: configCacheRef.current?.ram_ratio || 1.2,
+          ram_size: configCacheRef.current?.ram_size
         }
       });
+    } else {
+      configCacheRef.current = form.getFieldValue('extended_kv_cache');
     }
     await new Promise((resolve) => {
       setTimeout(resolve, 200);

diff --git a/src/pages/llmodels/forms/schedule-type.tsx b/src/pages/llmodels/forms/schedule-type.tsx
@@ -77,7 +77,7 @@ const ScheduleTypeForm: React.FC = () => {
       return;
     }
     if (value === ScheduleValueMap.Manual) {
-      form.setFieldValue(['gpu_selector', 'gpus_per_replica'], -1);
+      form.setFieldValue(['gpu_selector', 'gpus_per_replica'], null);
     }
   };
 
@@ -180,10 +180,11 @@ const ScheduleTypeForm: React.FC = () => {
                 label={intl.formatMessage({
                   id: 'models.form.gpusperreplica'
                 })}
+                allowNull
                 options={[
                   {
                     label: intl.formatMessage({ id: 'common.options.auto' }),
-                    value: -1
+                    value: null
                   },
                   { label: '1', value: 1 },
                   { label: '2', value: 2 },