
Commit b465ea2

feat: delegate the prompt formatting to ollama
1 parent d4b65d6 commit b465ea2

File tree

package.json
src/config.ts
src/prompts/autocomplete.ts
src/prompts/processors/models.ts
src/prompts/provider.ts

5 files changed: +2 -102 lines changed

package.json

Lines changed: 0 additions & 48 deletions
@@ -83,37 +83,6 @@
         },
         "inference.model": {
           "type": "string",
-          "enum": [
-            "stable-code:3b-code-q4_0",
-            "codellama:7b-code-q4_K_S",
-            "codellama:7b-code-q4_K_M",
-            "codellama:7b-code-q6_K",
-            "codellama:7b-code-fp16",
-            "codellama:13b-code-q4_K_S",
-            "codellama:13b-code-q4_K_M",
-            "codellama:13b-code-q6_K",
-            "codellama:13b-code-fp16",
-            "codellama:34b-code-q4_K_S",
-            "codellama:34b-code-q4_K_M",
-            "codellama:34b-code-q6_K",
-            "codellama:70b-code-q4_K_S",
-            "codellama:70b-code-q4_K_M",
-            "codellama:70b-code-q6_K",
-            "codellama:70b-code-fp16",
-            "deepseek-coder:1.3b-base-q4_0",
-            "deepseek-coder:1.3b-base-q4_1",
-            "deepseek-coder:1.3b-base-q8_0",
-            "deepseek-coder:6.7b-base-q4_K_S",
-            "deepseek-coder:6.7b-base-q4_K_M",
-            "deepseek-coder:6.7b-base-q5_K_S",
-            "deepseek-coder:6.7b-base-q5_K_M",
-            "deepseek-coder:6.7b-base-q8_0",
-            "deepseek-coder:6.7b-base-fp16",
-            "deepseek-coder:33b-base-q4_K_S",
-            "deepseek-coder:33b-base-q4_K_M",
-            "deepseek-coder:33b-base-fp16",
-            "custom"
-          ],
           "default": "stable-code:3b-code-q4_0",
           "description": "Inference model to use",
           "order": 2
@@ -124,23 +93,6 @@
           "description": "Temperature of the model. Increasing the temperature will make the model answer more creatively.",
           "order": 3
         },
-        "inference.custom.model": {
-          "type": "string",
-          "default": "",
-          "description": "Custom model name",
-          "order": 4
-        },
-        "inference.custom.format": {
-          "type": "string",
-          "enum": [
-            "stable-code",
-            "codellama",
-            "deepseek"
-          ],
-          "default": "stable-code",
-          "description": "Custom model prompt format",
-          "order": 5
-        },
         "inference.maxLines": {
           "type": "number",
           "default": 16,

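With the enum and the two inference.custom.* settings gone, inference.model is now a free-form string: any tag the local Ollama instance can serve, including models that were never in the old list, can be entered directly. Not part of this commit, but as a rough sketch of what that implies, a caller could check a configured tag against the local registry, assuming Ollama's documented GET /api/tags endpoint:

```typescript
// Hypothetical helper, not in this repository: verifies that a free-form
// model tag is actually available on the local Ollama instance.
// Assumes the documented GET /api/tags endpoint and a Node 18+ global fetch.
async function isModelAvailable(endpoint: string, model: string): Promise<boolean> {
    const res = await fetch(`${endpoint}/api/tags`);
    if (!res.ok) {
        return false;
    }
    const body = await res.json() as { models?: { name: string }[] };
    return (body.models ?? []).some((m) => m.name === model);
}

// Example (assumed default endpoint):
// await isModelAvailable('http://127.0.0.1:11434', 'deepseek-coder:6.7b-base-q4_K_M');
```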
src/config.ts

Lines changed: 0 additions & 13 deletions
@@ -1,5 +1,4 @@
 import vscode from 'vscode';
-import { ModelFormat } from './prompts/processors/models';
 
 class Config {
 
@@ -24,17 +23,6 @@ class Config {
 
         // Load model
         let modelName = config.get('model') as string;
-        let modelFormat: ModelFormat = 'codellama';
-        if (modelName === 'custom') {
-            modelName = config.get('custom.model') as string;
-            modelFormat = config.get('cutom.format') as ModelFormat;
-        } else {
-            if (modelName.startsWith('deepseek-coder')) {
-                modelFormat = 'deepseek';
-            } else if (modelName.startsWith('stable-code')) {
-                modelFormat = 'stable-code';
-            }
-        }
 
         let delay = config.get('delay') as number;
 
@@ -45,7 +33,6 @@ class Config {
             maxTokens,
             temperature,
             modelName,
-            modelFormat,
             delay
         };
     }
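After this change Config no longer guesses a prompt format from the model name; it just passes the configured string through. A minimal sketch of the resulting load path, assuming the inference.* settings shown in package.json (field names follow the diff, everything else is illustrative):

```typescript
// Illustrative sketch only, not the full Config class from src/config.ts.
import vscode from 'vscode';

function loadInferenceConfig() {
    // Assumes the settings live under the "inference" section, matching the
    // "inference.model", "inference.maxLines", ... keys in package.json.
    const config = vscode.workspace.getConfiguration('inference');

    // Any Ollama model tag is taken as-is: no 'custom' branch, no format mapping.
    const modelName = config.get('model') as string;
    const maxTokens = config.get('maxTokens') as number;      // assumed key
    const temperature = config.get('temperature') as number;  // assumed key
    const delay = config.get('delay') as number;

    return { modelName, maxTokens, temperature, delay };
}
```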

src/prompts/autocomplete.ts

Lines changed: 2 additions & 6 deletions
@@ -1,13 +1,11 @@
 import { ollamaTokenGenerator } from '../modules/ollamaTokenGenerator';
 import { countSymbol } from '../modules/text';
 import { info } from '../modules/log';
-import { ModelFormat, adaptPrompt } from './processors/models';
 
 export async function autocomplete(args: {
     endpoint: string,
     bearerToken: string,
     model: string,
-    format: ModelFormat,
     prefix: string,
     suffix: string,
     maxLines: number,
@@ -16,15 +14,13 @@ export async function autocomplete(args: {
     canceled?: () => boolean,
 }): Promise<string> {
 
-    let prompt = adaptPrompt({ prefix: args.prefix, suffix: args.suffix, format: args.format });
-
     // Calculate arguments
     let data = {
         model: args.model,
-        prompt: prompt.prompt,
+        prompt: args.prefix,
+        suffix: args.suffix,
         raw: true,
         options: {
-            stop: prompt.stop,
            num_predict: args.maxTokens,
            temperature: args.temperature
        }
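The request body now hands the untouched prefix and suffix to Ollama and leaves fill-in-the-middle formatting to the server and the model's own template. A self-contained sketch of an equivalent non-streaming request, assuming Ollama's documented /api/generate endpoint (the extension itself streams tokens through ollamaTokenGenerator):

```typescript
// Sketch only: mirrors the payload shape introduced by this commit, but as a
// plain non-streaming fetch call rather than the extension's streaming client.
async function generateCompletion(
    endpoint: string,   // e.g. 'http://127.0.0.1:11434' (assumed default)
    model: string,      // any Ollama model tag
    prefix: string,     // text before the cursor
    suffix: string      // text after the cursor
): Promise<string> {
    const res = await fetch(`${endpoint}/api/generate`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            model,
            prompt: prefix,
            suffix,            // Ollama fills in the middle using the model's template
            raw: true,
            stream: false,     // illustrative; the extension streams instead
            options: {
                num_predict: 256,   // placeholder for args.maxTokens
                temperature: 0.2    // placeholder for args.temperature
            }
        })
    });
    const body = await res.json() as { response: string };
    return body.response;
}
```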

src/prompts/processors/models.ts

Lines changed: 0 additions & 34 deletions
This file was deleted.
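This module previously carried the hand-maintained prompt adapters: ModelFormat and adaptPrompt, which turned a prefix/suffix pair into a model-specific fill-in-the-middle prompt plus stop tokens (see the removed call in autocomplete.ts). A representative sketch of that kind of adapter, explicitly not the deleted file's actual contents:

```typescript
// Representative sketch only; NOT the deleted src/prompts/processors/models.ts.
// Before this commit, an adapter along these lines mapped each format to a
// hand-written fill-in-the-middle template and stop tokens.
type ModelFormat = 'stable-code' | 'codellama' | 'deepseek';

function adaptPrompt(args: { prefix: string, suffix: string, format: ModelFormat }): { prompt: string, stop: string[] } {
    if (args.format === 'codellama') {
        // CodeLlama infill template (token spelling assumed)
        return { prompt: `<PRE> ${args.prefix} <SUF>${args.suffix} <MID>`, stop: ['<EOT>'] };
    }
    if (args.format === 'stable-code') {
        // StarCoder-style FIM tokens used by Stable Code (assumed)
        return { prompt: `<fim_prefix>${args.prefix}<fim_suffix>${args.suffix}<fim_middle>`, stop: ['<|endoftext|>'] };
    }
    // 'deepseek' uses DeepSeek Coder's own FIM markers, omitted here.
    return { prompt: args.prefix, stop: [] };
}
```

Deleting the file means these templates no longer have to track upstream model changes; Ollama ships the appropriate template with each model.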

src/prompts/provider.ts

Lines changed: 0 additions & 1 deletion
@@ -163,7 +163,6 @@ export class PromptProvider implements vscode.InlineCompletionItemProvider {
             endpoint: inferenceConfig.endpoint,
             bearerToken: inferenceConfig.bearerToken,
             model: inferenceConfig.modelName,
-            format: inferenceConfig.modelFormat,
             maxLines: inferenceConfig.maxLines,
             maxTokens: inferenceConfig.maxTokens,
             temperature: inferenceConfig.temperature,
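On the provider side the only change is dropping the format argument; the remaining arguments are forwarded as before. A sketch of the updated call site, using the fields visible in the diff and hypothetical values for the ones elided from it:

```typescript
// Sketch of the call into autocomplete() after this commit. prefix, suffix and
// the cancellation hook are not shown in the diff; they are assumed here.
const completion = await autocomplete({
    endpoint: inferenceConfig.endpoint,
    bearerToken: inferenceConfig.bearerToken,
    model: inferenceConfig.modelName,
    // format: inferenceConfig.modelFormat  <- removed; Ollama formats the prompt
    maxLines: inferenceConfig.maxLines,
    maxTokens: inferenceConfig.maxTokens,
    temperature: inferenceConfig.temperature,
    prefix,                                          // assumed: text before the cursor
    suffix,                                          // assumed: text after the cursor
    canceled: () => token.isCancellationRequested,   // assumed cancellation check
});
```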
