Commit e2b0287

FEATURE: Enhance LLM context window settings (#1271)
### 🔍 Overview

This update makes several enhancements to the LLM configuration screen. In particular, it renames the prompt-token field in the UI to "Context window", since the previous name could confuse users. It also adds a new optional field, "Max output tokens".
1 parent 0c8718e commit e2b0287
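The two settings map onto the two directions of an LLM call: "Context window" caps how much the model can ingest per request, and "Max output tokens" caps how much it may generate back. A minimal sketch of how a caller could consume the new column (the request shape and field values are illustrative assumptions, not code from this commit):

```ruby
# Hypothetical sketch, not code from this commit: building a completion
# request from the admin-configured LlmModel record.
llm = LlmModel.find_by(display_name: "My model")

request = {
  model: llm.name,
  messages: [{ role: "user", content: "Summarize this topic." }],
}

# A blank admin field is stored as nil; omit the cap in that case and
# let the provider fall back to its own default.
request[:max_tokens] = llm.max_output_tokens if llm.max_output_tokens
```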

File tree

8 files changed: +41 −3 lines changed


app/controllers/discourse_ai/admin/ai_llms_controller.rb

+1
@@ -157,6 +157,7 @@ def ai_llm_params(updating: nil)
         :provider,
         :tokenizer,
         :max_prompt_tokens,
+        :max_output_tokens,
         :api_key,
         :enabled_chat_bot,
         :vision_enabled,

app/models/llm_model.rb

+2
@@ -13,6 +13,7 @@ class LlmModel < ActiveRecord::Base
   validates :url, presence: true, unless: -> { provider == BEDROCK_PROVIDER_NAME }
   validates_presence_of :name, :api_key
   validates :max_prompt_tokens, numericality: { greater_than: 0 }
+  validates :max_output_tokens, numericality: { greater_than_or_equal_to: 0 }, allow_nil: true
   validate :required_provider_params
   scope :in_use,
         -> do
@@ -183,4 +184,5 @@ def required_provider_params
 #  enabled_chat_bot  :boolean  default(FALSE), not null
 #  provider_params   :jsonb
 #  vision_enabled    :boolean  default(FALSE), not null
+#  max_output_tokens :integer
 #
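In plain terms, the new validation makes the cap optional while rejecting negative values. Assuming an otherwise-valid LlmModel record, standard ActiveModel numericality semantics give:

```ruby
llm.max_output_tokens = nil
llm.valid? # => true  (allow_nil: the field is optional)

llm.max_output_tokens = 0
llm.valid? # => true  (greater_than_or_equal_to: 0 permits zero)

llm.max_output_tokens = -1
llm.valid? # => false (negative values are rejected)
```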

app/serializers/llm_model_serializer.rb

+1
@@ -11,6 +11,7 @@ class LlmModelSerializer < ApplicationSerializer
     :name,
     :provider,
     :max_prompt_tokens,
+    :max_output_tokens,
     :tokenizer,
     :api_key,
     :url,

assets/javascripts/discourse/admin/models/ai-llm.js

+1
@@ -10,6 +10,7 @@ export default class AiLlm extends RestModel {
     "provider",
     "tokenizer",
     "max_prompt_tokens",
+    "max_output_tokens",
     "url",
     "api_key",
     "enabled_chat_bot",

assets/javascripts/discourse/components/ai-llm-editor-form.gjs

+23-1
@@ -40,6 +40,7 @@ export default class AiLlmEditorForm extends Component {

     return {
       max_prompt_tokens: modelInfo.tokens,
+      max_output_tokens: modelInfo.max_output_tokens,
       tokenizer: info.tokenizer,
       url: modelInfo.endpoint || info.endpoint,
       display_name: modelInfo.display_name,
@@ -53,6 +54,7 @@ export default class AiLlmEditorForm extends Component {

     return {
       max_prompt_tokens: model.max_prompt_tokens,
+      max_output_tokens: model.max_output_tokens,
       api_key: model.api_key,
       tokenizer: model.tokenizer,
       url: model.url,
@@ -183,8 +185,18 @@ export default class AiLlmEditorForm extends Component {
     this.isSaving = true;
     const isNew = this.args.model.isNew;

+    const updatedData = {
+      ...data,
+    };
+
+    // If the max_output_tokens input is cleared,
+    // we want the db to store null.
+    if (!data.max_output_tokens) {
+      updatedData.max_output_tokens = null;
+    }
+
     try {
-      await this.args.model.save(data);
+      await this.args.model.save(updatedData);

       if (isNew) {
         this.args.llms.addObject(this.args.model);
@@ -399,6 +411,16 @@ export default class AiLlmEditorForm extends Component {
           <field.Input @type="number" step="any" min="0" lang="en" />
         </form.Field>

+        <form.Field
+          @name="max_output_tokens"
+          @title={{i18n "discourse_ai.llms.max_output_tokens"}}
+          @tooltip={{i18n "discourse_ai.llms.hints.max_output_tokens"}}
+          @format="large"
+          as |field|
+        >
+          <field.Input @type="number" step="any" min="0" lang="en" />
+        </form.Field>
+
         <form.Field
           @name="vision_enabled"
           @title={{i18n "discourse_ai.llms.vision_enabled"}}

config/locales/client.en.yml

+4-2
@@ -397,7 +397,8 @@ en:
       name: "Model id"
       provider: "Provider"
       tokenizer: "Tokenizer"
-      max_prompt_tokens: "Number of tokens for the prompt"
+      max_prompt_tokens: "Context window"
+      max_output_tokens: "Max output tokens"
       url: "URL of the service hosting the model"
       api_key: "API Key of the service hosting the model"
       enabled_chat_bot: "Allow AI bot selector"
@@ -480,7 +481,8 @@ en:
       failure: "Trying to contact the model returned this error: %{error}"

      hints:
-        max_prompt_tokens: "Max numbers of tokens for the prompt. As a rule of thumb, this should be 50% of the model's context window."
+        max_prompt_tokens: "The maximum number of tokens the model can process in a single request"
+        max_output_tokens: "The maximum number of tokens the model can generate in a single request"
         display_name: "The name used to reference this model across your site's interface."
         name: "We include this in the API call to specify which model we'll use"
         vision_enabled: "If enabled, the AI will attempt to understand images. It depends on the model being used supporting vision. Supported by latest models from Anthropic, Google, and OpenAI."
db/migrate/…_add_max_output_tokens_to_llm_model.rb

+7

@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+
+class AddMaxOutputTokensToLlmModel < ActiveRecord::Migration[7.2]
+  def change
+    add_column :llm_models, :max_output_tokens, :integer
+  end
+end
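Because the column is added without a default or a NOT NULL constraint, existing llm_models rows get NULL for max_output_tokens, which the model's allow_nil validation and the form's null normalization both treat as "no explicit cap".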

spec/system/llms/ai_llm_spec.rb

+2
@@ -53,6 +53,7 @@
     form.field("max_prompt_tokens").fill_in(8000)
     form.field("provider").select("vllm")
     form.field("tokenizer").select("DiscourseAi::Tokenizer::Llama3Tokenizer")
+    form.field("max_output_tokens").fill_in(2000)
     form.field("vision_enabled").toggle
     form.field("enabled_chat_bot").toggle
     form.submit
@@ -67,6 +68,7 @@
     expect(llm.tokenizer).to eq("DiscourseAi::Tokenizer::Llama3Tokenizer")
     expect(llm.max_prompt_tokens.to_i).to eq(8000)
     expect(llm.provider).to eq("vllm")
+    expect(llm.max_output_tokens.to_i).to eq(2000)
     expect(llm.vision_enabled).to eq(true)
     expect(llm.user_id).not_to be_nil
   end
