Commit aea11a9

Behzad changes (#137)

* Behzad changes
* updating packages
* adding new llm models (watsonx)
* update data transformation bug fixes
* adding templates

1 parent a4db4f7 commit aea11a9


44 files changed: +2005 −331 lines

.secrets.baseline

Lines changed: 1 addition & 19 deletions
@@ -3,7 +3,7 @@
     "files": null,
     "lines": null
   },
-  "generated_at": "2025-10-09T13:24:14Z",
+  "generated_at": "2025-10-20T16:35:31Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
@@ -311,24 +311,6 @@
       "verified_result": null
     }
   ],
-  "apps/web/lib/author.ts": [
-    {
-      "hashed_secret": "d3ecb0d890368d7659ee54010045b835dacb8efe",
-      "is_verified": false,
-      "line_number": 1430,
-      "type": "Secret Keyword",
-      "verified_result": null
-    }
-  ],
-  "apps/web/lib/shared.ts": [
-    {
-      "hashed_secret": "d3ecb0d890368d7659ee54010045b835dacb8efe",
-      "is_verified": false,
-      "line_number": 1051,
-      "type": "Secret Keyword",
-      "verified_result": null
-    }
-  ],
   "apps/web/public/ffmpeg-core/ffmpeg-core.js": [
     {
       "hashed_secret": "b4e44716dbbf57be3dae2f819d96795a85d06652",

apps/api/package.json

Lines changed: 3 additions & 2 deletions
@@ -32,6 +32,7 @@
   },
   "dependencies": {
     "@dqbd/tiktoken": "^1.0.7",
+    "@ibm-cloud/watsonx-ai": "^1.7.0",
     "@instana/collector": "^4.14.0",
     "@langchain/community": "^0.3.56",
     "@langchain/core": "^0.3.77",
@@ -86,8 +87,8 @@
     "pg": "^8.11.1",
     "prisma": "^5.3.1",
     "redis": "^4.7.0",
-    "reflect-metadata": "^0.2.0",
-    "rxjs": "^7.8.1",
+    "reflect-metadata": "0.2.0",
+    "rxjs": "7.8.1",
     "sn-messaging-ts-client": "^0.4.0",
     "socket.io": "^4.8.0",
     "unzipper": "^0.12.3",
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
+-- Add IBM foundation model variant to the LLMModel table
+-- This migration adds the IBM foundation model GPT-oss-120b
+
+INSERT INTO "LLMModel" ("modelKey", "displayName", "provider", "isActive", "createdAt", "updatedAt") VALUES
+('gpt-oss-120b', 'GPT-OSS-120B', 'OpenAI', true, NOW(), NOW());
+
+-- Add initial pricing data for the new GPT model
+-- Note: These are estimated prices, adjust based on actual OpenAI pricing when available
+WITH new_models AS (
+  SELECT id, "modelKey" FROM "LLMModel" WHERE "modelKey" IN ('gpt-oss-120b')
+)
+INSERT INTO "LLMPricing" ("modelId", "inputTokenPrice", "outputTokenPrice", "effectiveDate", "source", "isActive", "createdAt", "updatedAt")
+SELECT
+  m.id,
+  CASE
+    WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.00000015
+  END,
+  CASE
+    WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.0000006
+  END,
+  NOW(),
+  'MANUAL',
+  true,
+  NOW(),
+  NOW()
+FROM new_models m;
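
For context, the per-token figures above work out to roughly $0.15 per million input tokens and $0.60 per million output tokens. A minimal TypeScript sketch of how such stored prices translate into a per-request cost; the helper and constant names are illustrative, not part of this commit:

// Illustrative only: maps per-token prices from LLMPricing to a dollar cost.
const GPT_OSS_120B = { inputTokenPrice: 0.00000015, outputTokenPrice: 0.0000006 };

function estimateCostUSD(inputTokens: number, outputTokens: number): number {
  return (
    inputTokens * GPT_OSS_120B.inputTokenPrice +
    outputTokens * GPT_OSS_120B.outputTokenPrice
  );
}

// 10,000 input + 2,000 output tokens => 0.0015 + 0.0012 = $0.0027
console.log(estimateCostUSD(10_000, 2_000));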
Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+-- Add new WatsonX AI models to the LLMModel table
+-- This migration adds Granite, Llama 3.3, Llama 4 Maverick, and Mistral Medium models
+
+INSERT INTO "LLMModel" ("modelKey", "displayName", "provider", "isActive", "createdAt", "updatedAt") VALUES
+('granite-4-h-small', 'Granite 4-H Small', 'IBM', true, NOW(), NOW()),
+('llama-3-3-70b-instruct', 'Llama 3.3 70B Instruct', 'Meta', true, NOW(), NOW()),
+('llama-4-maverick', 'Llama 4 Maverick 17B', 'Meta', true, NOW(), NOW()),
+('mistral-medium-2505', 'Mistral Medium 2505', 'Mistral AI', true, NOW(), NOW());
+
+-- Add initial pricing data for the new models
+-- Note: Pricing based on WatsonX Resource Unit (RU) model where 1 RU = 1,000 tokens
+-- Granite models (Class 1): $0.0006/RU = $0.0000006 per token
+-- Llama/Mistral models (Class 2): $0.0018/RU = $0.0000018 per token
+-- Input and output tokens are charged at the same rate in WatsonX
+WITH new_models AS (
+  SELECT id, "modelKey" FROM "LLMModel" WHERE "modelKey" IN (
+    'granite-4-h-small',
+    'llama-3-3-70b-instruct',
+    'llama-4-maverick',
+    'mistral-medium-2505'
+  )
+)
+INSERT INTO "LLMPricing" ("modelId", "inputTokenPrice", "outputTokenPrice", "effectiveDate", "source", "isActive", "createdAt", "updatedAt")
+SELECT
+  m.id,
+  CASE
+    WHEN m."modelKey" = 'granite-4-h-small' THEN 0.0000006
+    WHEN m."modelKey" = 'llama-3-3-70b-instruct' THEN 0.0000018
+    WHEN m."modelKey" = 'llama-4-maverick' THEN 0.0000018
+    WHEN m."modelKey" = 'mistral-medium-2505' THEN 0.0000018
+  END,
+  CASE
+    WHEN m."modelKey" = 'granite-4-h-small' THEN 0.0000006
+    WHEN m."modelKey" = 'llama-3-3-70b-instruct' THEN 0.0000018
+    WHEN m."modelKey" = 'llama-4-maverick' THEN 0.0000018
+    WHEN m."modelKey" = 'mistral-medium-2505' THEN 0.0000018
+  END,
+  NOW(),
+  'MANUAL',
+  true,
+  NOW(),
+  NOW()
+FROM new_models m;
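
The Resource Unit arithmetic in the comments above is easy to sanity-check. A small sketch assuming the quoted rates (1 RU = 1,000 tokens, so price per token = price per RU / 1,000):

// Illustrative check of the RU math in the migration comments.
const TOKENS_PER_RU = 1_000;
const perToken = (pricePerRU: number): number => pricePerRU / TOKENS_PER_RU;

console.log(perToken(0.0006)); // Class 1 (Granite): 6e-7  = $0.0000006 per token
console.log(perToken(0.0018)); // Class 2 (Llama/Mistral): 1.8e-6 = $0.0000018 per token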
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+-- Add Granite Vision 3.2 2B model to the LLMModel table
+-- This migration adds IBM's multimodal Granite Vision model for image grading
+
+INSERT INTO "LLMModel" ("modelKey", "displayName", "provider", "isActive", "createdAt", "updatedAt") VALUES
+('granite-vision-3-2-2b', 'Granite Vision 3.2 2B', 'IBM', true, NOW(), NOW());
+
+-- Add initial pricing data for the Granite Vision model
+-- Note: Pricing based on WatsonX vision model rates
+-- Vision models typically cost more than text-only models due to image processing
+WITH new_model AS (
+  SELECT id, "modelKey" FROM "LLMModel" WHERE "modelKey" = 'granite-vision-3-2-2b'
+)
+INSERT INTO "LLMPricing" ("modelId", "inputTokenPrice", "outputTokenPrice", "effectiveDate", "source", "isActive", "createdAt", "updatedAt")
+SELECT
+  m.id,
+  0.000001, -- Input token price (slightly higher than granite-4-h-small due to vision capability)
+  0.000001, -- Output token price (same as input for vision models)
+  NOW(),
+  'MANUAL',
+  true,
+  NOW(),
+  NOW()
+FROM new_model m;

apps/api/src/api/assignment/v2/services/translation.service.ts

Lines changed: 72 additions & 8 deletions
@@ -1,8 +1,10 @@
 /* eslint-disable unicorn/no-null */
-import { Injectable, Logger, NotFoundException } from "@nestjs/common";
+import { Inject, Injectable, Logger, NotFoundException } from "@nestjs/common";
 import { Prisma } from "@prisma/client";
 import Bottleneck from "bottleneck";
 import { LlmFacadeService } from "src/api/llm/llm-facade.service";
+import { LLM_RESOLVER_SERVICE } from "src/api/llm/llm.constants";
+import { LLMResolverService } from "src/api/llm/core/services/llm-resolver.service";
 import { PrismaService } from "src/database/prisma.service";
 import {
   getAllLanguageCodes,
@@ -61,7 +63,9 @@ interface BatchProcessResult {
 export class TranslationService {
   private readonly logger = new Logger(TranslationService.name);
   private readonly languageTranslation: boolean;
-  private readonly limiter: Bottleneck;
+  private readonly limiter: Bottleneck; // default high-throughput
+  private readonly watsonxLimiter: Bottleneck; // conservative for IBM Watsonx
+  private useWatsonxLimiterForTranslation = false;

   // Performance optimized settings
   private readonly MAX_BATCH_SIZE = 100; // Increased for better throughput
@@ -91,6 +95,8 @@ export class TranslationService {
     private readonly prisma: PrismaService,
     private readonly llmFacadeService: LlmFacadeService,
     private readonly jobStatusService: JobStatusServiceV2,
+    @Inject(LLM_RESOLVER_SERVICE)
+    private readonly llmResolver: LLMResolverService,
   ) {
     this.languageTranslation =
       process.env.ENABLE_TRANSLATION.toString().toLowerCase() === "true" ||
@@ -105,10 +111,62 @@ export class TranslationService {
       strategy: Bottleneck.strategy.OVERFLOW,
       timeout: this.OPERATION_TIMEOUT,
     });
+    // More conservative limiter for Watsonx-backed translations
+    this.watsonxLimiter = new Bottleneck({
+      maxConcurrent: 8,
+      minTime: 50, // ~20 rps spacing
+      reservoir: 20,
+      reservoirRefreshInterval: 1000,
+      reservoirRefreshAmount: 20,
+      highWater: 1000,
+      strategy: Bottleneck.strategy.OVERFLOW,
+      timeout: this.OPERATION_TIMEOUT,
+    });
     setInterval(() => this.checkLimiterHealth(), 30_000);
     setInterval(() => this.checkJobTimeouts(), 60_000); // Check every minute
   }

+  /**
+   * Decide which limiter to use based on current translation model assignment
+   */
+  private async syncLimiterForTranslationModel(): Promise<void> {
+    try {
+      const modelKey = await this.llmResolver.getModelKeyWithFallback(
+        "translation",
+        "gpt-4o-mini",
+      );
+      const isWatsonx = this.isWatsonxModel(modelKey);
+      if (isWatsonx !== this.useWatsonxLimiterForTranslation) {
+        this.useWatsonxLimiterForTranslation = isWatsonx;
+        this.logger.debug(
+          `Translation limiter set to ${isWatsonx ? "Watsonx profile" : "default profile"} (model: ${modelKey})`,
+        );
+      }
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      this.logger.warn(
+        `Could not resolve translation model; using default limiter. Reason: ${message}`,
+      );
+      this.useWatsonxLimiterForTranslation = false;
+    }
+  }
+
+  private isWatsonxModel(modelKey: string): boolean {
+    if (!modelKey) return false;
+    return (
+      modelKey.startsWith("granite-") ||
+      modelKey.startsWith("gpt-oss-") ||
+      modelKey === "llama-3-3-70b-instruct" ||
+      modelKey === "llama-4-maverick"
+    );
+  }
+
+  private getActiveLimiter(): Bottleneck {
+    return this.useWatsonxLimiterForTranslation
+      ? this.watsonxLimiter
+      : this.limiter;
+  }
+
   /**
    * Process translations in parallel with efficient batching
    * @param items Items to translate
@@ -133,7 +191,7 @@ export class TranslationService {
       const chunk = chunks[chunkIndex];

       const processingPromises = chunk.map((item) =>
-        this.limiter
+        this.getActiveLimiter()
           .schedule({ expiration: 15_000, priority: 5 }, () =>
             batchProcessor(item),
           )
@@ -582,7 +640,8 @@ export class TranslationService {
   */
  private checkLimiterHealth(): void {
    try {
-      const counts = this.limiter.counts();
+      const limiter = this.getActiveLimiter();
+      const counts = limiter.counts();

      if (
        counts.RUNNING > 10 &&
@@ -600,10 +659,10 @@ export class TranslationService {
        this.logger.warn(
          `High queue load: ${counts.QUEUED} jobs queued. Reducing accepting rate.`,
        );
-        this.limiter.updateSettings({ maxConcurrent: 5 });
+        limiter.updateSettings({ maxConcurrent: 5 });

        setTimeout(() => {
-          this.limiter.updateSettings({ maxConcurrent: 25 });
+          limiter.updateSettings({ maxConcurrent: 25 });
          this.logger.log("Restored normal concurrency limits");
        }, 30_000);
      }
@@ -623,8 +682,9 @@ export class TranslationService {
        "Resetting bottleneck limiter due to potential stalled state",
      );

-      void this.limiter.stop({ dropWaitingJobs: false }).then(() => {
-        this.limiter.updateSettings({
+      const limiter = this.getActiveLimiter();
+      void limiter.stop({ dropWaitingJobs: false }).then(() => {
+        limiter.updateSettings({
          maxConcurrent: 25,
          minTime: 10,
          reservoir: 100,
@@ -1018,6 +1078,7 @@ export class TranslationService {
        )
      : undefined;

+    await this.syncLimiterForTranslationModel();
     await this.processBatchesInParallel(
       languageCodes,
       async (lang: string) => {
@@ -1227,6 +1288,7 @@ export class TranslationService {
        )
      : undefined;

+    await this.syncLimiterForTranslationModel();
     const results = await this.processBatchesInParallel(
       supportedLanguages,
       async (lang: string) => {
@@ -1404,6 +1466,7 @@ export class TranslationService {
      }
    }

+    await this.syncLimiterForTranslationModel();
     const results = await this.processBatchesInParallel(
       supportedLanguages,
       async (lang: string) => {
@@ -1566,6 +1629,7 @@ export class TranslationService {
      });
    }

+    await this.syncLimiterForTranslationModel();
     const results = await this.processBatchesInParallel(
       supportedLanguages,
       async (lang: string) => {
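
Taken together, the changes above amount to two Bottleneck profiles plus a model-key predicate that picks between them before each translation run. A condensed, self-contained sketch of the pattern; the limiter values are copied from the diff, while the runTranslation wrapper is illustrative rather than the service's actual method:

import Bottleneck from "bottleneck";

// Default high-throughput profile vs. conservative Watsonx profile.
const defaultLimiter = new Bottleneck({ maxConcurrent: 25, minTime: 10 });
const watsonxLimiter = new Bottleneck({
  maxConcurrent: 8,
  minTime: 50, // ~20 requests/second spacing
  reservoir: 20, // at most 20 scheduled jobs per window...
  reservoirRefreshInterval: 1000, // ...refilled every second
  reservoirRefreshAmount: 20,
});

// Same classification the service applies to Watsonx-hosted models.
function isWatsonxModel(modelKey: string): boolean {
  return (
    modelKey.startsWith("granite-") ||
    modelKey.startsWith("gpt-oss-") ||
    modelKey === "llama-3-3-70b-instruct" ||
    modelKey === "llama-4-maverick"
  );
}

// Schedule each call on the limiter matching the resolved model.
async function runTranslation<T>(
  modelKey: string,
  task: () => Promise<T>,
): Promise<T> {
  const limiter = isWatsonxModel(modelKey) ? watsonxLimiter : defaultLimiter;
  return limiter.schedule({ expiration: 15_000, priority: 5 }, task);
}

One design note: switching a shared flag rather than constructing limiters per request keeps Bottleneck's queue state intact, so in-flight jobs finish under the profile they were scheduled with.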

apps/api/src/api/llm/core/interfaces/llm-provider.interface.ts

Lines changed: 2 additions & 0 deletions
@@ -24,7 +24,9 @@ export interface ILlmProvider {
     options?: LlmRequestOptions,
   ): Promise<LlmResponse>;
   readonly key: string;
+}

+export interface IMultimodalLlmProvider extends ILlmProvider {
   /**
    * Send a request with image content to the LLM
    */
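
The body of the new IMultimodalLlmProvider (its image-request method) falls outside this hunk; only the closing brace of ILlmProvider and the new declaration are visible. Purely for orientation, a sketch of one plausible shape, where the member names (sendWithImages, ImageContent) are assumed and not taken from the commit:

// Hypothetical sketch only -- the real method signature is not shown in this hunk.
interface ImageContent {
  mimeType: string; // e.g. "image/png"
  base64Data: string;
}

interface IMultimodalLlmProviderSketch extends ILlmProvider {
  /** Send a request with image content to the LLM (per the doc comment above) */
  sendWithImages(
    prompt: string,
    images: ImageContent[],
    options?: LlmRequestOptions,
  ): Promise<LlmResponse>;
}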
