Commit aea11a9

Behzad changes (#137)

* Behzad changes
* updating packages
* adding new llm models (watsonx)
* update data transformation bug fixes
* adding templates

1 parent a4db4f7 commit aea11a9


44 files changed: +2005 −331 lines

.secrets.baseline

Lines changed: 1 addition & 19 deletions
@@ -3,7 +3,7 @@
     "files": null,
     "lines": null
   },
-  "generated_at": "2025-10-09T13:24:14Z",
+  "generated_at": "2025-10-20T16:35:31Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
@@ -311,24 +311,6 @@
       "verified_result": null
     }
   ],
-  "apps/web/lib/author.ts": [
-    {
-      "hashed_secret": "d3ecb0d890368d7659ee54010045b835dacb8efe",
-      "is_verified": false,
-      "line_number": 1430,
-      "type": "Secret Keyword",
-      "verified_result": null
-    }
-  ],
-  "apps/web/lib/shared.ts": [
-    {
-      "hashed_secret": "d3ecb0d890368d7659ee54010045b835dacb8efe",
-      "is_verified": false,
-      "line_number": 1051,
-      "type": "Secret Keyword",
-      "verified_result": null
-    }
-  ],
   "apps/web/public/ffmpeg-core/ffmpeg-core.js": [
     {
       "hashed_secret": "b4e44716dbbf57be3dae2f819d96795a85d06652",

apps/api/package.json

Lines changed: 3 additions & 2 deletions
@@ -32,6 +32,7 @@
   },
   "dependencies": {
     "@dqbd/tiktoken": "^1.0.7",
+    "@ibm-cloud/watsonx-ai": "^1.7.0",
     "@instana/collector": "^4.14.0",
     "@langchain/community": "^0.3.56",
     "@langchain/core": "^0.3.77",
@@ -86,8 +87,8 @@
     "pg": "^8.11.1",
     "prisma": "^5.3.1",
     "redis": "^4.7.0",
-    "reflect-metadata": "^0.2.0",
-    "rxjs": "^7.8.1",
+    "reflect-metadata": "0.2.0",
+    "rxjs": "7.8.1",
     "sn-messaging-ts-client": "^0.4.0",
     "socket.io": "^4.8.0",
     "unzipper": "^0.12.3",
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
+-- Add IBM foundation model variant to the LLMModel table
+-- This migration adds the IBM foundation model GPT-oss-120b
+
+INSERT INTO "LLMModel" ("modelKey", "displayName", "provider", "isActive", "createdAt", "updatedAt") VALUES
+('gpt-oss-120b', 'GPT-OSS-120B', 'OpenAI', true, NOW(), NOW());
+
+-- Add initial pricing data for the new GPT model
+-- Note: These are estimated prices, adjust based on actual OpenAI pricing when available
+WITH new_models AS (
+  SELECT id, "modelKey" FROM "LLMModel" WHERE "modelKey" IN ('gpt-oss-120b')
+)
+INSERT INTO "LLMPricing" ("modelId", "inputTokenPrice", "outputTokenPrice", "effectiveDate", "source", "isActive", "createdAt", "updatedAt")
+SELECT
+  m.id,
+  CASE
+    WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.00000015
+  END,
+  CASE
+    WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.0000006
+  END,
+  NOW(),
+  'MANUAL',
+  true,
+  NOW(),
+  NOW()
+FROM new_models m;
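
For context, the per-token figures above work out to roughly $0.15 per million input tokens and $0.60 per million output tokens. A minimal TypeScript sketch of how such stored prices translate into a per-request cost; the helper and constant names are illustrative, not part of this commit:

// Illustrative only: maps per-token prices from LLMPricing to a dollar cost.
const GPT_OSS_120B = { inputTokenPrice: 0.00000015, outputTokenPrice: 0.0000006 };

function estimateCostUSD(inputTokens: number, outputTokens: number): number {
  return (
    inputTokens * GPT_OSS_120B.inputTokenPrice +
    outputTokens * GPT_OSS_120B.outputTokenPrice
  );
}

// 10,000 input + 2,000 output tokens => 0.0015 + 0.0012 = $0.0027
console.log(estimateCostUSD(10_000, 2_000));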
Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+-- Add new WatsonX AI models to the LLMModel table
+-- This migration adds Granite, Llama 3.3, Llama 4 Maverick, and Mistral Medium models
+
+INSERT INTO "LLMModel" ("modelKey", "displayName", "provider", "isActive", "createdAt", "updatedAt") VALUES
+('granite-4-h-small', 'Granite 4-H Small', 'IBM', true, NOW(), NOW()),
+('llama-3-3-70b-instruct', 'Llama 3.3 70B Instruct', 'Meta', true, NOW(), NOW()),
+('llama-4-maverick', 'Llama 4 Maverick 17B', 'Meta', true, NOW(), NOW()),
+('mistral-medium-2505', 'Mistral Medium 2505', 'Mistral AI', true, NOW(), NOW());
+
+-- Add initial pricing data for the new models
+-- Note: Pricing based on WatsonX Resource Unit (RU) model where 1 RU = 1,000 tokens
+-- Granite models (Class 1): $0.0006/RU = $0.0000006 per token
+-- Llama/Mistral models (Class 2): $0.0018/RU = $0.0000018 per token
+-- Input and output tokens are charged at the same rate in WatsonX
+WITH new_models AS (
+  SELECT id, "modelKey" FROM "LLMModel" WHERE "modelKey" IN (
+    'granite-4-h-small',
+    'llama-3-3-70b-instruct',
+    'llama-4-maverick',
+    'mistral-medium-2505'
+  )
+)
+INSERT INTO "LLMPricing" ("modelId", "inputTokenPrice", "outputTokenPrice", "effectiveDate", "source", "isActive", "createdAt", "updatedAt")
+SELECT
+  m.id,
+  CASE
+    WHEN m."modelKey" = 'granite-4-h-small' THEN 0.0000006
+    WHEN m."modelKey" = 'llama-3-3-70b-instruct' THEN 0.0000018
+    WHEN m."modelKey" = 'llama-4-maverick' THEN 0.0000018
+    WHEN m."modelKey" = 'mistral-medium-2505' THEN 0.0000018
+  END,
+  CASE
+    WHEN m."modelKey" = 'granite-4-h-small' THEN 0.0000006
+    WHEN m."modelKey" = 'llama-3-3-70b-instruct' THEN 0.0000018
+    WHEN m."modelKey" = 'llama-4-maverick' THEN 0.0000018
+    WHEN m."modelKey" = 'mistral-medium-2505' THEN 0.0000018
+  END,
+  NOW(),
+  'MANUAL',
+  true,
+  NOW(),
+  NOW()
+FROM new_models m;
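
The Resource Unit arithmetic in the comments above is easy to sanity-check. A small sketch assuming the quoted rates (1 RU = 1,000 tokens, so price per token = price per RU / 1,000):

// Illustrative check of the RU math in the migration comments.
const TOKENS_PER_RU = 1_000;
const perToken = (pricePerRU: number): number => pricePerRU / TOKENS_PER_RU;

console.log(perToken(0.0006)); // Class 1 (Granite): 6e-7  = $0.0000006 per token
console.log(perToken(0.0018)); // Class 2 (Llama/Mistral): 1.8e-6 = $0.0000018 per token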
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+-- Add Granite Vision 3.2 2B model to the LLMModel table
+-- This migration adds IBM's multimodal Granite Vision model for image grading
+
+INSERT INTO "LLMModel" ("modelKey", "displayName", "provider", "isActive", "createdAt", "updatedAt") VALUES
+('granite-vision-3-2-2b', 'Granite Vision 3.2 2B', 'IBM', true, NOW(), NOW());
+
+-- Add initial pricing data for the Granite Vision model
+-- Note: Pricing based on WatsonX vision model rates
+-- Vision models typically cost more than text-only models due to image processing
+WITH new_model AS (
+  SELECT id, "modelKey" FROM "LLMModel" WHERE "modelKey" = 'granite-vision-3-2-2b'
+)
+INSERT INTO "LLMPricing" ("modelId", "inputTokenPrice", "outputTokenPrice", "effectiveDate", "source", "isActive", "createdAt", "updatedAt")
+SELECT
+  m.id,
+  0.000001, -- Input token price (slightly higher than granite-4-h-small due to vision capability)
+  0.000001, -- Output token price (same as input for vision models)
+  NOW(),
+  'MANUAL',
+  true,
+  NOW(),
+  NOW()
+FROM new_model m;

apps/api/src/api/assignment/v2/services/translation.service.ts

Lines changed: 72 additions & 8 deletions
@@ -1,8 +1,10 @@
 /* eslint-disable unicorn/no-null */
-import { Injectable, Logger, NotFoundException } from "@nestjs/common";
+import { Inject, Injectable, Logger, NotFoundException } from "@nestjs/common";
 import { Prisma } from "@prisma/client";
 import Bottleneck from "bottleneck";
 import { LlmFacadeService } from "src/api/llm/llm-facade.service";
+import { LLM_RESOLVER_SERVICE } from "src/api/llm/llm.constants";
+import { LLMResolverService } from "src/api/llm/core/services/llm-resolver.service";
 import { PrismaService } from "src/database/prisma.service";
 import {
   getAllLanguageCodes,
@@ -61,7 +63,9 @@ interface BatchProcessResult {
 export class TranslationService {
   private readonly logger = new Logger(TranslationService.name);
   private readonly languageTranslation: boolean;
-  private readonly limiter: Bottleneck;
+  private readonly limiter: Bottleneck; // default high-throughput
+  private readonly watsonxLimiter: Bottleneck; // conservative for IBM Watsonx
+  private useWatsonxLimiterForTranslation = false;

   // Performance optimized settings
   private readonly MAX_BATCH_SIZE = 100; // Increased for better throughput
@@ -91,6 +95,8 @@ export class TranslationService {
     private readonly prisma: PrismaService,
     private readonly llmFacadeService: LlmFacadeService,
     private readonly jobStatusService: JobStatusServiceV2,
+    @Inject(LLM_RESOLVER_SERVICE)
+    private readonly llmResolver: LLMResolverService,
   ) {
     this.languageTranslation =
       process.env.ENABLE_TRANSLATION.toString().toLowerCase() === "true" ||
@@ -105,10 +111,62 @@ export class TranslationService {
       strategy: Bottleneck.strategy.OVERFLOW,
       timeout: this.OPERATION_TIMEOUT,
     });
+    // More conservative limiter for Watsonx-backed translations
+    this.watsonxLimiter = new Bottleneck({
+      maxConcurrent: 8,
+      minTime: 50, // ~20 rps spacing
+      reservoir: 20,
+      reservoirRefreshInterval: 1000,
+      reservoirRefreshAmount: 20,
+      highWater: 1000,
+      strategy: Bottleneck.strategy.OVERFLOW,
+      timeout: this.OPERATION_TIMEOUT,
+    });
     setInterval(() => this.checkLimiterHealth(), 30_000);
     setInterval(() => this.checkJobTimeouts(), 60_000); // Check every minute
   }

+  /**
+   * Decide which limiter to use based on current translation model assignment
+   */
+  private async syncLimiterForTranslationModel(): Promise<void> {
+    try {
+      const modelKey = await this.llmResolver.getModelKeyWithFallback(
+        "translation",
+        "gpt-4o-mini",
+      );
+      const isWatsonx = this.isWatsonxModel(modelKey);
+      if (isWatsonx !== this.useWatsonxLimiterForTranslation) {
+        this.useWatsonxLimiterForTranslation = isWatsonx;
+        this.logger.debug(
+          `Translation limiter set to ${isWatsonx ? "Watsonx profile" : "default profile"} (model: ${modelKey})`,
+        );
+      }
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      this.logger.warn(
+        `Could not resolve translation model; using default limiter. Reason: ${message}`,
+      );
+      this.useWatsonxLimiterForTranslation = false;
+    }
+  }
+
+  private isWatsonxModel(modelKey: string): boolean {
+    if (!modelKey) return false;
+    return (
+      modelKey.startsWith("granite-") ||
+      modelKey.startsWith("gpt-oss-") ||
+      modelKey === "llama-3-3-70b-instruct" ||
+      modelKey === "llama-4-maverick"
+    );
+  }
+
+  private getActiveLimiter(): Bottleneck {
+    return this.useWatsonxLimiterForTranslation
+      ? this.watsonxLimiter
+      : this.limiter;
+  }
+
   /**
    * Process translations in parallel with efficient batching
    * @param items Items to translate
@@ -133,7 +191,7 @@ export class TranslationService {
       const chunk = chunks[chunkIndex];

       const processingPromises = chunk.map((item) =>
-        this.limiter
+        this.getActiveLimiter()
           .schedule({ expiration: 15_000, priority: 5 }, () =>
             batchProcessor(item),
           )
@@ -582,7 +640,8 @@ export class TranslationService {
   */
  private checkLimiterHealth(): void {
    try {
-      const counts = this.limiter.counts();
+      const limiter = this.getActiveLimiter();
+      const counts = limiter.counts();

      if (
        counts.RUNNING > 10 &&
@@ -600,10 +659,10 @@ export class TranslationService {
        this.logger.warn(
          `High queue load: ${counts.QUEUED} jobs queued. Reducing accepting rate.`,
        );
-        this.limiter.updateSettings({ maxConcurrent: 5 });
+        limiter.updateSettings({ maxConcurrent: 5 });

        setTimeout(() => {
-          this.limiter.updateSettings({ maxConcurrent: 25 });
+          limiter.updateSettings({ maxConcurrent: 25 });
          this.logger.log("Restored normal concurrency limits");
        }, 30_000);
      }
@@ -623,8 +682,9 @@ export class TranslationService {
        "Resetting bottleneck limiter due to potential stalled state",
      );

-      void this.limiter.stop({ dropWaitingJobs: false }).then(() => {
-        this.limiter.updateSettings({
+      const limiter = this.getActiveLimiter();
+      void limiter.stop({ dropWaitingJobs: false }).then(() => {
+        limiter.updateSettings({
          maxConcurrent: 25,
          minTime: 10,
          reservoir: 100,
@@ -1018,6 +1078,7 @@ export class TranslationService {
        )
      : undefined;

+    await this.syncLimiterForTranslationModel();
     await this.processBatchesInParallel(
       languageCodes,
       async (lang: string) => {
@@ -1227,6 +1288,7 @@ export class TranslationService {
        )
      : undefined;

+    await this.syncLimiterForTranslationModel();
     const results = await this.processBatchesInParallel(
       supportedLanguages,
       async (lang: string) => {
@@ -1404,6 +1466,7 @@ export class TranslationService {
      }
    }

+    await this.syncLimiterForTranslationModel();
     const results = await this.processBatchesInParallel(
       supportedLanguages,
       async (lang: string) => {
@@ -1566,6 +1629,7 @@ export class TranslationService {
      });
    }

+    await this.syncLimiterForTranslationModel();
     const results = await this.processBatchesInParallel(
       supportedLanguages,
       async (lang: string) => {
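
Taken together, the changes above amount to two Bottleneck profiles plus a model-key predicate that picks between them before each translation run. A condensed, self-contained sketch of the pattern; the limiter values are copied from the diff, while the runTranslation wrapper is illustrative rather than the service's actual method:

import Bottleneck from "bottleneck";

// Default high-throughput profile vs. conservative Watsonx profile.
const defaultLimiter = new Bottleneck({ maxConcurrent: 25, minTime: 10 });
const watsonxLimiter = new Bottleneck({
  maxConcurrent: 8,
  minTime: 50, // ~20 requests/second spacing
  reservoir: 20, // at most 20 scheduled jobs per window...
  reservoirRefreshInterval: 1000, // ...refilled every second
  reservoirRefreshAmount: 20,
});

// Same classification the service applies to Watsonx-hosted models.
function isWatsonxModel(modelKey: string): boolean {
  return (
    modelKey.startsWith("granite-") ||
    modelKey.startsWith("gpt-oss-") ||
    modelKey === "llama-3-3-70b-instruct" ||
    modelKey === "llama-4-maverick"
  );
}

// Schedule each call on the limiter matching the resolved model.
async function runTranslation<T>(
  modelKey: string,
  task: () => Promise<T>,
): Promise<T> {
  const limiter = isWatsonxModel(modelKey) ? watsonxLimiter : defaultLimiter;
  return limiter.schedule({ expiration: 15_000, priority: 5 }, task);
}

One design note: switching a shared flag rather than constructing limiters per request keeps Bottleneck's queue state intact, so in-flight jobs finish under the profile they were scheduled with.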

apps/api/src/api/llm/core/interfaces/llm-provider.interface.ts

Lines changed: 2 additions & 0 deletions
@@ -24,7 +24,9 @@ export interface ILlmProvider {
     options?: LlmRequestOptions,
   ): Promise<LlmResponse>;
   readonly key: string;
+}

+export interface IMultimodalLlmProvider extends ILlmProvider {
   /**
    * Send a request with image content to the LLM
    */
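
The body of the new IMultimodalLlmProvider (its image-request method) falls outside this hunk; only the closing brace of ILlmProvider and the new declaration are visible. Purely for orientation, a sketch of one plausible shape, where the member names (sendWithImages, ImageContent) are assumed and not taken from the commit:

// Hypothetical sketch only -- the real method signature is not shown in this hunk.
interface ImageContent {
  mimeType: string; // e.g. "image/png"
  base64Data: string;
}

interface IMultimodalLlmProviderSketch extends ILlmProvider {
  /** Send a request with image content to the LLM (per the doc comment above) */
  sendWithImages(
    prompt: string,
    images: ImageContent[],
    options?: LlmRequestOptions,
  ): Promise<LlmResponse>;
}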
