24 changes: 24 additions & 0 deletions backend/app/alembic/versions/219033c644de_add_llm_im_jobs_table.py
@@ -0,0 +1,24 @@
"""Add LLM in jobs table

Revision ID: 219033c644de
Revises: e7c68e43ce6f
Create Date: 2025-10-17 15:38:33.565674

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "219033c644de"
down_revision = "e7c68e43ce6f"
branch_labels = None
depends_on = None


def upgrade():
op.execute("ALTER TYPE jobtype ADD VALUE IF NOT EXISTS 'LLM_API'")


def downgrade():
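    # PostgreSQL cannot drop a value from an existing enum type, so this migration is intentionally not reverted.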
pass
2 changes: 2 additions & 0 deletions backend/app/api/main.py
@@ -7,6 +7,7 @@
documents,
doc_transformation_job,
login,
llm,
organization,
openai_conversation,
project,
@@ -31,6 +32,7 @@
api_router.include_router(credentials.router)
api_router.include_router(documents.router)
api_router.include_router(doc_transformation_job.router)
api_router.include_router(llm.router)
api_router.include_router(login.router)
api_router.include_router(onboarding.router)
api_router.include_router(openai_conversation.router)
34 changes: 34 additions & 0 deletions backend/app/api/routes/llm.py
@@ -0,0 +1,34 @@
import logging

from fastapi import APIRouter

from app.api.deps import AuthContext, SessionDep
from app.models.llm import LLMCallRequest
from app.services.llm.jobs import start_job
from app.utils import APIResponse

logger = logging.getLogger(__name__)
router = APIRouter(tags=["llm"])


@router.post("/llm/call")
async def llm_call(
request: LLMCallRequest, _session: SessionDep, _current_user: AuthContext
):
"""
Endpoint to initiate an LLM call as a background job.
"""
project_id = _current_user.project.id
organization_id = _current_user.organization.id

# Start background job
job_id = start_job(
db=_session,
request=request,
project_id=project_id,
organization_id=organization_id,
)

return APIResponse.success_response(
data={"status": "processing", "message": "LLM call job scheduled"},
)
Comment on lines 14 to 34

⚠️ Potential issue | 🔴 Critical

Guard against missing context; return 202 and include job_id (fixes Ruff F841)

_current_user.project / .organization are optional; direct .id access can raise. Also, return 202 and expose job_id for polling.

-import logging
-
-from fastapi import APIRouter
+import logging
+
+from fastapi import APIRouter, HTTPException
@@
-@router.post("/llm/call")
-async def llm_call(
-    request: LLMCallRequest, _session: SessionDep, _current_user: AuthContext
-):
+@router.post("/llm/call", status_code=202)
+async def llm_call(
+    request: LLMCallRequest, _session: SessionDep, _current_user: AuthContext
+) -> APIResponse[dict[str, str]]:
@@
-    project_id = _current_user.project.id
-    organization_id = _current_user.organization.id
+    if _current_user.project is None or _current_user.organization is None:
+        raise HTTPException(status_code=403, detail="Project and organization context required")
+    project_id = _current_user.project.id
+    organization_id = _current_user.organization.id
@@
-    job_id = start_job(
+    job_id = start_job(
         db=_session,
         request=request,
         project_id=project_id,
         organization_id=organization_id,
     )
+    logger.info(f"[llm_call] Scheduled LLM job | job_id={job_id} project_id={project_id} org_id={organization_id}")
@@
-    return APIResponse.success_response(
-        data={"status": "processing", "message": "LLM call job scheduled"},
-    )
+    return APIResponse.success_response(
+        data={
+            "status": "processing",
+            "message": "LLM call job scheduled",
+            "job_id": str(job_id),
+        },
+    )

Note: This also resolves Ruff F841 (unused job_id). As per coding guidelines (Python 3.11+), the explicit return type is added.

Committable suggestion skipped: line range outside the PR's diff.

🧰 Tools
🪛 Ruff (0.14.1)

25-25: Local variable job_id is assigned to but never used

Remove assignment to unused variable job_id

(F841)
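
For reference, a minimal sketch of the endpoint with the suggested changes applied (assumes APIResponse.success_response, AuthContext, and start_job behave exactly as shown elsewhere in this PR; this is an illustration, not part of the diff):

import logging

from fastapi import APIRouter, HTTPException

from app.api.deps import AuthContext, SessionDep
from app.models.llm import LLMCallRequest
from app.services.llm.jobs import start_job
from app.utils import APIResponse

logger = logging.getLogger(__name__)
router = APIRouter(tags=["llm"])


@router.post("/llm/call", status_code=202)
async def llm_call(
    request: LLMCallRequest, _session: SessionDep, _current_user: AuthContext
):
    """Schedule an LLM call as a background job and return the job_id for polling."""
    # AuthContext may carry no project/organization; fail fast instead of raising AttributeError.
    if _current_user.project is None or _current_user.organization is None:
        raise HTTPException(
            status_code=403, detail="Project and organization context required"
        )

    job_id = start_job(
        db=_session,
        request=request,
        project_id=_current_user.project.id,
        organization_id=_current_user.organization.id,
    )
    logger.info("[llm_call] Scheduled LLM job | job_id=%s", job_id)

    return APIResponse.success_response(
        data={
            "status": "processing",
            "message": "LLM call job scheduled",
            "job_id": str(job_id),
        },
    )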

7 changes: 7 additions & 0 deletions backend/app/models/__init__.py
@@ -48,6 +48,13 @@

from .job import Job, JobType, JobStatus, JobUpdate

from .llm import (
LLMCallRequest,
LLMCallResponse,
LLMConfig,
LLMModelSpec,
)

from .message import Message
from .model_evaluation import (
ModelEvaluation,
1 change: 1 addition & 0 deletions backend/app/models/job.py
@@ -15,6 +15,7 @@ class JobStatus(str, Enum):

class JobType(str, Enum):
RESPONSE = "RESPONSE"
LLM_API = "LLM_API"


class Job(SQLModel, table=True):
21 changes: 21 additions & 0 deletions backend/app/models/llm/__init__.py
@@ -0,0 +1,21 @@
"""LLM models module.

This module provides all data models for LLM functionality including
requests, responses, configurations, and model specifications.
"""

from app.models.llm.config import (
LLMConfig,
LLMModelSpec,
)
from app.models.llm.request import LLMCallRequest
from app.models.llm.response import LLMCallResponse

__all__ = [
# Request/Response models
"LLMCallRequest",
"LLMCallResponse",
# Configuration models
"LLMConfig",
"LLMModelSpec",
]
51 changes: 51 additions & 0 deletions backend/app/models/llm/config.py
@@ -0,0 +1,51 @@
"""LLM configuration models.

This module contains all configuration-related models for LLM requests,
including model specifications and advanced configuration options.
"""

from typing import Literal

from sqlmodel import SQLModel


class LLMModelSpec(SQLModel):
"""Specification for the LLM model and its parameters.

This contains the actual model configuration that will be sent to the provider.
Supports both standard models and advanced configurations.

Attributes:
model: Model identifier (e.g., "gpt-4", "claude-3-opus")
provider: Provider type (openai, anthropic, google, azure)
temperature: Sampling temperature (0.0-2.0)
reasoning_effort: Reasoning effort level for o-series models ("low", "medium", "high")
text_verbosity: Text verbosity level ("low", "medium", "high")
max_tokens: Maximum number of tokens to generate
top_p: Nucleus sampling parameter (0.0-1.0)
"""

model: str
provider: str = "openai"
temperature: float | None = None
reasoning_effort: Literal["low", "medium", "high"] | None = None
text_verbosity: Literal["low", "medium", "high"] | None = None
max_tokens: int | None = None
top_p: float | None = None


class LLMConfig(SQLModel):
"""LLM configuration containing model specification and prompt.

This wraps the model spec and can be extended with additional
provider-agnostic configuration in the future.

Attributes:
prompt: The user's input prompt
vector_store_id: Vector store ID for RAG functionality
llm_model_spec: Model specification and parameters
"""

prompt: str
vector_store_id: str | None = None
llm_model_spec: LLMModelSpec
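
For illustration, the two models nest like this (values below are hypothetical, not part of the diff):

from app.models.llm import LLMConfig, LLMModelSpec

# Model parameters live on LLMModelSpec; LLMConfig wraps the spec together with the prompt.
spec = LLMModelSpec(model="gpt-4", provider="openai", temperature=0.2, max_tokens=512)
config = LLMConfig(prompt="Summarize the attached report.", llm_model_spec=spec)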
23 changes: 23 additions & 0 deletions backend/app/models/llm/request.py
@@ -0,0 +1,23 @@
"""LLM request models.
This module contains request models for LLM API calls.
"""

from sqlmodel import SQLModel

from app.models.llm.config import LLMConfig


class LLMCallRequest(SQLModel):
"""Request model for /v1/llm/call endpoint.
This model decouples LLM calls from the assistants table,
allowing dynamic configuration per request.
Attributes:
llm: LLM configuration containing model spec and prompt
max_num_results: Number of results to return from vector store file search
"""

llm: LLMConfig
max_num_results: int = 20 # For vector store file search

🛠️ Refactor suggestion | 🟠 Major

Use Field descriptor for max_num_results with proper constraints.

The field should use SQLModel's Field descriptor with validation constraints and a description, rather than an inline comment. Based on the OpenAISpec (1-50 range), add appropriate bounds.

Apply this diff:

+from sqlmodel import SQLModel, Field
+
 class LLMCallRequest(SQLModel):
     ...
     llm: LLMConfig
-    max_num_results: int = 20  # For vector store file search
+    max_num_results: int = Field(
+        default=20,
+        ge=1,
+        le=50,
+        description="Number of results to return from vector store file search"
+    )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
     llm: LLMConfig
-    max_num_results: int = 20  # For vector store file search
+    max_num_results: int = Field(
+        default=20,
+        ge=1,
+        le=50,
+        description="Number of results to return from vector store file search"
+    )
🤖 Prompt for AI Agents
In backend/app/models/llm/request.py around lines 22 to 23, replace the inline
comment on max_num_results with SQLModel's Field descriptor: change the
declaration to use Field(default=20, description="Max number of vector search
results", ge=1, le=50) so the model enforces the 1–50 bounds per the OpenAISpec
and preserves the default of 20.
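
A quick sketch of what the proposed bounds would enforce (hypothetical values; assumes the Field(ge=1, le=50) change above has been applied and that LLMCallRequest stays a non-table SQLModel, which validates on construction):

from pydantic import ValidationError

from app.models.llm import LLMCallRequest, LLMConfig, LLMModelSpec

config = LLMConfig(
    prompt="hello",
    llm_model_spec=LLMModelSpec(model="gpt-4"),
)

try:
    # 500 exceeds the suggested upper bound of 50, so validation should reject it.
    LLMCallRequest(llm=config, max_num_results=500)
except ValidationError as exc:
    print(exc)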

28 changes: 28 additions & 0 deletions backend/app/models/llm/response.py
@@ -0,0 +1,28 @@
"""LLM response models.

This module contains response models for LLM API calls.
"""
from sqlmodel import SQLModel


class LLMCallResponse(SQLModel):
"""Response model for /v1/llm/call endpoint.

Attributes:
status: Response status (success, error, etc.)
response_id: Unique identifier for this response
message: The generated text response
model: Model identifier that was used
input_tokens: Number of input tokens consumed
output_tokens: Number of output tokens generated
total_tokens: Total tokens consumed (input + output)
file_search_results: Optional list of file search results from RAG
"""

status: str
response_id: str
message: str
model: str
input_tokens: int
output_tokens: int
total_tokens: int
Comment on lines +8 to +28

⚠️ Potential issue | 🟡 Minor

Docstring mentions missing field file_search_results.

Line 19 in the docstring mentions file_search_results: Optional list of file search results from RAG, but this field is not defined in the model. Either add the field or remove it from the docstring.

Apply this diff to add the missing field:

     input_tokens: int
     output_tokens: int
     total_tokens: int
+    file_search_results: list[dict] | None = None

Or remove it from the docstring:

         input_tokens: Number of input tokens consumed
         output_tokens: Number of output tokens generated
         total_tokens: Total tokens consumed (input + output)
-        file_search_results: Optional list of file search results from RAG
     """
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
 class LLMCallResponse(SQLModel):
     """Response model for /v1/llm/call endpoint.

     Attributes:
         status: Response status (success, error, etc.)
         response_id: Unique identifier for this response
         message: The generated text response
         model: Model identifier that was used
         input_tokens: Number of input tokens consumed
         output_tokens: Number of output tokens generated
         total_tokens: Total tokens consumed (input + output)
         file_search_results: Optional list of file search results from RAG
     """

     status: str
     response_id: str
     message: str
     model: str
     input_tokens: int
     output_tokens: int
     total_tokens: int
+    file_search_results: list[dict] | None = None
🤖 Prompt for AI Agents
In backend/app/models/llm/response.py around lines 8–28, the docstring documents
a file_search_results field that is missing from the model; add the field
instead of removing the docstring entry: import Optional and List from typing
(and import the FileSearchResult model type), then add a class attribute like
file_search_results: Optional[List[FileSearchResult]] = None to the
LLMCallResponse model so the docstring and model match.

22 changes: 22 additions & 0 deletions backend/app/services/llm/__init__.py
@@ -0,0 +1,22 @@
# Main orchestration
from app.services.llm.orchestrator import execute_llm_call

# Providers
from app.services.llm.providers import (
BaseProvider,
ProviderFactory,
OpenAIProvider,
)


# Initialize model specs on module import
import app.services.llm.specs # noqa: F401

__all__ = [
# Main entry point
"execute_llm_call",
# Providers
"BaseProvider",
"ProviderFactory",
"OpenAIProvider",
]
133 changes: 133 additions & 0 deletions backend/app/services/llm/jobs.py
@@ -0,0 +1,133 @@
import logging
from uuid import UUID

from fastapi import HTTPException
from sqlmodel import Session
from asgi_correlation_id import correlation_id

from app.celery.utils import start_high_priority_job
from app.crud import JobCrud
from app.core.db import engine

from app.models import JobType, JobStatus, JobUpdate
from app.models.llm import LLMCallRequest, LLMCallResponse
from app.services.llm.orchestrator import execute_llm_call
from app.utils import get_openai_client

logger = logging.getLogger(__name__)


def start_job(
db: Session, request: LLMCallRequest, project_id: int, organization_id: int
) -> UUID:
"""Create an LLM job and schedule Celery task."""
trace_id = correlation_id.get() or "N/A"
job_crud = JobCrud(session=db)
job = job_crud.create(job_type=JobType.LLM_API, trace_id=trace_id)

try:
task_id = start_high_priority_job(
function_path="app.services.llm.jobs.execute_job",
project_id=project_id,
job_id=str(job.id),
trace_id=trace_id,
request_data=request.model_dump(),
organization_id=organization_id,
)
except Exception as e:
logger.error(
f"[start_job] Error starting Celery task: {str(e)} | job_id={job.id}, project_id={project_id}",
exc_info=True,
)
job_update = JobUpdate(status=JobStatus.FAILED, error_message=str(e))
job_crud.update(job_id=job.id, job_update=job_update)
raise HTTPException(
status_code=500, detail="Internal server error while executing LLM call"
)

logger.info(
f"[start_job] Job scheduled for LLM call | job_id={job.id}, project_id={project_id}, task_id={task_id}"
)
return job.id


def execute_job(
request_data: dict,
project_id: int,
organization_id: int,
job_id: str,
task_id: str,
task_instance,
) -> LLMCallResponse | None:
"""Celery task to process an LLM request asynchronously."""
request = LLMCallRequest(**request_data)
job_id_uuid = UUID(job_id)

logger.info(
f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, "
f"provider={request.llm.provider}, model={request.llm.llm_model_spec.model}"
)

Comment on lines 91 to 95

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Bug: wrong attribute access; crashes before try.

LLMConfig has no provider; use request.llm.llm_model_spec.provider. Currently this can raise AttributeError before status moves to PROCESSING.

-    logger.info(
-        f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, "
-        f"provider={request.llm.provider}, model={request.llm.llm_model_spec.model}"
-    )
+    logger.info(
+        f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, "
+        f"provider={request.llm.llm_model_spec.provider}, model={request.llm.llm_model_spec.model}"
+    )

🏁 Script executed:

#!/bin/bash
# Confirm there are no other bad accesses to `request.llm.provider`
rg -n -C2 -g '!**/venv/**' -P '\brequest\.llm\.provider\b'

Length of output: 379



---

🏁 Script executed:

#!/bin/bash
# Find LLMConfig definition and its structure
fd -e py | xargs rg -l 'class LLMConfig' | head -5

Length of output: 32


🏁 Script executed:

#!/bin/bash
# Search for llm_model_spec definition or usage patterns
rg -n 'llm_model_spec' backend/app/services/llm/jobs.py -A 2 -B 2

Length of output: 354


🏁 Script executed:

#!/bin/bash
# Look for the request model definition
rg -n 'class.*Request|@dataclass' backend/app/services/llm/jobs.py -A 5 | head -30

Length of output: 0



---

🏁 Script executed:

#!/bin/bash
# Examine LLMConfig structure
cat backend/app/models/llm/config.py

Length of output: 1728


🏁 Script executed:

#!/bin/bash
# Check function signature to understand request parameter type
rg -n 'def execute_job' backend/app/services/llm/jobs.py -A 10

Length of output: 343


Bug confirmed: AttributeError on line 68 before try block.

LLMConfig has no provider attribute; the correct path is request.llm.llm_model_spec.provider. Line 79 in the same function already uses the correct pattern. This error will crash the job before status moves to PROCESSING.

    logger.info(
        f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, "
-        f"provider={request.llm.provider}, model={request.llm.llm_model_spec.model}"
+        f"provider={request.llm.llm_model_spec.provider}, model={request.llm.llm_model_spec.model}"
    )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
     logger.info(
         f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, "
-        f"provider={request.llm.provider}, model={request.llm.llm_model_spec.model}"
+        f"provider={request.llm.llm_model_spec.provider}, model={request.llm.llm_model_spec.model}"
     )
🤖 Prompt for AI Agents
In backend/app/services/llm/jobs.py around lines 66 to 70, the logger references
request.llm.provider which does not exist; replace it with
request.llm.llm_model_spec.provider (matching line 79) so the log uses the
correct attribute path and avoid the AttributeError that crashes before the job
status is updated.

try:
# Update job status to PROCESSING
with Session(engine) as session:
job_crud = JobCrud(session=session)
job_crud.update(
job_id=job_id_uuid, job_update=JobUpdate(status=JobStatus.PROCESSING)
)

provider_type = request.llm.llm_model_spec.provider

if provider_type == "openai":
client = get_openai_client(session, organization_id, project_id)
else:
error_msg = f"Provider '{provider_type}' is not yet supported"
logger.error(f"[execute_job] {error_msg} | job_id={job_id}")
job_crud = JobCrud(session=session)
job_crud.update(
job_id=job_id_uuid,
job_update=JobUpdate(
status=JobStatus.FAILED, error_message=error_msg
),
)
return None

response, error = execute_llm_call(
request=request,
client=client,
)

with Session(engine) as session:
job_crud = JobCrud(session=session)
if response:
job_crud.update(
job_id=job_id_uuid, job_update=JobUpdate(status=JobStatus.SUCCESS)
)
logger.info(
f"[execute_job] Successfully completed LLM job | job_id={job_id}, "
f"response_id={response.response_id}, tokens={response.total_tokens}"
)
return response
else:
job_crud.update(
job_id=job_id_uuid,
job_update=JobUpdate(
status=JobStatus.FAILED,
error_message=error or "Unknown error occurred",
),
)
logger.error(
f"[execute_job] Failed to execute LLM job | job_id={job_id}, error={error}"
)
return None

except Exception as e:
error_message = f"Unexpected error in LLM job execution: {str(e)}"
logger.error(f"[execute_job] {error_message} | job_id={job_id}", exc_info=True)
with Session(engine) as session:
job_crud = JobCrud(session=session)
job_crud.update(
job_id=job_id_uuid,
job_update=JobUpdate(status=JobStatus.FAILED, error_message=str(e)),
)
raise