24 changes: 24 additions & 0 deletions backend/app/alembic/versions/219033c644de_add_llm_im_jobs_table.py
@@ -0,0 +1,24 @@
"""Add LLM in jobs table

Revision ID: 219033c644de
Revises: e7c68e43ce6f
Create Date: 2025-10-17 15:38:33.565674

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "219033c644de"
down_revision = "e7c68e43ce6f"
branch_labels = None
depends_on = None


def upgrade():
op.execute("ALTER TYPE jobtype ADD VALUE IF NOT EXISTS 'LLM_API'")


def downgrade():
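    # PostgreSQL cannot drop a value from an existing enum type, so this migration is intentionally not reverted.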
pass
2 changes: 2 additions & 0 deletions backend/app/api/main.py
@@ -7,6 +7,7 @@
documents,
doc_transformation_job,
login,
llm,
organization,
openai_conversation,
project,
@@ -31,6 +32,7 @@
api_router.include_router(credentials.router)
api_router.include_router(documents.router)
api_router.include_router(doc_transformation_job.router)
api_router.include_router(llm.router)
api_router.include_router(login.router)
api_router.include_router(onboarding.router)
api_router.include_router(openai_conversation.router)
34 changes: 34 additions & 0 deletions backend/app/api/routes/llm.py
@@ -0,0 +1,34 @@
import logging

from fastapi import APIRouter

from app.api.deps import AuthContext, SessionDep
from app.models.llm import LLMCallRequest
from app.services.llm.jobs import start_job
from app.utils import APIResponse

logger = logging.getLogger(__name__)
router = APIRouter(tags=["llm"])


@router.post("/llm/call")
async def llm_call(
request: LLMCallRequest, _session: SessionDep, _current_user: AuthContext
):
"""
Endpoint to initiate an LLM call as a background job.
"""
project_id = _current_user.project.id
organization_id = _current_user.organization.id

# Start background job
job_id = start_job(
db=_session,
request=request,
project_id=project_id,
organization_id=organization_id,
)

return APIResponse.success_response(
data={"status": "processing", "message": "LLM call job scheduled"},
)
Comment on lines 14 to 34

⚠️ Potential issue | 🔴 Critical

Guard against missing context; return 202 and include job_id (fixes Ruff F841)

_current_user.project / .organization are optional; direct .id access can raise. Also, return 202 and expose job_id for polling.

-import logging
-
-from fastapi import APIRouter
+import logging
+
+from fastapi import APIRouter, HTTPException
@@
-@router.post("/llm/call")
-async def llm_call(
-    request: LLMCallRequest, _session: SessionDep, _current_user: AuthContext
-):
+@router.post("/llm/call", status_code=202)
+async def llm_call(
+    request: LLMCallRequest, _session: SessionDep, _current_user: AuthContext
+) -> APIResponse[dict[str, str]]:
@@
-    project_id = _current_user.project.id
-    organization_id = _current_user.organization.id
+    if _current_user.project is None or _current_user.organization is None:
+        raise HTTPException(status_code=403, detail="Project and organization context required")
+    project_id = _current_user.project.id
+    organization_id = _current_user.organization.id
@@
-    job_id = start_job(
+    job_id = start_job(
         db=_session,
         request=request,
         project_id=project_id,
         organization_id=organization_id,
     )
+    logger.info(f"[llm_call] Scheduled LLM job | job_id={job_id} project_id={project_id} org_id={organization_id}")
@@
-    return APIResponse.success_response(
-        data={"status": "processing", "message": "LLM call job scheduled"},
-    )
+    return APIResponse.success_response(
+        data={
+            "status": "processing",
+            "message": "LLM call job scheduled",
+            "job_id": str(job_id),
+        },
+    )

Note: This also resolves Ruff F841 (unused job_id). As per coding guidelines (Python 3.11+), the explicit return type is added.

Committable suggestion skipped: line range outside the PR's diff.

🧰 Tools
🪛 Ruff (0.14.1)

25-25: Local variable job_id is assigned to but never used

Remove assignment to unused variable job_id

(F841)
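
For reference, a minimal sketch of the endpoint with the suggested changes applied (assumes APIResponse.success_response, AuthContext, and start_job behave exactly as shown elsewhere in this PR; this is an illustration, not part of the diff):

import logging

from fastapi import APIRouter, HTTPException

from app.api.deps import AuthContext, SessionDep
from app.models.llm import LLMCallRequest
from app.services.llm.jobs import start_job
from app.utils import APIResponse

logger = logging.getLogger(__name__)
router = APIRouter(tags=["llm"])


@router.post("/llm/call", status_code=202)
async def llm_call(
    request: LLMCallRequest, _session: SessionDep, _current_user: AuthContext
):
    """Schedule an LLM call as a background job and return the job_id for polling."""
    # AuthContext may carry no project/organization; fail fast instead of raising AttributeError.
    if _current_user.project is None or _current_user.organization is None:
        raise HTTPException(
            status_code=403, detail="Project and organization context required"
        )

    job_id = start_job(
        db=_session,
        request=request,
        project_id=_current_user.project.id,
        organization_id=_current_user.organization.id,
    )
    logger.info("[llm_call] Scheduled LLM job | job_id=%s", job_id)

    return APIResponse.success_response(
        data={
            "status": "processing",
            "message": "LLM call job scheduled",
            "job_id": str(job_id),
        },
    )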

7 changes: 7 additions & 0 deletions backend/app/models/__init__.py
@@ -48,6 +48,13 @@

from .job import Job, JobType, JobStatus, JobUpdate

from .llm import (
LLMCallRequest,
LLMCallResponse,
LLMConfig,
LLMModelSpec,
)

from .message import Message
from .model_evaluation import (
ModelEvaluation,
1 change: 1 addition & 0 deletions backend/app/models/job.py
@@ -15,6 +15,7 @@ class JobStatus(str, Enum):

class JobType(str, Enum):
RESPONSE = "RESPONSE"
LLM_API = "LLM_API"


class Job(SQLModel, table=True):
21 changes: 21 additions & 0 deletions backend/app/models/llm/__init__.py
@@ -0,0 +1,21 @@
"""LLM models module.

This module provides all data models for LLM functionality including
requests, responses, configurations, and model specifications.
"""

from app.models.llm.config import (
LLMConfig,
LLMModelSpec,
)
from app.models.llm.request import LLMCallRequest
from app.models.llm.response import LLMCallResponse

__all__ = [
# Request/Response models
"LLMCallRequest",
"LLMCallResponse",
# Configuration models
"LLMConfig",
"LLMModelSpec",
]
51 changes: 51 additions & 0 deletions backend/app/models/llm/config.py
@@ -0,0 +1,51 @@
"""LLM configuration models.

This module contains all configuration-related models for LLM requests,
including model specifications and advanced configuration options.
"""

from typing import Literal

from sqlmodel import SQLModel


class LLMModelSpec(SQLModel):
"""Specification for the LLM model and its parameters.

This contains the actual model configuration that will be sent to the provider.
Supports both standard models and advanced configurations.

Attributes:
model: Model identifier (e.g., "gpt-4", "claude-3-opus")
provider: Provider type (openai, anthropic, google, azure)
temperature: Sampling temperature (0.0-2.0)
reasoning_effort: Reasoning effort level for o-series models ("low", "medium", "high")
text_verbosity: Text verbosity level ("low", "medium", "high")
max_tokens: Maximum number of tokens to generate
top_p: Nucleus sampling parameter (0.0-1.0)
"""

model: str
provider: str = "openai"
temperature: float | None = None
reasoning_effort: Literal["low", "medium", "high"] | None = None
text_verbosity: Literal["low", "medium", "high"] | None = None
max_tokens: int | None = None
top_p: float | None = None


class LLMConfig(SQLModel):
"""LLM configuration containing model specification and prompt.

This wraps the model spec and can be extended with additional
provider-agnostic configuration in the future.

Attributes:
prompt: The user's input prompt
vector_store_id: Vector store ID for RAG functionality
llm_model_spec: Model specification and parameters
"""

prompt: str
vector_store_id: str | None = None
llm_model_spec: LLMModelSpec
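
For illustration, the two models nest like this (values below are hypothetical, not part of the diff):

from app.models.llm import LLMConfig, LLMModelSpec

# Model parameters live on LLMModelSpec; LLMConfig wraps the spec together with the prompt.
spec = LLMModelSpec(model="gpt-4", provider="openai", temperature=0.2, max_tokens=512)
config = LLMConfig(prompt="Summarize the attached report.", llm_model_spec=spec)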
23 changes: 23 additions & 0 deletions backend/app/models/llm/request.py
@@ -0,0 +1,23 @@
"""LLM request models.
This module contains request models for LLM API calls.
"""

from sqlmodel import SQLModel

from app.models.llm.config import LLMConfig


class LLMCallRequest(SQLModel):
"""Request model for /v1/llm/call endpoint.
This model decouples LLM calls from the assistants table,
allowing dynamic configuration per request.
Attributes:
llm: LLM configuration containing model spec and prompt
max_num_results: Number of results to return from vector store file search
"""

llm: LLMConfig
max_num_results: int = 20 # For vector store file search

🛠️ Refactor suggestion | 🟠 Major

Use Field descriptor for max_num_results with proper constraints.

The field should use SQLModel's Field descriptor with validation constraints and a description, rather than an inline comment. Based on the OpenAISpec (1-50 range), add appropriate bounds.

Apply this diff:

+from sqlmodel import SQLModel, Field
+
 class LLMCallRequest(SQLModel):
     ...
     llm: LLMConfig
-    max_num_results: int = 20  # For vector store file search
+    max_num_results: int = Field(
+        default=20,
+        ge=1,
+        le=50,
+        description="Number of results to return from vector store file search"
+    )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
     llm: LLMConfig
-    max_num_results: int = 20  # For vector store file search
+    max_num_results: int = Field(
+        default=20,
+        ge=1,
+        le=50,
+        description="Number of results to return from vector store file search"
+    )
🤖 Prompt for AI Agents
In backend/app/models/llm/request.py around lines 22 to 23, replace the inline
comment on max_num_results with SQLModel's Field descriptor: change the
declaration to use Field(default=20, description="Max number of vector search
results", ge=1, le=50) so the model enforces the 1–50 bounds per the OpenAISpec
and preserves the default of 20.
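
A quick sketch of what the proposed bounds would enforce (hypothetical values; assumes the Field(ge=1, le=50) change above has been applied and that LLMCallRequest stays a non-table SQLModel, which validates on construction):

from pydantic import ValidationError

from app.models.llm import LLMCallRequest, LLMConfig, LLMModelSpec

config = LLMConfig(
    prompt="hello",
    llm_model_spec=LLMModelSpec(model="gpt-4"),
)

try:
    # 500 exceeds the suggested upper bound of 50, so validation should reject it.
    LLMCallRequest(llm=config, max_num_results=500)
except ValidationError as exc:
    print(exc)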

28 changes: 28 additions & 0 deletions backend/app/models/llm/response.py
@@ -0,0 +1,28 @@
"""LLM response models.

This module contains response models for LLM API calls.
"""
from sqlmodel import SQLModel


class LLMCallResponse(SQLModel):
"""Response model for /v1/llm/call endpoint.

Attributes:
status: Response status (success, error, etc.)
response_id: Unique identifier for this response
message: The generated text response
model: Model identifier that was used
input_tokens: Number of input tokens consumed
output_tokens: Number of output tokens generated
total_tokens: Total tokens consumed (input + output)
file_search_results: Optional list of file search results from RAG
"""

status: str
response_id: str
message: str
model: str
input_tokens: int
output_tokens: int
total_tokens: int
Comment on lines +8 to +28

⚠️ Potential issue | 🟡 Minor

Docstring mentions missing field file_search_results.

Line 19 in the docstring mentions file_search_results: Optional list of file search results from RAG, but this field is not defined in the model. Either add the field or remove it from the docstring.

Apply this diff to add the missing field:

     input_tokens: int
     output_tokens: int
     total_tokens: int
+    file_search_results: list[dict] | None = None

Or remove it from the docstring:

         input_tokens: Number of input tokens consumed
         output_tokens: Number of output tokens generated
         total_tokens: Total tokens consumed (input + output)
-        file_search_results: Optional list of file search results from RAG
     """
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
 class LLMCallResponse(SQLModel):
     """Response model for /v1/llm/call endpoint.

     Attributes:
         status: Response status (success, error, etc.)
         response_id: Unique identifier for this response
         message: The generated text response
         model: Model identifier that was used
         input_tokens: Number of input tokens consumed
         output_tokens: Number of output tokens generated
         total_tokens: Total tokens consumed (input + output)
         file_search_results: Optional list of file search results from RAG
     """

     status: str
     response_id: str
     message: str
     model: str
     input_tokens: int
     output_tokens: int
     total_tokens: int
+    file_search_results: list[dict] | None = None
🤖 Prompt for AI Agents
In backend/app/models/llm/response.py around lines 8–28, the docstring documents
a file_search_results field that is missing from the model; add the field
instead of removing the docstring entry: import Optional and List from typing
(and import the FileSearchResult model type), then add a class attribute like
file_search_results: Optional[List[FileSearchResult]] = None to the
LLMCallResponse model so the docstring and model match.

22 changes: 22 additions & 0 deletions backend/app/services/llm/__init__.py
@@ -0,0 +1,22 @@
# Main orchestration
from app.services.llm.orchestrator import execute_llm_call

# Providers
from app.services.llm.providers import (
BaseProvider,
ProviderFactory,
OpenAIProvider,
)


# Initialize model specs on module import
import app.services.llm.specs # noqa: F401

__all__ = [
# Main entry point
"execute_llm_call",
# Providers
"BaseProvider",
"ProviderFactory",
"OpenAIProvider",
]
133 changes: 133 additions & 0 deletions backend/app/services/llm/jobs.py
@@ -0,0 +1,133 @@
import logging
from uuid import UUID

from fastapi import HTTPException
from sqlmodel import Session
from asgi_correlation_id import correlation_id

from app.celery.utils import start_high_priority_job
from app.crud import JobCrud
from app.core.db import engine

from app.models import JobType, JobStatus, JobUpdate
from app.models.llm import LLMCallRequest, LLMCallResponse
from app.services.llm.orchestrator import execute_llm_call
from app.utils import get_openai_client

logger = logging.getLogger(__name__)


def start_job(
db: Session, request: LLMCallRequest, project_id: int, organization_id: int
) -> UUID:
"""Create an LLM job and schedule Celery task."""
trace_id = correlation_id.get() or "N/A"
job_crud = JobCrud(session=db)
job = job_crud.create(job_type=JobType.LLM_API, trace_id=trace_id)

try:
task_id = start_high_priority_job(
function_path="app.services.llm.jobs.execute_job",
project_id=project_id,
job_id=str(job.id),
trace_id=trace_id,
request_data=request.model_dump(),
organization_id=organization_id,
)
except Exception as e:
logger.error(
f"[start_job] Error starting Celery task: {str(e)} | job_id={job.id}, project_id={project_id}",
exc_info=True,
)
job_update = JobUpdate(status=JobStatus.FAILED, error_message=str(e))
job_crud.update(job_id=job.id, job_update=job_update)
raise HTTPException(
status_code=500, detail="Internal server error while executing LLM call"
)

logger.info(
f"[start_job] Job scheduled for LLM call | job_id={job.id}, project_id={project_id}, task_id={task_id}"
)
return job.id


def execute_job(
request_data: dict,
project_id: int,
organization_id: int,
job_id: str,
task_id: str,
task_instance,
) -> LLMCallResponse | None:
"""Celery task to process an LLM request asynchronously."""
request = LLMCallRequest(**request_data)
job_id_uuid = UUID(job_id)

logger.info(
f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, "
f"provider={request.llm.provider}, model={request.llm.llm_model_spec.model}"
)

Comment on lines 91 to 95

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Bug: wrong attribute access; crashes before try.

LLMConfig has no provider; use request.llm.llm_model_spec.provider. Currently this can raise AttributeError before status moves to PROCESSING.

-    logger.info(
-        f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, "
-        f"provider={request.llm.provider}, model={request.llm.llm_model_spec.model}"
-    )
+    logger.info(
+        f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, "
+        f"provider={request.llm.llm_model_spec.provider}, model={request.llm.llm_model_spec.model}"
+    )

🏁 Script executed:

#!/bin/bash
# Confirm there are no other bad accesses to `request.llm.provider`
rg -n -C2 -g '!**/venv/**' -P '\brequest\.llm\.provider\b'

Length of output: 379



---

🏁 Script executed:

#!/bin/bash
# Find LLMConfig definition and its structure
fd -e py | xargs rg -l 'class LLMConfig' | head -5

Length of output: 32


🏁 Script executed:

#!/bin/bash
# Search for llm_model_spec definition or usage patterns
rg -n 'llm_model_spec' backend/app/services/llm/jobs.py -A 2 -B 2

Length of output: 354


🏁 Script executed:

#!/bin/bash
# Look for the request model definition
rg -n 'class.*Request|@dataclass' backend/app/services/llm/jobs.py -A 5 | head -30

Length of output: 0



---

🏁 Script executed:

#!/bin/bash
# Examine LLMConfig structure
cat backend/app/models/llm/config.py

Length of output: 1728


🏁 Script executed:

#!/bin/bash
# Check function signature to understand request parameter type
rg -n 'def execute_job' backend/app/services/llm/jobs.py -A 10

Length of output: 343


Bug confirmed: AttributeError on line 68 before try block.

LLMConfig has no provider attribute; the correct path is request.llm.llm_model_spec.provider. Line 79 in the same function already uses the correct pattern. This error will crash the job before status moves to PROCESSING.

    logger.info(
        f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, "
-        f"provider={request.llm.provider}, model={request.llm.llm_model_spec.model}"
+        f"provider={request.llm.llm_model_spec.provider}, model={request.llm.llm_model_spec.model}"
    )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
     logger.info(
         f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, "
-        f"provider={request.llm.provider}, model={request.llm.llm_model_spec.model}"
+        f"provider={request.llm.llm_model_spec.provider}, model={request.llm.llm_model_spec.model}"
     )
🤖 Prompt for AI Agents
In backend/app/services/llm/jobs.py around lines 66 to 70, the logger references
request.llm.provider which does not exist; replace it with
request.llm.llm_model_spec.provider (matching line 79) so the log uses the
correct attribute path and avoid the AttributeError that crashes before the job
status is updated.

try:
# Update job status to PROCESSING
with Session(engine) as session:
job_crud = JobCrud(session=session)
job_crud.update(
job_id=job_id_uuid, job_update=JobUpdate(status=JobStatus.PROCESSING)
)

provider_type = request.llm.llm_model_spec.provider

if provider_type == "openai":
client = get_openai_client(session, organization_id, project_id)
else:
error_msg = f"Provider '{provider_type}' is not yet supported"
logger.error(f"[execute_job] {error_msg} | job_id={job_id}")
job_crud = JobCrud(session=session)
job_crud.update(
job_id=job_id_uuid,
job_update=JobUpdate(
status=JobStatus.FAILED, error_message=error_msg
),
)
return None

response, error = execute_llm_call(
request=request,
client=client,
)

with Session(engine) as session:
job_crud = JobCrud(session=session)
if response:
job_crud.update(
job_id=job_id_uuid, job_update=JobUpdate(status=JobStatus.SUCCESS)
)
logger.info(
f"[execute_job] Successfully completed LLM job | job_id={job_id}, "
f"response_id={response.response_id}, tokens={response.total_tokens}"
)
return response
else:
job_crud.update(
job_id=job_id_uuid,
job_update=JobUpdate(
status=JobStatus.FAILED,
error_message=error or "Unknown error occurred",
),
)
logger.error(
f"[execute_job] Failed to execute LLM job | job_id={job_id}, error={error}"
)
return None

except Exception as e:
error_message = f"Unexpected error in LLM job execution: {str(e)}"
logger.error(f"[execute_job] {error_message} | job_id={job_id}", exc_info=True)
with Session(engine) as session:
job_crud = JobCrud(session=session)
job_crud.update(
job_id=job_id_uuid,
job_update=JobUpdate(status=JobStatus.FAILED, error_message=str(e)),
)
raise