Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/http/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ func API(application *application.Application) (*echo.Echo, error) {
requestExtractor := httpMiddleware.NewRequestExtractor(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())

routes.RegisterElevenLabsRoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterCambAIRoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())

// Create opcache for tracking UI operations (used by both UI and LocalAI routes)
var opcache *services.OpCache
Expand Down
17 changes: 17 additions & 0 deletions core/http/endpoints/cambai/audio_separation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package cambai

import (
"net/http"

"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/schema"
)

// AudioSeparationEndpoint returns 501 Not Implemented for audio separation.
// LocalAI currently has no backend capable of this CAMB AI capability.
func AudioSeparationEndpoint() echo.HandlerFunc {
	// The response never varies, so build it once outside the closure.
	notImplemented := schema.CambAIErrorResponse{
		Detail: "Audio separation is not currently supported. No backend available.",
	}
	return func(c echo.Context) error {
		return c.JSON(http.StatusNotImplemented, notImplemented)
	}
}
50 changes: 50 additions & 0 deletions core/http/endpoints/cambai/sound_generation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package cambai

import (
"net/http"

"github.com/google/uuid"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/xlog"
)

// SoundGenerationEndpoint handles CAMB AI text-to-sound (POST /apis/text-to-sound).
// It runs the sound-generation backend synchronously and returns a task
// response whose ID can be used to fetch the generated audio.
func SoundGenerationEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	return func(c echo.Context) error {
		// Request and model config are injected by the RequestExtractor middleware.
		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITextToSoundRequest)
		if !ok {
			return echo.ErrBadRequest
		}

		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
		if !ok || cfg == nil {
			return echo.ErrBadRequest
		}

		xlog.Debug("CAMB AI text-to-sound request received", "model", input.Model)

		filePath, _, err := backend.SoundGeneration(
			input.Prompt, input.Duration, nil, nil,
			nil, nil,
			nil, "", "", nil, "",
			"", "",
			nil,
			ml, appConfig, *cfg)
		if err != nil {
			return err
		}

		taskID := uuid.New().String()
		// Store the generated file path so a later task-result lookup can serve
		// it, mirroring TranslatedTTSEndpoint. Previously the path was discarded,
		// making the "SUCCESS" result unretrievable. NOTE(review): assumes the
		// task-status route reads ttsTaskResults for audio outputs — confirm.
		ttsTaskResults.Store(taskID, filePath)

		return c.JSON(http.StatusOK, schema.CambAITaskResponse{
			TaskID: taskID,
			Status: "SUCCESS",
			RunID:  taskID,
		})
	}
}
114 changes: 114 additions & 0 deletions core/http/endpoints/cambai/transcription.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package cambai

import (
	"io"
	"mime/multipart"
	"net/http"
	"os"
	"path"
	"path/filepath"
	"sync"

	"github.com/google/uuid"
	"github.com/labstack/echo/v4"
	"github.com/mudler/LocalAI/core/backend"
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/http/middleware"
	"github.com/mudler/LocalAI/core/schema"
	"github.com/mudler/LocalAI/pkg/model"
	"github.com/mudler/xlog"
)

// transcriptionTaskResults maps a generated task ID to the transcribed text.
// NOTE(review): entries are added on every request and never evicted —
// presumably read by a task-status endpoint elsewhere in this package; confirm,
// and consider a TTL or eviction policy to bound memory growth.
var transcriptionTaskResults sync.Map

// saveMultipartFile copies an uploaded multipart file to dst. The final
// Close error is propagated because it can surface deferred write failures.
func saveMultipartFile(fh *multipart.FileHeader, dst string) error {
	src, err := fh.Open()
	if err != nil {
		return err
	}
	defer src.Close()

	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	if _, err := io.Copy(out, src); err != nil {
		out.Close()
		return err
	}
	return out.Close()
}

// TranscriptionEndpoint handles CAMB AI transcription (POST /apis/transcribe).
// The SDK sends multipart form with optional file upload and/or media_url.
// Returns {"task_id": "..."} matching OrchestratorPipelineCallResult.
func TranscriptionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	return func(c echo.Context) error {
		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
		if !ok || cfg == nil {
			return echo.ErrBadRequest
		}

		// The typed request is optional here: the SDK may send everything as
		// plain multipart form fields instead.
		input, _ := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITranscriptionRequest)

		// Resolve language: numeric CAMB AI language ID first, then the
		// plain "language" multipart field the SDK also sends.
		language := ""
		if input != nil && input.LanguageID > 0 {
			language = schema.CambAILanguageCodeFromID(input.LanguageID)
		}
		if language == "" {
			if langField := c.FormValue("language"); langField != "" {
				language = langField
			}
		}

		// Try file upload first (field "file" or "media_file").
		var audioPath string
		for _, fieldName := range []string{"file", "media_file"} {
			fh, err := c.FormFile(fieldName)
			if err != nil {
				continue
			}

			dir, err := os.MkdirTemp("", "cambai-transcribe")
			if err != nil {
				return err
			}
			// The temp dir must outlive the transcription call below; it is
			// removed when the handler returns. At most one iteration reaches
			// this point, so the deferred cleanup does not accumulate.
			defer os.RemoveAll(dir)

			// path.Base strips any client-supplied directory components.
			dst := filepath.Join(dir, path.Base(fh.Filename))
			if err := saveMultipartFile(fh, dst); err != nil {
				return err
			}
			audioPath = dst
			break
		}

		// Fall back to media_url (or audio_url) form field.
		if audioPath == "" {
			mediaURL := c.FormValue("media_url")
			if mediaURL == "" {
				mediaURL = c.FormValue("audio_url")
			}
			if mediaURL != "" {
				audioPath = mediaURL
			}
		}

		if audioPath == "" {
			return c.JSON(http.StatusBadRequest, schema.CambAIErrorResponse{
				Detail: "Either a file upload or media_url is required.",
			})
		}

		xlog.Debug("CAMB AI transcription request", "path", audioPath, "language", language)

		tr, err := backend.ModelTranscription(audioPath, language, false, false, "", ml, *cfg, appConfig)
		if err != nil {
			return err
		}

		taskID := uuid.New().String()
		// Keep the text so the task-status endpoint can return it later.
		transcriptionTaskResults.Store(taskID, tr.Text)

		return c.JSON(http.StatusOK, schema.CambAITaskResponse{
			TaskID: taskID,
			Status: "SUCCESS",
			RunID:  taskID,
		})
	}
}
182 changes: 182 additions & 0 deletions core/http/endpoints/cambai/translation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
package cambai

import (
"context"
"fmt"
"net/http"
"strings"

"github.com/google/uuid"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/xlog"
)

// buildTranslationPrompt renders the instruction handed to the LLM backend:
// translate text from sourceLang to targetLang, emitting only the translation.
func buildTranslationPrompt(text, sourceLang, targetLang string) string {
	var b strings.Builder
	b.WriteString("Translate the following text from ")
	b.WriteString(sourceLang)
	b.WriteString(" to ")
	b.WriteString(targetLang)
	b.WriteString(". Output ONLY the translation, nothing else.\n\n")
	b.WriteString(text)
	return b.String()
}

// TranslationEndpoint handles CAMB AI translation (POST /apis/translate).
// Uses an LLM chat backend to perform translation, one inference per input text.
func TranslationEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	return func(c echo.Context) error {
		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITranslationRequest)
		if !ok {
			return echo.ErrBadRequest
		}

		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
		if !ok || cfg == nil {
			return echo.ErrBadRequest
		}

		xlog.Debug("CAMB AI translation request received", "model", input.Model)

		sourceLang := schema.CambAILanguageCodeFromID(input.SourceLanguageID)
		targetLang := schema.CambAILanguageCodeFromID(input.TargetLanguageID)

		// Pre-size the result slice; this also guarantees the JSON field is
		// [] rather than null when no texts are supplied.
		translations := make([]string, 0, len(input.Texts))
		for _, text := range input.Texts {
			prompt := buildTranslationPrompt(text, sourceLang, targetLang)

			// Request-scoped context: a client disconnect cancels inference.
			fn, err := backend.ModelInference(
				c.Request().Context(), prompt, nil, nil, nil, nil,
				ml, cfg, cl, appConfig, nil, "", "", nil, nil, nil,
			)
			if err != nil {
				return err
			}

			resp, err := fn()
			if err != nil {
				return err
			}

			translations = append(translations, strings.TrimSpace(resp.Response))
		}

		taskID := uuid.New().String()

		return c.JSON(http.StatusOK, schema.CambAITaskStatusResponse{
			Status: "SUCCESS",
			RunID:  taskID,
			Output: schema.CambAITranslationResponse{
				Translation: translations,
				SourceLang:  input.SourceLanguageID,
				TargetLang:  input.TargetLanguageID,
			},
		})
	}
}

// TranslationStreamEndpoint handles CAMB AI streaming translation (POST /apis/translation/stream).
// NOTE(review): despite the route name, this performs a single non-streamed
// inference and returns one JSON object — confirm the SDK accepts that shape.
func TranslationStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	return func(c echo.Context) error {
		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITranslationStreamRequest)
		if !ok {
			return echo.ErrBadRequest
		}

		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
		if !ok || cfg == nil {
			return echo.ErrBadRequest
		}

		xlog.Debug("CAMB AI translation stream request received", "model", input.Model)

		sourceLang := schema.CambAILanguageCodeFromID(input.SourceLanguageID)
		targetLang := schema.CambAILanguageCodeFromID(input.TargetLanguageID)
		prompt := buildTranslationPrompt(input.Text, sourceLang, targetLang)

		// Derive from the request context (previously context.Background())
		// so that a client disconnect cancels the inference, matching the
		// other translation endpoints.
		ctx, cancel := context.WithCancel(c.Request().Context())
		defer cancel()

		fn, err := backend.ModelInference(
			ctx, prompt, nil, nil, nil, nil,
			ml, cfg, cl, appConfig, nil, "", "", nil, nil, nil,
		)
		if err != nil {
			return err
		}

		resp, err := fn()
		if err != nil {
			return err
		}

		return c.JSON(http.StatusOK, map[string]any{
			"translation":     strings.TrimSpace(resp.Response),
			"source_language": input.SourceLanguageID,
			"target_language": input.TargetLanguageID,
		})
	}
}

// TranslatedTTSEndpoint handles CAMB AI translated TTS (POST /apis/translated-tts).
// It asks an LLM to translate the input text, then synthesizes speech from
// that translation using the first configured TTS model.
func TranslatedTTSEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	return func(c echo.Context) error {
		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITranslatedTTSRequest)
		if !ok {
			return echo.ErrBadRequest
		}
		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
		if !ok || cfg == nil {
			return echo.ErrBadRequest
		}

		xlog.Debug("CAMB AI translated TTS request received", "model", input.Model)

		src := schema.CambAILanguageCodeFromID(input.SourceLanguageID)
		dst := schema.CambAILanguageCodeFromID(input.TargetLanguageID)

		// Step 1: translate the text via the LLM chat backend, tied to the
		// request context so a disconnect cancels the work.
		inference, err := backend.ModelInference(
			c.Request().Context(), buildTranslationPrompt(input.Text, src, dst), nil, nil, nil, nil,
			ml, cfg, cl, appConfig, nil, "", "", nil, nil, nil,
		)
		if err != nil {
			return err
		}
		result, err := inference()
		if err != nil {
			return err
		}
		translated := strings.TrimSpace(result.Response)

		// Step 2: synthesize speech from the translation. The first model
		// advertising the TTS capability is used.
		ttsConfigs := cl.GetModelConfigsByFilter(config.BuildUsecaseFilterFn(config.FLAG_TTS))
		if len(ttsConfigs) == 0 {
			return c.JSON(http.StatusServiceUnavailable, schema.CambAIErrorResponse{
				Detail: "No TTS model configured. Configure a TTS model to use translated TTS.",
			})
		}

		filePath, _, err := backend.ModelTTS(translated, fmt.Sprintf("%d", input.VoiceID), dst, ml, appConfig, ttsConfigs[0])
		if err != nil {
			return err
		}

		// Record the audio path so a later task-result lookup can serve it.
		taskID := uuid.New().String()
		ttsTaskResults.Store(taskID, filePath)

		return c.JSON(http.StatusOK, schema.CambAITaskResponse{
			TaskID: taskID,
			Status: "SUCCESS",
			RunID:  taskID,
		})
	}
}
Loading