Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ test-models/
test-dir/
tests/e2e-aio/backends
tests/e2e-aio/models
mock-backend

release/

Expand Down Expand Up @@ -69,3 +70,6 @@ docs/static/gallery.html
# React UI build artifacts (keep placeholder dist/index.html)
core/http/react-ui/node_modules/
core/http/react-ui/dist

# Extracted backend binaries for container-based testing
local-backends/
6 changes: 5 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates curl wget espeak-ng libgomp1 \
ffmpeg libopenblas0 libopenblas-dev sox && \
ffmpeg libopenblas0 libopenblas-dev libopus0 sox && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

Expand Down Expand Up @@ -190,6 +190,7 @@ RUN apt-get update && \
curl libssl-dev \
git \
git-lfs \
libopus-dev pkg-config \
unzip upx-ucl python3 python-is-python3 && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
Expand Down Expand Up @@ -378,6 +379,9 @@ COPY ./entrypoint.sh .

# Copy the binary
COPY --from=builder /build/local-ai ./
# Copy the opus shim if it was built
RUN --mount=from=builder,src=/build/,dst=/mnt/build \
if [ -f /mnt/build/libopusshim.so ]; then cp /mnt/build/libopusshim.so ./; fi

# Make sure the models directory exists
RUN mkdir -p /models /backends
Expand Down
80 changes: 79 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,25 @@ react-ui-docker:
# The React UI bundle is produced by the react-ui target; this rule only
# declares the dependency so targets that need the dist/ directory (e.g.
# build) can list it as a prerequisite.
core/http/react-ui/dist: react-ui

## Build:
build: protogen-go install-go-tools core/http/react-ui/dist ## Build the project

# Build the opus shim if libopus is available.
# Best-effort probe: uses pkg-config to detect libopus and, when found,
# delegates to the shim's own Makefile (pkg/opus/shim) via $(MAKE) so
# parallelism/flags propagate. When libopus-dev is absent it only warns and
# succeeds — the main build continues, but WebRTC audio will not work at
# runtime. NOTE(review): consider declaring this target .PHONY if it is not
# already listed in a .PHONY declaration elsewhere in the file.
build-opus-shim:
@if command -v pkg-config >/dev/null 2>&1 && pkg-config --exists opus; then \
echo "$(GREEN)I Building opus shim (libopus found)$(RESET)"; \
$(MAKE) -C pkg/opus/shim; \
else \
echo "$(YELLOW)W libopus-dev not found, skipping opus shim build (WebRTC audio will not work)$(RESET)"; \
fi

# Main binary build. Prerequisites: protobuf Go stubs, Go tooling, the
# optional opus shim (see build-opus-shim) and the React UI bundle. The
# recipe prints the effective build configuration, removes any stale binary,
# compiles ./cmd/local-ai with the configured CGO/LD flags and build tags,
# and finally copies libopusshim.so next to the binary when the shim was
# actually built (the copy is skipped silently otherwise).
build: protogen-go install-go-tools build-opus-shim core/http/react-ui/dist ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
rm -rf $(BINARY_NAME) || true
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./cmd/local-ai
@if [ -f pkg/opus/shim/libopusshim.so ]; then cp pkg/opus/shim/libopusshim.so .; fi

build-launcher: ## Build the launcher application
$(info ${GREEN}I local-ai launcher build info:${RESET})
Expand Down Expand Up @@ -250,6 +261,73 @@ test-stablediffusion: prepare-test
# Run the "stores" integration test suite via ginkgo, retrying flaky specs
# up to $(TEST_FLAKES) times.
test-stores:
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration

# Realtime e2e tests against the mock backend only: the label filter
# "Realtime && !real-models" excludes specs that need real model downloads,
# so this target is cheap enough for CI.
test-realtime: build-mock-backend
@echo 'Running realtime e2e tests (mock backend)'
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="Realtime && !real-models" --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e

# Real-model realtime tests. Set REALTIME_TEST_MODEL to use your own pipeline,
# or leave unset to auto-build one from the component env vars below.
# All use ?= so values set in the environment or on the make command line win.
# Voice-activity-detection model used to segment incoming audio.
REALTIME_VAD?=silero-vad-ggml
# Speech-to-text (transcription) model.
REALTIME_STT?=whisper-1
# LLM used for the conversational part of the pipeline.
REALTIME_LLM?=qwen3-0.6b
# Text-to-speech model for the audio responses.
REALTIME_TTS?=tts-1
# Where backends are looked up when running the tests on the host.
REALTIME_BACKENDS_PATH?=$(abspath ./)/backends

# Realtime e2e tests with real models, run directly on the host. Exports the
# pipeline configuration to the test process via environment variables;
# REALTIME_TEST_MODEL honours a value already present in the caller's
# environment ($$ defers expansion to the shell) and otherwise defaults to
# the auto-built "realtime-test-pipeline".
test-realtime-models: build-mock-backend
@echo 'Running realtime e2e tests (real models)'
REALTIME_TEST_MODEL=$${REALTIME_TEST_MODEL:-realtime-test-pipeline} \
REALTIME_VAD=$(REALTIME_VAD) \
REALTIME_STT=$(REALTIME_STT) \
REALTIME_LLM=$(REALTIME_LLM) \
REALTIME_TTS=$(REALTIME_TTS) \
REALTIME_BACKENDS_PATH=$(REALTIME_BACKENDS_PATH) \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="Realtime" --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e

# --- Container-based real-model testing ---
# Knobs for test-realtime-models-docker; all overridable from the CLI/env.
# Backend images to build and extract for the containerised test run.
REALTIME_BACKEND_NAMES ?= silero-vad whisper llama-cpp kokoro
# Host directory with model files, mounted read-only into the test container.
REALTIME_MODELS_DIR ?= $(abspath ./models)
# Host directory holding extracted backend binaries (see extract-backend-%).
REALTIME_BACKENDS_DIR ?= $(abspath ./local-backends)
# Extra docker run flags; default assumes an NVIDIA GPU is available.
# Override with REALTIME_DOCKER_FLAGS= for a CPU-only run.
REALTIME_DOCKER_FLAGS ?= --gpus all

# Directory target: extracted backend binaries land here (git-ignored, see
# .gitignore "local-backends/"). Used as a prerequisite by extract-backend-%.
# Using $@ keeps the recipe in sync with the target name.
local-backends:
	mkdir -p $@

# Extract a built backend image's root filesystem into local-backends/<name>
# so it can be bind-mounted into a test container. Depends on the matching
# docker-build-<name> target (defined elsewhere) and the local-backends dir.
# Fix over the previous version: the temporary container is removed even when
# the copy/untar step fails (the old `&&` chain leaked it), and the copy's
# exit status is still propagated so a failed extraction fails the target.
extract-backend-%: docker-build-% local-backends
	@echo "Extracting backend $*..."
	@CID=$$(docker create local-ai-backend:$*) || exit 1; \
	rm -rf local-backends/$* && mkdir -p local-backends/$* && \
	docker cp $$CID:/ - | tar -xf - -C local-backends/$*; \
	status=$$?; \
	docker rm $$CID > /dev/null; \
	exit $$status

# Convenience aggregate: extract every backend listed in
# REALTIME_BACKEND_NAMES (expands to extract-backend-silero-vad, ... etc.).
extract-realtime-backends: $(addprefix extract-backend-,$(REALTIME_BACKEND_NAMES))

# Run the real-model realtime e2e suite inside a container. Builds the
# image's build-requirements stage (CUDA by default — BUILD_TYPE/CUDA_*
# overridable via $(or ...) fallbacks), then runs the tests with:
#   - the repo bind-mounted at /build (hence the git safe.directory step,
#     since the mount is owned by a different uid than the container user),
#   - models mounted read-only at /models and extracted backends at /backends,
#   - named volumes caching the Go module and build caches across runs.
# REALTIME_TEST_MODEL uses $$ so a value from the caller's environment wins
# over the "realtime-test-pipeline" default. $(TEST_FLAKES) is expanded by
# make before the shell sees the single-quoted script.
test-realtime-models-docker: build-mock-backend
docker build --target build-requirements \
--build-arg BUILD_TYPE=$(or $(BUILD_TYPE),cublas) \
--build-arg CUDA_MAJOR_VERSION=$(or $(CUDA_MAJOR_VERSION),13) \
--build-arg CUDA_MINOR_VERSION=$(or $(CUDA_MINOR_VERSION),0) \
-t localai-test-runner .
docker run --rm \
$(REALTIME_DOCKER_FLAGS) \
-v $(abspath ./):/build \
-v $(REALTIME_MODELS_DIR):/models:ro \
-v $(REALTIME_BACKENDS_DIR):/backends \
-v localai-go-cache:/root/go/pkg/mod \
-v localai-go-build-cache:/root/.cache/go-build \
-e REALTIME_TEST_MODEL=$${REALTIME_TEST_MODEL:-realtime-test-pipeline} \
-e REALTIME_VAD=$(REALTIME_VAD) \
-e REALTIME_STT=$(REALTIME_STT) \
-e REALTIME_LLM=$(REALTIME_LLM) \
-e REALTIME_TTS=$(REALTIME_TTS) \
-e REALTIME_BACKENDS_PATH=/backends \
-e REALTIME_MODELS_PATH=/models \
-w /build \
localai-test-runner \
bash -c 'git config --global --add safe.directory /build && \
make protogen-go && make build-mock-backend && \
go run github.com/onsi/ginkgo/v2/ginkgo --label-filter="Realtime" --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e'

# Build the image's requirements stage and drop into an interactive bash
# shell inside it, with the repo bind-mounted at /build — useful for
# debugging the container build environment by hand.
# Fix: the docker run line previously passed `-ti` twice; the duplicate
# flag is removed (behavior unchanged).
test-container:
	docker build --target requirements -t local-ai-test-container .
	docker run -ti --rm --entrypoint /bin/bash -v $(abspath ./):/build local-ai-test-container
Expand Down
46 changes: 29 additions & 17 deletions core/backend/transcript.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ package backend
import (
"context"
"fmt"
"maps"
"time"

"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/trace"

"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
Expand All @@ -30,9 +31,12 @@ func ModelTranscription(audio, language string, translate, diarize bool, prompt
}

var startTime time.Time
var audioSnippet map[string]any
if appConfig.EnableTracing {
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
startTime = time.Now()
// Capture audio before the backend call — the backend may delete the file.
audioSnippet = trace.AudioSnippet(audio)
}

r, err := transcriptionModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
Expand All @@ -45,6 +49,16 @@ func ModelTranscription(audio, language string, translate, diarize bool, prompt
})
if err != nil {
if appConfig.EnableTracing {
errData := map[string]any{
"audio_file": audio,
"language": language,
"translate": translate,
"diarize": diarize,
"prompt": prompt,
}
if audioSnippet != nil {
maps.Copy(errData, audioSnippet)
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: time.Since(startTime),
Expand All @@ -53,13 +67,7 @@ func ModelTranscription(audio, language string, translate, diarize bool, prompt
Backend: modelConfig.Backend,
Summary: trace.TruncateString(audio, 200),
Error: err.Error(),
Data: map[string]any{
"audio_file": audio,
"language": language,
"translate": translate,
"diarize": diarize,
"prompt": prompt,
},
Data: errData,
})
}
return nil, err
Expand All @@ -84,22 +92,26 @@ func ModelTranscription(audio, language string, translate, diarize bool, prompt
}

if appConfig.EnableTracing {
data := map[string]any{
"audio_file": audio,
"language": language,
"translate": translate,
"diarize": diarize,
"prompt": prompt,
"result_text": tr.Text,
"segments_count": len(tr.Segments),
}
if audioSnippet != nil {
maps.Copy(data, audioSnippet)
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: time.Since(startTime),
Type: trace.BackendTraceTranscription,
ModelName: modelConfig.Name,
Backend: modelConfig.Backend,
Summary: trace.TruncateString(audio+" -> "+tr.Text, 200),
Data: map[string]any{
"audio_file": audio,
"language": language,
"translate": translate,
"diarize": diarize,
"prompt": prompt,
"result_text": tr.Text,
"segments_count": len(tr.Segments),
},
Data: data,
})
}

Expand Down
58 changes: 46 additions & 12 deletions core/backend/tts.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/binary"
"encoding/json"
"fmt"
"maps"
"os"
"path/filepath"
"time"
Expand Down Expand Up @@ -84,6 +85,16 @@ func ModelTTS(
errStr = fmt.Sprintf("TTS error: %s", res.Message)
}

data := map[string]any{
"text": text,
"voice": voice,
"language": language,
}
if err == nil && res.Success {
if snippet := trace.AudioSnippet(filePath); snippet != nil {
maps.Copy(data, snippet)
}
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: time.Since(startTime),
Expand All @@ -92,11 +103,7 @@ func ModelTTS(
Backend: modelConfig.Backend,
Summary: trace.TruncateString(text, 200),
Error: errStr,
Data: map[string]any{
"text": text,
"voice": voice,
"language": language,
},
Data: data,
})
}

Expand Down Expand Up @@ -158,6 +165,11 @@ func ModelTTSStream(
headerSent := false
var callbackErr error

// Collect up to 30s of audio for tracing
var snippetPCM []byte
var totalPCMBytes int
snippetCapped := false

err = ttsModel.TTSStream(context.Background(), &proto.TTSRequest{
Text: text,
Model: modelPath,
Expand All @@ -166,7 +178,7 @@ func ModelTTSStream(
}, func(reply *proto.Reply) {
// First message contains sample rate info
if !headerSent && len(reply.Message) > 0 {
var info map[string]interface{}
var info map[string]any
if json.Unmarshal(reply.Message, &info) == nil {
if sr, ok := info["sample_rate"].(float64); ok {
sampleRate = uint32(sr)
Expand Down Expand Up @@ -207,6 +219,22 @@ func ModelTTSStream(
if writeErr := audioCallback(reply.Audio); writeErr != nil {
callbackErr = writeErr
}
// Accumulate PCM for tracing snippet
totalPCMBytes += len(reply.Audio)
if appConfig.EnableTracing && !snippetCapped {
maxBytes := int(sampleRate) * 2 * trace.MaxSnippetSeconds // 16-bit mono
if len(snippetPCM)+len(reply.Audio) <= maxBytes {
snippetPCM = append(snippetPCM, reply.Audio...)
} else {
remaining := maxBytes - len(snippetPCM)
if remaining > 0 {
// Align to sample boundary (2 bytes per sample)
remaining = remaining &^ 1
snippetPCM = append(snippetPCM, reply.Audio[:remaining]...)
}
snippetCapped = true
}
}
}
})

Expand All @@ -221,6 +249,17 @@ func ModelTTSStream(
errStr = resultErr.Error()
}

data := map[string]any{
"text": text,
"voice": voice,
"language": language,
"streaming": true,
}
if resultErr == nil && len(snippetPCM) > 0 {
if snippet := trace.AudioSnippetFromPCM(snippetPCM, int(sampleRate), totalPCMBytes); snippet != nil {
maps.Copy(data, snippet)
}
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: time.Since(startTime),
Expand All @@ -229,12 +268,7 @@ func ModelTTSStream(
Backend: modelConfig.Backend,
Summary: trace.TruncateString(text, 200),
Error: errStr,
Data: map[string]any{
"text": text,
"voice": voice,
"language": language,
"streaming": true,
},
Data: data,
})
}

Expand Down
Loading
Loading