Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backend/.env.dist.composed
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Kafka settings
CROWD_KAFKA_BROKERS=kafka:9092
CROWD_KAFKA_TOPIC=data-sink-worker-normal

# Redis settings
CROWD_REDIS_HOST=redis
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Rollback for V1751635377__addLockedAtToRepositories.sql.
-- Drops the "lockedAt" column from git.repositories; a no-op when the column
-- is already absent.
ALTER TABLE git.repositories DROP COLUMN IF EXISTS "lockedAt";
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Rollback: drop the "lastProcessedCommit" column from git.repositories
-- (no-op when the column is already absent).
ALTER TABLE git.repositories DROP COLUMN IF EXISTS "lastProcessedCommit";
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Rollback: remove the "integrationId" / "segmentId" columns and their indexes
-- from git.repositories.

-- An index always lives in the schema of its table, so these are git.* objects.
-- The names are schema-qualified so the drops do not depend on the session's
-- search_path; with IF EXISTS an unqualified name would be skipped silently
-- when the git schema is not on the path.
DROP INDEX IF EXISTS git."ix_git_repositories_integrationId_segmentId";
DROP INDEX IF EXISTS git."ix_git_repositories_segmentId";
DROP INDEX IF EXISTS git."ix_git_repositories_integrationId";

-- Dropping the columns also removes their foreign-key constraints.
ALTER TABLE git.repositories
    DROP COLUMN IF EXISTS "integrationId",
    DROP COLUMN IF EXISTS "segmentId";
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Rollback: drop the "lastMaintainerRunAt" column from git.repositories
-- (no-op when the column is already absent).
ALTER TABLE git.repositories DROP COLUMN IF EXISTS "lastMaintainerRunAt";
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Rollback: drop the "maintainerFile" column from git.repositories
-- (no-op when the column is already absent).
ALTER TABLE git.repositories DROP COLUMN IF EXISTS "maintainerFile";
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Rollback: remove the role/date columns from "maintainersInternal".
-- Each drop is guarded, so columns that are already gone are skipped.
ALTER TABLE "maintainersInternal"
    DROP COLUMN IF EXISTS "originalRole",
    DROP COLUMN IF EXISTS "startDate",
    DROP COLUMN IF EXISTS "endDate";
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Down migration for git."serviceExecutions": tears down the cleanup trigger,
-- its function, the indexes, the table, and finally the status ENUM.
-- Every statement uses IF EXISTS, so missing objects are skipped.

-- 1. Trigger goes first: the function below cannot be dropped while a trigger
--    still depends on it.
DROP TRIGGER IF EXISTS trigger_auto_cleanup_service_executions
    ON git."serviceExecutions";

-- 2. Function executed by that trigger.
DROP FUNCTION IF EXISTS git.trigger_cleanup_service_executions();

-- 3. Indexes on the table.
DROP INDEX IF EXISTS git."idx_serviceExecutions_repoId";
DROP INDEX IF EXISTS git."idx_serviceExecutions_operationType";
DROP INDEX IF EXISTS git."idx_serviceExecutions_status";
DROP INDEX IF EXISTS git."idx_serviceExecutions_createdAt";
DROP INDEX IF EXISTS git."idx_serviceExecutions_composite";

-- 4. The table itself (its foreign key constraint is dropped with it).
DROP TABLE IF EXISTS git."serviceExecutions";

-- 5. ENUM type last, once no column uses it any more.
DROP TYPE IF EXISTS git.execution_status;
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ CREATE TABLE git.repositories (

-- Processing state and priority
state VARCHAR(50) NOT NULL DEFAULT 'pending',
priority INTEGER NOT NULL DEFAULT 0, -- 0=urgent, 1=high, 2=normal
priority INTEGER NOT NULL DEFAULT 1, -- 0=urgent, 1=high, 2=normal
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NIT: would be nice if we can add documentation on this as well

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I've added it to the TODOs 🫡


-- Processing metadata
"lastProcessedAt" TIMESTAMP WITH TIME ZONE,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- git.repositories."lockedAt": records when a repository was locked for
-- processing. Nullable; existing rows start out as NULL.
ALTER TABLE git.repositories
    ADD COLUMN "lockedAt" TIMESTAMPTZ DEFAULT NULL;

-- Catalog documentation for the new column.
COMMENT ON COLUMN git.repositories."lockedAt"
    IS 'Timestamp when the repository was locked for processing, NULL if not locked';
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- git.repositories."lastProcessedCommit": nullable commit hash, at most
-- 64 characters. Existing rows start out as NULL.
ALTER TABLE git.repositories
    ADD COLUMN "lastProcessedCommit" CHARACTER VARYING(64) DEFAULT NULL;

-- Catalog documentation for the new column.
COMMENT ON COLUMN git.repositories."lastProcessedCommit"
    IS 'The most recent commit hash that has been processed';
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- Link git.repositories rows to public.integrations and public.segments.
-- Both links are optional: the columns are nullable, and deleting the
-- referenced row sets the link to NULL rather than touching the repository.
ALTER TABLE git.repositories
    ADD COLUMN "integrationId" UUID
        REFERENCES public.integrations (id) ON DELETE SET NULL,
    ADD COLUMN "segmentId" UUID
        REFERENCES public.segments (id) ON DELETE SET NULL;

-- Lookup indexes for filtering by integration, by segment, or by both.
-- NOTE(review): "integrationId" is the leading column of the composite index,
-- so the single-column "integrationId" index may be redundant — confirm against
-- real query patterns before removing anything.
CREATE INDEX "ix_git_repositories_integrationId"
    ON git.repositories ("integrationId");
CREATE INDEX "ix_git_repositories_segmentId"
    ON git.repositories ("segmentId");
CREATE INDEX "ix_git_repositories_integrationId_segmentId"
    ON git.repositories ("integrationId", "segmentId");
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Remove git."repositoryIntegrations" and the objects attached to it:
-- the cleanup trigger on the table, then the git.cleanup_orphaned_repositories()
-- function, then the table itself. All drops are guarded with IF EXISTS.
DROP TRIGGER IF EXISTS cleanup_orphaned_repositories_trigger
    ON git."repositoryIntegrations";

DROP FUNCTION IF EXISTS git.cleanup_orphaned_repositories();

DROP TABLE IF EXISTS git."repositoryIntegrations";
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- git.repositories."lastMaintainerRunAt": nullable timestamp of the last
-- maintainer-processing run. Existing rows start out as NULL.
ALTER TABLE git.repositories
    ADD COLUMN "lastMaintainerRunAt" TIMESTAMPTZ DEFAULT NULL;

-- Catalog documentation for the new column.
COMMENT ON COLUMN git.repositories."lastMaintainerRunAt"
    IS 'Timestamp of when the repository maintainer processing was last executed';
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- git.repositories."maintainerFile": nullable, unbounded text holding the
-- maintainer file name. Existing rows start out as NULL.
ALTER TABLE git.repositories
    ADD COLUMN "maintainerFile" TEXT DEFAULT NULL;

-- Catalog documentation for the new column.
COMMENT ON COLUMN git.repositories."maintainerFile"
    IS 'Name of the file containing repository maintainer information and responsibilities (e.g., MAINTAINERS, CODEOWNERS)';
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Bring "maintainersInternal" up to date with the role/date columns.
-- Each ADD is guarded with IF NOT EXISTS, so columns that are already present
-- are left untouched. All three columns are nullable.
ALTER TABLE "maintainersInternal"
    ADD COLUMN IF NOT EXISTS "originalRole" VARCHAR(255),
    ADD COLUMN IF NOT EXISTS "startDate"    TIMESTAMP,
    ADD COLUMN IF NOT EXISTS "endDate"      TIMESTAMP;
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
-- Service execution tracking for the git schema: status ENUM, table, indexes,
-- and a probabilistic retention trigger.
-- Every statement is guarded so the script can be re-run without failing on
-- objects that already exist.

-- ENUM of execution outcomes. CREATE TYPE has no IF NOT EXISTS form, so an
-- already-existing type is tolerated by trapping duplicate_object.
DO $$
BEGIN
    CREATE TYPE git.execution_status AS ENUM ('success', 'failure');
EXCEPTION
    WHEN duplicate_object THEN
        NULL;
END
$$;

-- One row per service execution; rows are removed automatically when the
-- owning repository is deleted (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS git."serviceExecutions" (
    id UUID PRIMARY KEY NOT NULL DEFAULT uuid_generate_v4(),
    "repoId" UUID NOT NULL REFERENCES git.repositories(id) ON DELETE CASCADE,
    "operationType" VARCHAR(50) NOT NULL, -- Service name (e.g., 'Clone', 'Commit', etc.)
    "status" git.execution_status NOT NULL,
    "errorCode" VARCHAR(50), -- Custom error codes
    "errorMessage" TEXT, -- Detailed error message if status is 'failure'
    "executionTimeSec" DECIMAL NOT NULL, -- Execution time in seconds
    "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);

-- Indexes for efficient querying
CREATE INDEX IF NOT EXISTS "idx_serviceExecutions_repoId" ON git."serviceExecutions"("repoId");
CREATE INDEX IF NOT EXISTS "idx_serviceExecutions_operationType" ON git."serviceExecutions"("operationType");
CREATE INDEX IF NOT EXISTS "idx_serviceExecutions_status" ON git."serviceExecutions"("status");
CREATE INDEX IF NOT EXISTS "idx_serviceExecutions_createdAt" ON git."serviceExecutions"("createdAt");
CREATE INDEX IF NOT EXISTS "idx_serviceExecutions_composite" ON git."serviceExecutions"("repoId", "operationType", "status");

-- Retention: deletes rows whose "createdAt" is older than 14 days.
-- The trigger below fires once per inserted row; the purge runs for roughly
-- 1% of those firings to keep the cost low under high write load.
-- Rows with a NULL "createdAt" never match the predicate and are not purged.
CREATE OR REPLACE FUNCTION git.trigger_cleanup_service_executions()
RETURNS trigger AS $$
BEGIN
    IF RANDOM() < 0.01 THEN
        DELETE FROM git."serviceExecutions"
        WHERE "createdAt" < NOW() - INTERVAL '14 days';
    END IF;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Trigger that fires after each inserted row. It is dropped first so that
-- re-running the script does not fail on an existing trigger of the same name.
DROP TRIGGER IF EXISTS trigger_auto_cleanup_service_executions ON git."serviceExecutions";
CREATE TRIGGER trigger_auto_cleanup_service_executions
    AFTER INSERT ON git."serviceExecutions"
    FOR EACH ROW
    EXECUTE FUNCTION git.trigger_cleanup_service_executions();
4 changes: 4 additions & 0 deletions scripts/builders/git-integration.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Builder settings for the git-integration image.
# NOTE(review): presumably read by the image build tooling under scripts/, with
# the relative paths resolved from that directory — confirm against the consumer.

# Dockerfile used to build the image.
DOCKERFILE="./services/docker/Dockerfile.git_integration"
# Docker build context directory.
CONTEXT="../"
# Image repository the built image is tagged for.
REPO="sjc.ocir.io/axbydjxa5zuh/git-integration"
# Service name(s) associated with this image.
SERVICES="git-integration"
31 changes: 29 additions & 2 deletions scripts/services/docker/Dockerfile.git_integration
Original file line number Diff line number Diff line change
@@ -1,6 +1,27 @@
# Base image for both stages
FROM python:3.13.5-slim-bullseye AS base

# Go builder stage: build the software-value binary and install scc.
# Only the resulting binaries (software-value in the WORKDIR, scc in /go/bin)
# are copied out of this stage later in the file.
FROM golang:1.25-alpine AS go-builder

WORKDIR /go/src/software-value

# Install scc using the official Go toolchain (specific version as per project README)
RUN go install github.com/boyter/scc/[email protected]

# Copy Go module files first, so the dependency download layer below is reused
# from cache until go.mod or go.sum change
COPY ./services/apps/git_integration/src/crowdgit/services/software_value/go.mod ./
COPY ./services/apps/git_integration/src/crowdgit/services/software_value/go.sum ./

# Download dependencies
RUN go mod download

# Copy source code
COPY ./services/apps/git_integration/src/crowdgit/services/software_value/ ./

# Build the binary for Linux with cgo disabled; the -w -s linker flags omit
# DWARF debug information and the symbol table
RUN CGO_ENABLED=0 GOOS=linux go build -ldflags "-w -s" -o software-value ./

# Builder stage: install build dependencies, uv, and dependencies
FROM base AS builder

Expand Down Expand Up @@ -59,11 +80,17 @@ WORKDIR /usr/crowd/app
COPY --from=builder /usr/crowd/app/.venv /usr/crowd/app/.venv
COPY --from=builder /usr/crowd/app /usr/crowd/app

# Copy both software-value and scc binaries from go-builder stage
COPY --from=go-builder /go/src/software-value/software-value /usr/local/bin/software-value
COPY --from=go-builder /go/bin/scc /usr/local/bin/scc

# Add virtual environment bin to PATH
ENV PATH="/usr/crowd/app/.venv/bin:$PATH"

# Make runner script executable
RUN chmod +x ./src/runner.sh
# Make runner script and binaries executable
RUN chmod +x ./src/runner.sh \
&& chmod +x /usr/local/bin/software-value \
&& chmod +x /usr/local/bin/scc

EXPOSE 8085

Expand Down
6 changes: 5 additions & 1 deletion services/apps/git_integration/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,16 @@ dependencies = [
"fastapi[standard]",
"uvicorn",
"asyncio",
"confluent-kafka",
"openai>=1.45.0",
"prettytable>=3.11.0",
"python-slugify>=8.0.4",
"asyncpg",
"loguru>=0.7.3",
"tenacity>=9.1.2",
"aiokafka>=0.12.0",
"aiofiles>=24.1.0",
"aioboto3>=15.1.0",
"slugify>=0.0.1",
]

[project.optional-dependencies]
Expand Down
23 changes: 0 additions & 23 deletions services/apps/git_integration/src/crowdgit/__init__.py

This file was deleted.

Loading
Loading