From 6db82e69b6bb6dd7c15156a07d55a7a99a31bd96 Mon Sep 17 00:00:00 2001 From: barberscott Date: Thu, 8 Aug 2024 23:58:17 -0500 Subject: [PATCH] Add queries for common checks --- charts/langsmith/docs/RUN-SUPPORT-QUERY-CH.md | 47 +++++++++++++++++++ charts/langsmith/docs/RUN-SUPPORT-QUERY-PG.md | 4 +- .../langsmith/scripts/run_support_query_ch.sh | 4 +- ..._get_historic_trace_counts_by_ws_daily.sql | 7 +++ .../ch_get_trace_counts_by_ws_daily.sql | 7 +++ .../pg_get_trace_counts_daily.sql | 0 .../pg_get_trace_counts_monthly.sql | 0 .../pg_get_trace_counts_weekly.sql | 0 .../postgres/pg_get_users_by_org.sql | 17 +++++++ .../postgres/pg_get_users_by_ws_and_org.sql | 21 +++++++++ .../postgres/pg_get_users_with_stats.sql | 23 +++++++++ .../pg_get_workspace_dataset_counts.sql | 24 ++++++++++ .../pg_get_workspace_prompt_counts.sql | 28 +++++++++++ .../postgres/pg_get_ws_by_org.sql | 14 ++++++ 14 files changed, 192 insertions(+), 4 deletions(-) create mode 100644 charts/langsmith/docs/RUN-SUPPORT-QUERY-CH.md create mode 100644 charts/langsmith/scripts/support_queries/clickhouse/ch_get_historic_trace_counts_by_ws_daily.sql create mode 100644 charts/langsmith/scripts/support_queries/clickhouse/ch_get_trace_counts_by_ws_daily.sql rename charts/langsmith/scripts/support_queries/{ => postgres}/pg_get_trace_counts_daily.sql (100%) rename charts/langsmith/scripts/support_queries/{ => postgres}/pg_get_trace_counts_monthly.sql (100%) rename charts/langsmith/scripts/support_queries/{ => postgres}/pg_get_trace_counts_weekly.sql (100%) create mode 100644 charts/langsmith/scripts/support_queries/postgres/pg_get_users_by_org.sql create mode 100644 charts/langsmith/scripts/support_queries/postgres/pg_get_users_by_ws_and_org.sql create mode 100644 charts/langsmith/scripts/support_queries/postgres/pg_get_users_with_stats.sql create mode 100644 charts/langsmith/scripts/support_queries/postgres/pg_get_workspace_dataset_counts.sql create mode 100644 charts/langsmith/scripts/support_queries/postgres/pg_get_workspace_prompt_counts.sql create mode 100644 charts/langsmith/scripts/support_queries/postgres/pg_get_ws_by_org.sql diff --git a/charts/langsmith/docs/RUN-SUPPORT-QUERY-CH.md b/charts/langsmith/docs/RUN-SUPPORT-QUERY-CH.md new file mode 100644 index 0000000..1017f6c --- /dev/null +++ b/charts/langsmith/docs/RUN-SUPPORT-QUERY-CH.md @@ -0,0 +1,47 @@ +# Generating Clickhouse Stats +This Helm repository contains queries to produce output that the LangSmith UI does not currently support directly (e.g. obtaining trace counts for multiple workspaces by date in a single query). + +This command takes a clickhouse connection string that contains an embedded name and password (which can be passed in from a call to a secrets manager) and executes a query from an input file. In the example below, we are using the `ch_get_trace_counts_daily.sql` input file in the `support_queries` directory. + +### Prerequisites + +Ensure you have the following tools/items ready. + +1. kubectl + + - https://kubernetes.io/docs/tasks/tools/ + +2. Clickhouse database credentials + + - Host + - Port + - Username + - If using the bundled version, this is `default` + - Password + - If using the bundled version, this is `password` + - Database name + - If using the bundled version, this is `default` + +3. Connectivity to the Clickhouse database from the machine you will be running the `get_clickhouse_stats` script on. + + - If you are using the bundled version, you may need to port forward the clickhouse service to your local machine. + - Run `kubectl port-forward svc/langsmith-clickhouse 8123:8123` to port forward the clickhouse service to your local machine. + +### Running the clickhouse stats generation script + +## Running the query script + +Run the following command to run the desired query: + +```bash +sh run_support_query_ch.sh --input path/to/query.sql +``` + +For example, if you are using the bundled version with port-forwarding, the command might look like: + +```bash +sh run_support_query_ch.sh "clickhouse://default:password@localhost:8123/default" --input support_queries/clickhouse/ch_get_trace_counts_daily.sql +``` + +which will output the count of daily traces by workspace ID and organization ID. To extract this to a file add the flag `--output path/to/file.csv` + diff --git a/charts/langsmith/docs/RUN-SUPPORT-QUERY-PG.md b/charts/langsmith/docs/RUN-SUPPORT-QUERY-PG.md index 38b4a68..452ede4 100644 --- a/charts/langsmith/docs/RUN-SUPPORT-QUERY-PG.md +++ b/charts/langsmith/docs/RUN-SUPPORT-QUERY-PG.md @@ -2,7 +2,7 @@ This Helm repository contains queries to produce output that the LangSmith UI does not currently support directly (e.g. obtaining trace counts for multiple organizations in a single query). -This command takes a postgres connection string that contains an embedded name and password (which can be passed in from a call to a secrets manager) and executes a query from an input file. In the example below, we are using the `pg_get_trace_counts_daily.sql` input file in the `support_queries` directory. +This command takes a postgres connection string that contains an embedded name and password (which can be passed in from a call to a secrets manager) and executes a query from an input file. In the example below, we are using the `pg_get_trace_counts_daily.sql` input file in the `support_queries/postgres` directory. ### Prerequisites @@ -44,7 +44,7 @@ sh run_support_query_pg.sh --input path/to/query.sql For example, if you are using the bundled version with port-forwarding, the command might look like: ```bash -sh run_support_query_pg.sh "postgres://postgres:postgres@localhost:5432/postgres" --input support_queries/pg_get_trace_counts_daily.sql +sh run_support_query_pg.sh "postgres://postgres:postgres@localhost:5432/postgres" --input support_queries/postgres/pg_get_trace_counts_daily.sql ``` which will output the count of daily traces by workspace ID and organization ID. To extract this to a file add the flag `--output path/to/file.csv` diff --git a/charts/langsmith/scripts/run_support_query_ch.sh b/charts/langsmith/scripts/run_support_query_ch.sh index 030ca49..7f39344 100644 --- a/charts/langsmith/scripts/run_support_query_ch.sh +++ b/charts/langsmith/scripts/run_support_query_ch.sh @@ -97,14 +97,14 @@ fi # Execute the query and output to the specified CSV file or stdout if [ -n "$output_file" ]; then - curl $curl_opts --user "$ch_user:$ch_passwd" --data-binary "$metrics_query_string" "$ch_protocol://$ch_host:$ch_port/?database=$ch_database" > "$output_file" + curl $curl_opts --user "$ch_user:$ch_passwd" -H "X-ClickHouse-Format: CSVWithNames" --data-binary "$metrics_query_string" "$ch_protocol://$ch_host:$ch_port/?database=$ch_database" > "$output_file" if [ $? -ne 0 ]; then echo "Error: Failed to connect to ClickHouse." exit 1 fi echo "Query results have been successfully written to $output_file" else - curl $curl_opts --user "$ch_user:$ch_passwd" --data-binary "$metrics_query_string" "$ch_protocol://$ch_host:$ch_port/?database=$ch_database" + curl $curl_opts --user "$ch_user:$ch_passwd" -H "X-ClickHouse-Format: CSVWithNames" --data-binary "$metrics_query_string" "$ch_protocol://$ch_host:$ch_port/?database=$ch_database" if [ $? -ne 0 ]; then echo "Error: Failed to connect to ClickHouse." exit 1 diff --git a/charts/langsmith/scripts/support_queries/clickhouse/ch_get_historic_trace_counts_by_ws_daily.sql b/charts/langsmith/scripts/support_queries/clickhouse/ch_get_historic_trace_counts_by_ws_daily.sql new file mode 100644 index 0000000..14219b5 --- /dev/null +++ b/charts/langsmith/scripts/support_queries/clickhouse/ch_get_historic_trace_counts_by_ws_daily.sql @@ -0,0 +1,7 @@ +select toStartOfInterval(inserted_at, interval 1 day) as ts, + tenant_id as workspace_id, + count(distinct id) as trace_count +from default.runs_history +where is_root = 1 +group by ts, tenant_id +order by ts, tenant_id \ No newline at end of file diff --git a/charts/langsmith/scripts/support_queries/clickhouse/ch_get_trace_counts_by_ws_daily.sql b/charts/langsmith/scripts/support_queries/clickhouse/ch_get_trace_counts_by_ws_daily.sql new file mode 100644 index 0000000..bede008 --- /dev/null +++ b/charts/langsmith/scripts/support_queries/clickhouse/ch_get_trace_counts_by_ws_daily.sql @@ -0,0 +1,7 @@ +select toStartOfInterval(inserted_at, interval 1 day) as ts, + tenant_id as workspace_id, + count(distinct id) as trace_count +from default.runs +where is_root = 1 +group by ts, tenant_id +order by ts, tenant_id \ No newline at end of file diff --git a/charts/langsmith/scripts/support_queries/pg_get_trace_counts_daily.sql b/charts/langsmith/scripts/support_queries/postgres/pg_get_trace_counts_daily.sql similarity index 100% rename from charts/langsmith/scripts/support_queries/pg_get_trace_counts_daily.sql rename to charts/langsmith/scripts/support_queries/postgres/pg_get_trace_counts_daily.sql diff --git a/charts/langsmith/scripts/support_queries/pg_get_trace_counts_monthly.sql b/charts/langsmith/scripts/support_queries/postgres/pg_get_trace_counts_monthly.sql similarity index 100% rename from charts/langsmith/scripts/support_queries/pg_get_trace_counts_monthly.sql rename to charts/langsmith/scripts/support_queries/postgres/pg_get_trace_counts_monthly.sql diff --git a/charts/langsmith/scripts/support_queries/pg_get_trace_counts_weekly.sql b/charts/langsmith/scripts/support_queries/postgres/pg_get_trace_counts_weekly.sql similarity index 100% rename from charts/langsmith/scripts/support_queries/pg_get_trace_counts_weekly.sql rename to charts/langsmith/scripts/support_queries/postgres/pg_get_trace_counts_weekly.sql diff --git a/charts/langsmith/scripts/support_queries/postgres/pg_get_users_by_org.sql b/charts/langsmith/scripts/support_queries/postgres/pg_get_users_by_org.sql new file mode 100644 index 0000000..a6c560b --- /dev/null +++ b/charts/langsmith/scripts/support_queries/postgres/pg_get_users_by_org.sql @@ -0,0 +1,17 @@ +-- This query retreives a list of users by organization. +-- There will be one row per unique user-organization combination + +select distinct + u.email as user_email, + u.full_name as user_name, + o.display_name as organization_name, + o.id as organization_id +from users u + +join identities i + on u.id = i.user_id + +join organizations o + on i.organization_id = o.id + and not o.is_personal + and i.tenant_id is null \ No newline at end of file diff --git a/charts/langsmith/scripts/support_queries/postgres/pg_get_users_by_ws_and_org.sql b/charts/langsmith/scripts/support_queries/postgres/pg_get_users_by_ws_and_org.sql new file mode 100644 index 0000000..6472ba7 --- /dev/null +++ b/charts/langsmith/scripts/support_queries/postgres/pg_get_users_by_ws_and_org.sql @@ -0,0 +1,21 @@ +-- This query retreives a list of users by workspace and organization. +-- There will be one row per unique user-workspace combination + +select + u.email as user_email, + u.full_name as user_name, + o.display_name as organization_name, + o.id as organization_id, + t.display_name as workspace_name, + t.id as workspace_id +from users u + +join identities i + on u.id = i.user_id + +join tenants t + on i.tenant_id = t.id + +join organizations o + on t.organization_id = o.id + and NOT o.is_personal \ No newline at end of file diff --git a/charts/langsmith/scripts/support_queries/postgres/pg_get_users_with_stats.sql b/charts/langsmith/scripts/support_queries/postgres/pg_get_users_with_stats.sql new file mode 100644 index 0000000..269f32c --- /dev/null +++ b/charts/langsmith/scripts/support_queries/postgres/pg_get_users_with_stats.sql @@ -0,0 +1,23 @@ +-- This query retreives a list of users and the count of organizations and workspaces they are a member of +-- There will be one row per unique user + +select + u.email as user_email, + u.full_name as user_name, + count(distinct o.id) as org_count, + count(distinct t.id) as workspace_count +from users u + +join identities i + on u.id = i.user_id + +join tenants t + on i.tenant_id = t.id + +join organizations o + on t.organization_id = o.id + and NOT o.is_personal + +group by + user_email, + user_name \ No newline at end of file diff --git a/charts/langsmith/scripts/support_queries/postgres/pg_get_workspace_dataset_counts.sql b/charts/langsmith/scripts/support_queries/postgres/pg_get_workspace_dataset_counts.sql new file mode 100644 index 0000000..421be49 --- /dev/null +++ b/charts/langsmith/scripts/support_queries/postgres/pg_get_workspace_dataset_counts.sql @@ -0,0 +1,24 @@ +-- This query returns a workspace ID + +select + organizations.id as org_id, + organizations.display_name as org_name, + tenant_id as workspace_id, + tenants.display_name as workspace_name, + count(distinct dataset.id) as dataset_count +from dataset + +join tenants + on dataset.tenant_id = tenants.id + +join organizations + on tenants.organization_id = organizations.id + +group by + org_id, + org_name, + workspace_id, + workspace_name + +order BY + prompt_count desc \ No newline at end of file diff --git a/charts/langsmith/scripts/support_queries/postgres/pg_get_workspace_prompt_counts.sql b/charts/langsmith/scripts/support_queries/postgres/pg_get_workspace_prompt_counts.sql new file mode 100644 index 0000000..55e8e69 --- /dev/null +++ b/charts/langsmith/scripts/support_queries/postgres/pg_get_workspace_prompt_counts.sql @@ -0,0 +1,28 @@ +-- This query returns a workspace ID + +select + organizations.id as org_id, + organizations.display_name as org_name, + tenant_id as workspace_id, + tenants.display_name as workspace_name, + count(distinct hub_repos.id) as prompt_count, + count(distinct hub_commits.id) as revision_count +from hub_repos + +join tenants + on hub_repos.tenant_id = tenants.id + +join organizations + on tenants.organization_id = organizations.id + +join hub_commits + on hub_repos.id = hub_commits.repo_id + +group by + org_id, + org_name, + workspace_id, + workspace_name + +order BY + prompt_count desc \ No newline at end of file diff --git a/charts/langsmith/scripts/support_queries/postgres/pg_get_ws_by_org.sql b/charts/langsmith/scripts/support_queries/postgres/pg_get_ws_by_org.sql new file mode 100644 index 0000000..686f97e --- /dev/null +++ b/charts/langsmith/scripts/support_queries/postgres/pg_get_ws_by_org.sql @@ -0,0 +1,14 @@ +-- This query pulls a list of workspaces by organization +-- Personal orgs if they exist are excluded + +select distinct + ws.organization_id as organization_id, + o.display_name as organization_name, + ws.id as workspace_id, + ws.display_name as workspace_name +from tenants ws + +join organizations o + on ws.organization_id = o.id + +where not o.is_personal