Skip to content

Commit

Permalink
Make the Github.MetricAdapter work with interval functions (#3987)
Browse files Browse the repository at this point in the history
  • Loading branch information
IvanIvanoff authored Nov 17, 2023
1 parent e29361c commit 6077673
Show file tree
Hide file tree
Showing 3 changed files with 166 additions and 119 deletions.
98 changes: 54 additions & 44 deletions lib/sanbase/clickhouse/github/sql_query.ex
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
defmodule Sanbase.Clickhouse.Github.SqlQuery do
import Sanbase.Metric.SqlQuery.Helper, only: [timerange_parameters: 3]
import Sanbase.Metric.SqlQuery.Helper,
only: [
timerange_parameters: 3,
to_unix_timestamp: 3,
to_unix_timestamp_from_number: 2
]

import Sanbase.DateTimeUtils, only: [maybe_str_to_sec: 1]

@non_dev_events [
"IssueCommentEvent",
Expand Down Expand Up @@ -52,12 +59,24 @@ defmodule Sanbase.Clickhouse.Github.SqlQuery do
end

def dev_activity_contributors_count_query(organizations, from, to, interval) do
{from, to, _interval, span} = timerange_parameters(from, to, interval)

params = %{
interval: maybe_str_to_sec(interval),
organizations: organizations |> Enum.map(&String.downcase/1),
from: from,
to: to,
span: span,
non_dev_events: @non_dev_events
}

# {to_unix_timestamp(interval, "dt", argument_name: "interval")} AS time,
sql =
"""
SELECT time, toUInt32(SUM(uniq_contributors)) AS value
FROM (
SELECT
toUnixTimestamp(intDiv(toUInt32(dt), {{interval}}) * {{interval}}) AS time,
#{to_unix_timestamp(interval, "dt", argument_name: "interval")} AS time,
uniqExact(actor) AS uniq_contributors
FROM #{@table}
PREWHERE
Expand All @@ -69,29 +88,29 @@ defmodule Sanbase.Clickhouse.Github.SqlQuery do
)
GROUP BY time
"""
|> wrap_timeseries_in_gap_filling_query()
|> wrap_timeseries_in_gap_filling_query(interval)

Sanbase.Clickhouse.Query.new(sql, params)
end

{from, to, interval, span} = timerange_parameters(from, to, interval)
def github_activity_contributors_count_query(organizations, from, to, interval) do
{from, to, _interval, span} = timerange_parameters(from, to, interval)

params = %{
interval: interval,
interval: maybe_str_to_sec(interval),
organizations: organizations |> Enum.map(&String.downcase/1),
from: from,
to: to,
non_dev_events: @non_dev_events,
span: span
span: span,
non_dev_events: @non_dev_events
}

Sanbase.Clickhouse.Query.new(sql, params)
end

def github_activity_contributors_count_query(organizations, from, to, interval) do
sql =
"""
SELECT time, toUInt32(SUM(uniq_contributors)) AS value
FROM (
SELECT
toUnixTimestamp(intDiv(toUInt32(dt), {{interval}}) * {{interval}}) AS time,
#{to_unix_timestamp(interval, "dt", argument_name: "interval")} AS time,
uniqExact(actor) AS uniq_contributors
FROM #{@table}
PREWHERE
Expand All @@ -102,28 +121,29 @@ defmodule Sanbase.Clickhouse.Github.SqlQuery do
)
GROUP BY time
"""
|> wrap_timeseries_in_gap_filling_query()
|> wrap_timeseries_in_gap_filling_query(interval)

Sanbase.Clickhouse.Query.new(sql, params)
end

{from, to, interval, span} = timerange_parameters(from, to, interval)
def dev_activity_query(organizations, from, to, interval) do
{from, to, _interval, span} = timerange_parameters(from, to, interval)

params = %{
interval: interval,
interval: maybe_str_to_sec(interval),
organizations: organizations |> Enum.map(&String.downcase/1),
from: from,
to: to,
span: span
span: span,
non_dev_events: @non_dev_events
}

Sanbase.Clickhouse.Query.new(sql, params)
end

def dev_activity_query(organizations, from, to, interval) do
sql =
"""
SELECT time, SUM(events) AS value
FROM (
SELECT
toUnixTimestamp(intDiv(toUInt32(dt), {{interval}}) * {{interval}}) AS time,
#{to_unix_timestamp(interval, "dt", argument_name: "interval")} AS time,
count(events) AS events
FROM (
SELECT any(event) AS events, dt
Expand All @@ -139,29 +159,29 @@ defmodule Sanbase.Clickhouse.Github.SqlQuery do
)
GROUP BY time
"""
|> wrap_timeseries_in_gap_filling_query()
|> wrap_timeseries_in_gap_filling_query(interval)

{from, to, interval, span} = timerange_parameters(from, to, interval)
Sanbase.Clickhouse.Query.new(sql, params)
end

def github_activity_query(organizations, from, to, interval) do
{from, to, _interval, span} = timerange_parameters(from, to, interval)

params = %{
interval: interval,
interval: maybe_str_to_sec(interval),
organizations: organizations |> Enum.map(&String.downcase/1),
from: from,
to: to,
non_dev_events: @non_dev_events,
span: span
span: span,
non_dev_events: @non_dev_events
}

Sanbase.Clickhouse.Query.new(sql, params)
end

def github_activity_query(organizations, from, to, interval) do
sql =
"""
SELECT time, SUM(events) AS value
FROM (
SELECT
toUnixTimestamp(intDiv(toUInt32(dt), {{interval}}) * {{interval}}) AS time,
#{to_unix_timestamp(interval, "dt", argument_name: "interval")} AS time,
count(events) AS events
FROM (
SELECT any(event) AS events, dt
Expand All @@ -176,17 +196,7 @@ defmodule Sanbase.Clickhouse.Github.SqlQuery do
)
GROUP BY time
"""
|> wrap_timeseries_in_gap_filling_query()

{from, to, interval, span} = timerange_parameters(from, to, interval)

params = %{
interval: interval,
organizations: organizations |> Enum.map(&String.downcase/1),
from: from,
to: to,
span: span
}
|> wrap_timeseries_in_gap_filling_query(interval)

Sanbase.Clickhouse.Query.new(sql, params)
end
Expand Down Expand Up @@ -307,12 +317,12 @@ defmodule Sanbase.Clickhouse.Github.SqlQuery do
"""
end

defp wrap_timeseries_in_gap_filling_query(query) do
defp wrap_timeseries_in_gap_filling_query(query, interval) do
"""
SELECT time, SUM(value)
FROM (
SELECT
toUnixTimestamp(intDiv(toUInt32({{from}} + number * {{interval}}), {{interval}}) * {{interval}}) AS time,
#{to_unix_timestamp_from_number(interval, from_argument_name: "from")} AS time,
toUInt32(0) AS value
FROM numbers({{span}})
Expand Down
35 changes: 34 additions & 1 deletion lib/sanbase/metric/sql_query_helper.ex
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,47 @@ defmodule Sanbase.Metric.SqlQuery.Helper do
"if({{#{arg_name}}} = {{#{arg_name}}}, toUnixTimestamp(toDateTime(#{function}(#{dt_column}))), null)"
end

def to_unix_timestamp_from_number(<<digit::utf8, _::binary>> = _interval, opts \\ [])
def to_unix_timestamp_from_number(interval_or_function, opts \\ [])

def to_unix_timestamp_from_number(<<digit::utf8, _::binary>> = _interval, opts)
when digit in ?0..?9 do
interval_name = Keyword.get(opts, :interval_argument_name, "interval")
from_name = Keyword.get(opts, :from_argument_name, "from")

"toUnixTimestamp(intDiv(toUInt32({{#{from_name}}} + number * {{#{interval_name}}}), {{#{interval_name}}}) * {{#{interval_name}}})"
end

def to_unix_timestamp_from_number(function, opts)
when function in @supported_interval_functions do
from_name = Keyword.get(opts, :from_argument_name, "from")

expression =
case function do
"toStartOfMonth" ->
"addMonths(toStartOfMonth(toDateTime({{#{from_name}}})), number)"

"toStartOfWeek" ->
"addDays(toStartOfWeek(toDateTime({{#{from_name}}})), number * 7)"

"toMonday" ->
"addDays(toMonday(toDateTime({{#{from_name}}})), number * 7)"

"toStartOfDay" ->
"addDays(toStartOfDay(toDateTime({{#{from_name}}})), number)"

"toStartOfQuarter" ->
"addQuarters(toStartOfQuarter(toDateTime({{#{from_name}}})), number)"

"toStartOfYear" ->
"addYears(toStartOfYear(toDateTime({{#{from_name}}})), number)"

"toStartOfHour" ->
"addHours(toStartOfHour(toDateTime({{#{from_name}}})), number)"
end

"toUnixTimestamp(toDateTime(#{expression}))"
end

def aggregation(:ohlc, value_column, dt_column) do
"""
argMin(#{value_column}, #{dt_column}) AS open,
Expand Down
Loading

0 comments on commit 6077673

Please sign in to comment.