Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions migrated_sql/.sqlfluff
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
[sqlfluff]
## verbose is an integer (0-2) indicating the level of log output
verbose = 0
## Turn off color formatting of output
nocolor = False
## Supported dialects https://docs.sqlfluff.com/en/stable/dialects.html
## Or run 'sqlfluff dialects'
dialect = bigquery
## One of [raw|jinja|python|placeholder]
templater = jinja
## Comma separated list of rules to check, or None for all
rules = None
## Comma separated list of rules to exclude, or None
exclude_rules = AL01,AL04,AL07,AL09,AM03,AM05,CP02,CP03,CV02,CV12,LT05,LT09,LT14,RF01,RF02,RF03,RF04,ST01,ST02,ST05,ST06,ST07
# AL04 - Asks for unique table aliases meaning it complains if selecting from two 2021_07_01 tables as implicit alias is table name (not fully qualified) so same.
# AL07 - Avoid aliases in from and join - why?
# AM03 - if using DESC in one ORDER BY column, then insist on ASC/DESC for all.
# AM05 - INNER JOIN must be fully qualified. Probably should use this but not our style.
# CP02 - Unquoted identifiers (e.g. column names) will be mixed case so don't enforce case
# CP03 - Function names will be mixed case so don't enforce case
# CV02 - Use COALESCE instead of IFNULL or NVL. We think IFNULL is clearer.
# CV12 - Doesn't work with UNNEST. https://github.com/sqlfluff/sqlfluff/issues/6558
# LT05 - We allow longer lines as some of our queries are complex. Maybe should limit in future?
# LT09 - Select targets should be on new lines but sub clauses don't always obey this. Maybe revisit in future?
# LT14 - Keywords on newline. We have some simple, single line joins
# RF01 - BigQuery uses STRUCTS which can look like incorrect table references
# RF02 - Asks for qualified columns for ambiguous ones, but we do not qualify our columns, and they are not really ambiguous (or BigQuery would complain)
# RF03 - Insists on references in column names even if not ambiguous. Bit OTT.
# RF04 - Avoids keywords as identifiers but we use this a lot (e.g. AS count, AS max...etc.)
# ST07 - Uses joins instead of USING - why?
# ST06 - Insists on wildcards (*) in certain SELECT order - why?
# ST01 - Do not use ELSE NULL as redundant. But it's clearer!?
# ST05 - Use CTEs instead of subqueries. We don't use this consistently and big rewrite to do that.
# ST02 - Use coalesce instead of case if you can. But it's clearer!?

[sqlfluff:indentation]
tab_space_size = 2
indent_unit = space
indented_using_on = False

[sqlfluff:layout:type:binary_operator]
line_position = trailing

[sqlfluff:templater:jinja:context]
BLINK_DATE_JOIN="AND 1=2"

[sqlfluff:rules:capitalisation.keywords]
capitalisation_policy = upper

[sqlfluff:rules:convention.count_rows]
# Consistent syntax to count all rows
prefer_count_0 = True

[sqlfluff:rules:references.special_chars]
# Special characters in identifiers
additional_allowed_characters = ".-${}"
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
193 changes: 193 additions & 0 deletions migrated_sql/histograms/cruxShopifyThemes.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
-- Core web vitals by Shopify theme
-- An origin passes a metric ("good") when at least 75% of its measured loads
-- fall in the good bucket. SAFE_DIVIDE returns NULL when all three buckets are
-- zero (instead of raising a division-by-zero error), and IF(NULL, ...) then
-- treats the origin as not-good — same result as before on any non-zero data.
CREATE TEMP FUNCTION IS_GOOD(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
  SAFE_DIVIDE(good, good + needs_improvement + poor) >= 0.75
);

-- An origin fails a metric ("poor") when more than 25% of its measured loads
-- fall in the poor bucket. SAFE_DIVIDE avoids a division-by-zero error when
-- all three buckets are zero: NULL compares as not-poor, matching the intent
-- of the separate IS_NON_ZERO denominator guard used at the call sites.
CREATE TEMP FUNCTION IS_POOR(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
  SAFE_DIVIDE(poor, good + needs_improvement + poor) > 0.25
);

-- True when the origin has any measurement for this metric at all.
-- Used as the denominator guard so pct_* ratios only count measured origins.
CREATE TEMP FUNCTION IS_NON_ZERO(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good + needs_improvement + poor > 0
);

-- Test CrUX data exists
-- Referenced nowhere below; it exists only to fail fast when the CrUX table
-- for the requested month is missing (hence the noqa for unused CTE, ST03).
-- ${YYYYMM} and ${YYYY-MM-DD} are substituted by the calling pipeline.
WITH crux_test AS ( -- noqa: ST03
SELECT
1
FROM
`chrome-ux-report.all.${YYYYMM}`
),

-- All Shopify shops in HTTPArchive
-- Root pages only; theme name and store id come from the ecommerce custom metric.
archive_pages AS (
SELECT
client,
page AS url,
JSON_VALUE(custom_metrics.ecommerce.Shopify.theme.name) AS theme_name,
JSON_VALUE(custom_metrics.ecommerce.Shopify.theme.theme_store_id) AS theme_store_id
FROM
`httparchive.crawl.pages`
WHERE
date = '${YYYY-MM-DD}' AND
is_root_page AND
JSON_VALUE(custom_metrics.ecommerce.Shopify.theme.name) IS NOT NULL -- first grab all shops for market share
)

SELECT
client,
archive_pages.theme_store_id AS id,
theme_names.theme_name AS top_theme_name,
COUNT(DISTINCT origin) AS origins,
-- Origins with good LCP divided by origins with any LCP.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))
) AS pct_good_lcp,
-- Origins with needs improvement are anything not good, nor poor.
1 -
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))
)
-
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_POOR(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL)))
AS pct_ni_lcp,
-- Origins with poor LCP divided by origins with any LCP.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_POOR(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))
) AS pct_poor_lcp,

-- Origins with good TTFB divided by origins with any TTFB.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL))
) AS pct_good_ttfb,
-- Origins with needs improvement are anything not good, nor poor.
1 -
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL))
)
-
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_POOR(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL)))
AS pct_ni_ttfb,
-- Origins with poor TTFB divided by origins with any TTFB.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_POOR(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL))
) AS pct_poor_ttfb,

-- Origins with good FCP divided by origins with any FCP.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_fcp, avg_fcp, slow_fcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_fcp, avg_fcp, slow_fcp), origin, NULL))
) AS pct_good_fcp,
-- Origins with needs improvement are anything not good, nor poor.
1 -
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_fcp, avg_fcp, slow_fcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_fcp, avg_fcp, slow_fcp), origin, NULL))
)
-
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_POOR(fast_fcp, avg_fcp, slow_fcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_fcp, avg_fcp, slow_fcp), origin, NULL)))
AS pct_ni_fcp,
-- Origins with poor FCP divided by origins with any FCP.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_POOR(fast_fcp, avg_fcp, slow_fcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_fcp, avg_fcp, slow_fcp), origin, NULL))
) AS pct_poor_fcp,

-- Origins with good INP divided by origins with any INP.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_inp, avg_inp, slow_inp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_inp, avg_inp, slow_inp), origin, NULL))
) AS pct_good_inp,
-- Origins with needs improvement are anything not good, nor poor.
1 -
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_inp, avg_inp, slow_inp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_inp, avg_inp, slow_inp), origin, NULL))
)
-
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_POOR(fast_inp, avg_inp, slow_inp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_inp, avg_inp, slow_inp), origin, NULL)))
AS pct_ni_inp,
-- Origins with poor INP divided by origins with any INP.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_POOR(fast_inp, avg_inp, slow_inp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_inp, avg_inp, slow_inp), origin, NULL))
) AS pct_poor_inp,

-- Origins with good CLS divided by origins with any CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))
) AS pct_good_cls,
-- Origins with needs improvement are anything not good, nor poor.
1 -
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))
)
-
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_POOR(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL)))
AS pct_ni_cls,
-- Origins with poor CLS divided by origins with any CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_POOR(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))
) AS pct_poor_cls,

-- Origins with good LCP, INP (optional), and CLS divided by origins with any LCP and CLS.
-- IS NOT FALSE lets an origin with no INP data (NULL) still count as passing,
-- since INP is an optional part of the CWV assessment.
SAFE_DIVIDE(
COUNT(DISTINCT IF(
IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND
IS_GOOD(fast_inp, avg_inp, slow_inp) IS NOT FALSE AND
IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL
)),
COUNT(DISTINCT IF(
IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND
IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL
))
) AS pct_good_cwv
FROM
`chrome-ux-report.materialized.device_summary`
-- CrUX origins carry no trailing slash; HTTP Archive root pages do, hence CONCAT.
-- CrUX's tablet/other devices are folded into 'mobile' to match the crawl clients.
JOIN archive_pages
ON
CONCAT(origin, '/') = url AND
IF(device = 'desktop', 'desktop', 'mobile') = client
JOIN (
-- Add in top theme name for a theme store id AS this should usually be the real theme name
SELECT
COUNT(DISTINCT url) AS pages_count,
theme_store_id,
theme_name,
row_number() OVER (PARTITION BY theme_store_id ORDER BY COUNT(DISTINCT url) DESC) AS rank
FROM archive_pages
GROUP BY
theme_store_id,
theme_name
-- NOTE(review): this ORDER BY inside a joined subquery has no effect on the
-- result (rank = 1 below does the selection) — candidate for removal.
ORDER BY COUNT(DISTINCT url) DESC
) theme_names
-- Include null theme store ids so that we can get full market share within CrUX
ON IFNULL(theme_names.theme_store_id, 'N/A') = IFNULL(archive_pages.theme_store_id, 'N/A')
WHERE
-- date filters device_summary to the same month as the crawl snapshot.
date = '${YYYY-MM-DD}' AND
-- Keep only the most common theme name per store id.
theme_names.rank = 1
GROUP BY
client,
id,
top_theme_name
ORDER BY
origins DESC
1 change: 0 additions & 1 deletion sql/histograms/dcl.sql → migrated_sql/histograms/dcl.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
1 change: 0 additions & 1 deletion sql/histograms/fcp.sql → migrated_sql/histograms/fcp.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
CREATE TEMPORARY FUNCTION getElements(payload STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
1 change: 0 additions & 1 deletion sql/histograms/ol.sql → migrated_sql/histograms/ol.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
1 change: 0 additions & 1 deletion sql/histograms/tcp.sql → migrated_sql/histograms/tcp.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#standardSQL
SELECT
*,
SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
Expand Down
Loading