Skip to content

Commit bdd992d

Browse files
cgoodfred and samdoran authored
[COST-4745] OCPGCP Network data processing SQL (#5058)
* [COST-4745] OCPGCP Network data processing SQL --------- Co-authored-by: Sam Doran <[email protected]>
1 parent 07ae2b8 commit bdd992d

File tree

3 files changed

+174
-1
lines changed

3 files changed

+174
-1
lines changed

dev/scripts/nise_ymls/ocp_on_gcp/gcp_static_data.yml

+25
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ generators:
44
start_date: {{start_date}}
55
end_date: {{end_date}}
66
price: 2
7+
sku_id: CF4E-A0C7-E3BF
78
usage.amount_in_pricing_units: 1
89
usage.pricing_unit: hour
910
currency: USD
@@ -12,6 +13,30 @@ generators:
1213
resource.name: projects/nise-populator/instances/gcp_compute1
1314
resource.global_name: //compute.googleapis.com/projects/nise-populator/zones/australia-southeast1-a/instances/3447398860992947181
1415
labels: [{"environment": "clyde", "app":"winter", "version":"green", "kubernetes-io-cluster-c32se93c-73z3-3s3d-cs23-d3245sj45349": "owned"}]
16+
- ComputeEngineGenerator:
17+
start_date: {{start_date}}
18+
end_date: {{end_date}}
19+
price: 2
20+
sku_id: BBF8-C07D-1DF4 # inbound data transfer
21+
usage.amount_in_pricing_units: 50
22+
currency: USD
23+
instance_type: m2-megamem-416
24+
location.region: australia-southeast1-a
25+
resource.name: projects/nise-populator/instances/gcp_compute1
26+
resource.global_name: //compute.googleapis.com/projects/nise-populator/zones/australia-southeast1-a/instances/3447398860992947181
27+
labels: [{"environment": "clyde", "app":"winter", "version":"green", "kubernetes-io-cluster-c32se93c-73z3-3s3d-cs23-d3245sj45349": "owned"}]
28+
- ComputeEngineGenerator:
29+
start_date: {{start_date}}
30+
end_date: {{end_date}}
31+
price: 30
32+
sku_id: 9DE9-9092-B3BC # outbound data transfer
33+
usage.amount_in_pricing_units: 10
34+
currency: USD
35+
instance_type: m2-megamem-416
36+
location.region: australia-southeast1-a
37+
resource.name: projects/nise-populator/instances/gcp_compute1
38+
resource.global_name: //compute.googleapis.com/projects/nise-populator/zones/australia-southeast1-a/instances/3447398860992947181
39+
labels: [{"environment": "clyde", "app":"winter", "version":"green", "kubernetes-io-cluster-c32se93c-73z3-3s3d-cs23-d3245sj45349": "owned"}]
1540
- ComputeEngineGenerator:
1641
start_date: {{start_date}}
1742
end_date: {{end_date}}

koku/masu/database/sql/reporting_ocpgcp_ocp_infrastructure_back_populate.sql

+11
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
1919
source_uuid,
2020
infrastructure_raw_cost,
2121
infrastructure_project_raw_cost,
22+
infrastructure_data_in_gigabytes,
23+
infrastructure_data_out_gigabytes,
2224
infrastructure_usage_cost,
2325
supplementary_usage_cost,
2426
pod_usage_cpu_core_hours,
@@ -65,6 +67,14 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
6567
rp.provider_id as source_uuid,
6668
sum(ocp_gcp.unblended_cost + ocp_gcp.markup_cost + ocp_gcp.credit_amount) AS infrastructure_raw_cost,
6769
sum(ocp_gcp.unblended_cost + ocp_gcp.project_markup_cost + ocp_gcp.pod_credit) AS infrastructure_project_raw_cost,
70+
CASE
71+
WHEN upper(data_transfer_direction) = 'IN' THEN sum(infrastructure_data_in_gigabytes)
72+
ELSE NULL
73+
END as infrastructure_data_in_gigabytes,
74+
CASE
75+
WHEN upper(data_transfer_direction) = 'OUT' THEN sum(infrastructure_data_out_gigabytes)
76+
ELSE NULL
77+
END as infrastructure_data_out_gigabytes,
6878
'{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as infrastructure_usage_cost,
6979
'{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as supplementary_usage_cost,
7080
0 as pod_usage_cpu_core_hours,
@@ -101,5 +111,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
101111
ocp_gcp.persistentvolumeclaim,
102112
ocp_gcp.resource_id,
103113
ocp_gcp.pod_labels,
114+
ocp_gcp.data_transfer_direction,
104115
rp.provider_id
105116
;

koku/masu/database/trino_sql/gcp/openshift/reporting_ocpgcpcostlineitem_daily_summary_resource_id.sql

+138-1
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ INSERT INTO hive.{{schema | sqlsafe}}.gcp_openshift_daily_resource_matched_temp
181181
instance_type,
182182
service_id,
183183
service_alias,
184+
data_transfer_direction,
184185
sku_id,
185186
sku_alias,
186187
region,
@@ -205,6 +206,11 @@ SELECT cast(uuid() as varchar),
205206
json_extract_scalar(json_parse(gcp.system_labels), '$["compute.googleapis.com/machine_spec"]') as instance_type,
206207
gcp.service_id,
207208
max(nullif(gcp.service_description, '')) as service_alias,
209+
CASE
210+
WHEN service_description = 'Compute Engine' AND STRPOS(lower(sku_description), 'data transfer in') != 0 THEN 'IN'
211+
WHEN service_description = 'Compute Engine' AND STRPOS(lower(sku_description), 'data transfer') != 0 THEN 'OUT'
212+
ELSE NULL
213+
END as data_transfer_direction,
208214
max(nullif(gcp.sku_id, '')) as sku_id,
209215
max(nullif(gcp.sku_description, '')) as sku_alias,
210216
gcp.location_region as region,
@@ -233,7 +239,8 @@ GROUP BY gcp.usage_start_time,
233239
gcp.service_id,
234240
gcp.location_region,
235241
gcp.invoice_month,
236-
gcp.labels
242+
gcp.labels,
243+
10 -- data transfer direction
237244
;
238245

239246
INSERT INTO hive.{{schema | sqlsafe}}.gcp_openshift_daily_tag_matched_temp (
@@ -438,6 +445,8 @@ WHERE ocp.source = {{ocp_source_uuid}}
438445
AND gcp.ocp_source = {{ocp_source_uuid}}
439446
AND gcp.year = {{year}}
440447
AND gcp.month = {{month}}
448+
-- Filter out Node Network Costs because they cannot be tied to namespace level
449+
AND data_transfer_direction IS NULL
441450
GROUP BY gcp.uuid, ocp.namespace, ocp.data_source, ocp.pod_labels, ocp.volume_labels
442451
;
443452

@@ -590,6 +599,7 @@ INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_daily
590599
instance_type,
591600
service_id,
592601
service_alias,
602+
data_transfer_direction,
593603
sku_id,
594604
sku_alias,
595605
region,
@@ -657,6 +667,7 @@ SELECT pds.gcp_uuid,
657667
instance_type,
658668
service_id,
659669
service_alias,
670+
NULL as data_transfer_direction,
660671
sku_id,
661672
sku_alias,
662673
region,
@@ -711,6 +722,110 @@ JOIN cte_rankings as r
711722
WHERE pds.ocp_source = {{ocp_source_uuid}} AND pds.year = {{year}} AND pds.month = {{month}}
712723
;
713724

725+
-- Network costs are currently not mapped to pod metrics
726+
-- and are filtered out of the above SQL since that is grouped by namespace
727+
-- and costs are split out by pod metrics. This puts all network costs per node
728+
-- into a "Network unattributed" project with no cost split and one record per
729+
-- data direction
730+
-- Writes the node-level network cost rows into the project daily summary.
-- Only GCP line items with a non-NULL data_transfer_direction are selected, and
-- each result row is a (gcp.uuid, ocp.node) group placed under the literal
-- namespace 'Network unattributed'. The SELECT list is matched to this column
-- list by position, so the two must stay in the same order.
INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_daily_summary (
731+
gcp_uuid,
732+
cluster_id,
733+
cluster_alias,
734+
data_source,
735+
namespace,
736+
node,
737+
persistentvolumeclaim,
738+
persistentvolume,
739+
storageclass,
740+
resource_id,
741+
usage_start,
742+
usage_end,
743+
account_id,
744+
project_id,
745+
project_name,
746+
instance_type,
747+
service_id,
748+
service_alias,
749+
data_transfer_direction,
750+
sku_id,
751+
sku_alias,
752+
region,
753+
unit,
754+
usage_amount,
755+
currency,
756+
invoice_month,
757+
credit_amount,
758+
unblended_cost,
759+
markup_cost,
760+
project_markup_cost,
761+
pod_cost,
762+
pod_credit,
763+
tags,
764+
cost_category_id,
765+
gcp_source,
766+
ocp_source,
767+
year,
768+
month,
769+
day
770+
)
771+
SELECT gcp.uuid as gcp_uuid,
772+
max(ocp.cluster_id) as cluster_id,
773+
max(ocp.cluster_alias) as cluster_alias,
774+
-- No alias on this expression; it lands in data_source purely by position.
max(ocp.data_source),
775+
-- Fixed namespace: these costs are not split across real namespaces.
'Network unattributed' as namespace,
776+
ocp.node as node,
777+
-- nullif(..., '') turns empty strings into NULL so max() skips them.
max(nullif(ocp.persistentvolumeclaim, '')) as persistentvolumeclaim,
778+
max(nullif(ocp.persistentvolume, '')) as persistentvolume,
779+
max(nullif(ocp.storageclass, '')) as storageclass,
780+
max(ocp.resource_id) as resource_id,
781+
max(gcp.usage_start) as usage_start,
782+
-- usage_end is filled from gcp.usage_start, the same expression as usage_start.
-- NOTE(review): presumably deliberate for single-day rows; confirm.
max(gcp.usage_start) as usage_end,
783+
max(gcp.account_id) as account_id,
784+
max(gcp.project_id) as project_id,
785+
max(gcp.project_name) as project_name,
786+
max(instance_type) as instance_type,
787+
max(nullif(gcp.service_id, '')) as service_id,
788+
max(gcp.service_alias) as service_alias,
789+
-- NOTE(review): data_transfer_direction is unqualified here and in the WHERE
-- clause; presumably it resolves to the gcp temp table column. Confirm it
-- cannot be ambiguous with a column on the ocp table.
max(data_transfer_direction) as data_transfer_direction,
790+
max(gcp.sku_id) as sku_id,
791+
max(gcp.sku_alias) as sku_alias,
792+
max(nullif(gcp.region, '')) as region,
793+
max(gcp.unit) as unit,
794+
-- max() rather than sum(): the value is taken once per group.
-- NOTE(review): presumably because the join can repeat a GCP row for several
-- OCP rows on the same node; confirm.
max(gcp.usage_amount) as usage_amount,
795+
max(gcp.currency) as currency,
796+
max(gcp.invoice_month) as invoice_month,
797+
max(gcp.credit_amount) as credit_amount,
798+
max(gcp.unblended_cost) as unblended_cost,
799+
-- markup_cost and project_markup_cost are computed from the same expression.
max(gcp.unblended_cost * {{markup | sqlsafe}}) as markup_cost,
800+
max(gcp.unblended_cost * {{markup | sqlsafe}}) AS project_markup_cost,
801+
-- pod_cost carries the whole unblended cost; nothing is split across pods.
max(gcp.unblended_cost) AS pod_cost,
802+
-- pod_credit is always written as NULL for these rows.
cast(NULL AS double) AS pod_credit,
803+
max(gcp.labels) as tags,
804+
max(ocp.cost_category_id) as cost_category_id,
805+
{{gcp_source_uuid}} as gcp_source,
806+
{{ocp_source_uuid}} as ocp_source,
807+
-- year/month/day are derived from the GCP usage_start and stored as varchar.
cast(year(max(gcp.usage_start)) as varchar) as year,
808+
cast(month(max(gcp.usage_start)) as varchar) as month,
809+
cast(day(max(gcp.usage_start)) as varchar) as day
810+
FROM hive.{{ schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp
811+
JOIN hive.{{schema | sqlsafe}}.gcp_openshift_daily_resource_matched_temp as gcp
812+
ON gcp.usage_start = ocp.usage_start
813+
-- Match when the OCP node name occurs anywhere inside the GCP resource name;
-- only OCP rows whose data_source is 'Pod' take part in the join.
AND (
814+
(strpos(gcp.resource_name, ocp.node) != 0 AND ocp.data_source='Pod')
815+
)
816+
WHERE ocp.source = {{ocp_source_uuid}}
817+
AND ocp.year = {{year}}
818+
AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
819+
AND ocp.day IN {{days | inclause}}
820+
AND (ocp.resource_id IS NOT NULL AND ocp.resource_id != '')
821+
AND gcp.ocp_source = {{ocp_source_uuid}}
822+
AND gcp.year = {{year}}
823+
AND gcp.month = {{month}}
824+
-- Filter for Node Network Costs to tie them to the Network unattributed project
825+
AND data_transfer_direction IS NOT NULL
826+
-- One output row per GCP line item and node; every other selected column is
-- either collapsed with max() or a constant.
GROUP BY gcp.uuid, ocp.node
827+
;
828+
714829
INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_daily_summary_p (
715830
uuid,
716831
report_period_id,
@@ -733,6 +848,9 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_d
733848
instance_type,
734849
service_id,
735850
service_alias,
851+
infrastructure_data_in_gigabytes,
852+
infrastructure_data_out_gigabytes,
853+
data_transfer_direction,
736854
sku_id,
737855
sku_alias,
738856
region,
@@ -771,6 +889,25 @@ SELECT uuid(),
771889
instance_type,
772890
service_id,
773891
service_alias,
892+
CASE
893+
WHEN upper(data_transfer_direction) = 'IN' THEN
894+
-- GCP reports gibibytes (GiB) but this field is tracked in gigabytes (GB); 1 GiB = 1.073741824 GB
895+
CASE unit
896+
WHEN 'gibibyte' THEN usage_amount * 1.07374
897+
ELSE usage_amount
898+
END
899+
ELSE 0
900+
END as infrastructure_data_in_gigabytes,
901+
CASE
902+
WHEN upper(data_transfer_direction) = 'OUT' THEN
903+
-- GCP reports gibibytes (GiB) but this field is tracked in gigabytes (GB); 1 GiB = 1.073741824 GB
904+
CASE unit
905+
WHEN 'gibibyte' THEN usage_amount * 1.07374
906+
ELSE usage_amount
907+
END
908+
ELSE 0
909+
END as infrastructure_data_out_gigabytes,
910+
data_transfer_direction as data_transfer_direction,
774911
sku_id,
775912
sku_alias,
776913
region,

0 commit comments

Comments
 (0)