Skip to content

Commit 6c28e4b

Browse files
samdorancgoodfred
andauthored
[COST-4741] - Separate AWS node network costs into a Network unattributed namespace (#5104)
- Add data transfer direction to daily summary - Separate node network costs out into network unattributed * Separate node network costs into a Network unattributed project * Group by lineitem_usagetype It’s not possible to use an aggregation in a GROUP BY statement, so grouping by the lineitem_usagetype is the closest I could think of. It may result in too many distinct rows, though. * Only use Pod data source for Network unattributed costs * Group by lineitem_operation in order to determine correct direction Without this group by, the data transfer direction would not be accurate since the operation value may sometimes contain ‘-in’ and other times ‘-out’. This ensures we are getting the correct direction consistently. * Set data value to NULL for non-applicable direction If set to 0, the records will be inserted twice into the network summary tables because that query is filtering on values where the field is not null, and zero is not null. --------- Co-authored-by: Corey Goodfred <[email protected]>
1 parent ddfeb17 commit 6c28e4b

File tree

2 files changed

+158
-3
lines changed

2 files changed

+158
-3
lines changed

koku/masu/database/sql/reporting_ocpaws_ocp_infrastructure_back_populate.sql

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
2121
infrastructure_project_raw_cost,
2222
infrastructure_usage_cost,
2323
supplementary_usage_cost,
24+
infrastructure_data_in_gigabytes,
25+
infrastructure_data_out_gigabytes,
2426
pod_usage_cpu_core_hours,
2527
pod_request_cpu_core_hours,
2628
pod_limit_cpu_core_hours,
@@ -67,6 +69,14 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
6769
sum(coalesce(nullif(ocp_aws.savingsplan_effective_cost, 0), ocp_aws.unblended_cost) + coalesce(nullif(ocp_aws.markup_cost_savingsplan, 0), ocp_aws.markup_cost)) AS infrastructure_project_raw_cost,
6870
'{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as infrastructure_usage_cost,
6971
'{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as supplementary_usage_cost,
72+
CASE
73+
WHEN upper(data_transfer_direction) = 'IN' THEN sum(infrastructure_data_in_gigabytes)
74+
ELSE NULL
75+
END as infrastructure_data_in_gigabytes,
76+
CASE
77+
WHEN upper(data_transfer_direction) = 'OUT' THEN sum(infrastructure_data_out_gigabytes)
78+
ELSE NULL
79+
END as infrastructure_data_out_gigabytes,
7080
0 as pod_usage_cpu_core_hours,
7181
0 as pod_request_cpu_core_hours,
7282
0 as pod_limit_cpu_core_hours,
@@ -102,5 +112,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
102112
ocp_aws.persistentvolumeclaim,
103113
ocp_aws.resource_id,
104114
ocp_aws.pod_labels,
115+
ocp_aws.data_transfer_direction,
105116
rp.provider_id
106117
;

koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql

Lines changed: 147 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ INSERT INTO hive.{{schema | sqlsafe}}.aws_openshift_daily_resource_matched_temp
174174
region,
175175
unit,
176176
usage_amount,
177+
data_transfer_direction,
177178
currency_code,
178179
unblended_cost,
179180
blended_cost,
@@ -200,6 +201,19 @@ SELECT cast(uuid() as varchar) as uuid,
200201
nullif(aws.product_region, '') as region,
201202
max(nullif(aws.pricing_unit, '')) as unit,
202203
sum(aws.lineitem_usageamount) as usage_amount,
204+
-- Determine network direction
205+
CASE
206+
-- Is this a network record?
207+
WHEN max(aws.lineitem_productcode) = 'AmazonEC2' AND max(aws.product_productfamily) = 'Data Transfer' THEN
208+
-- Yes, it's a network record. What's the direction?
209+
CASE
210+
WHEN strpos(lower(max(aws.lineitem_usagetype)), 'in-bytes') > 0 THEN 'IN'
211+
WHEN strpos(lower(max(aws.lineitem_usagetype)), 'out-bytes') > 0 THEN 'OUT'
212+
WHEN (strpos(lower(max(aws.lineitem_usagetype)), 'regional-bytes') > 0 AND strpos(lower(max(lineitem_operation)), '-in') > 0) THEN 'IN'
213+
WHEN (strpos(lower(max(aws.lineitem_usagetype)), 'regional-bytes') > 0 AND strpos(lower(max(lineitem_operation)), '-out') > 0) THEN 'OUT'
214+
ELSE NULL
215+
END
216+
END AS data_transfer_direction,
203217
max(nullif(aws.lineitem_currencycode, '')) as currency_code,
204218
sum(aws.lineitem_unblendedcost) as unblended_cost,
205219
sum(aws.lineitem_blendedcost) as blended_cost,
@@ -228,13 +242,15 @@ WHERE aws.source = {{aws_source_uuid}}
228242
AND aws.resource_id_matched = TRUE
229243
GROUP BY aws.lineitem_usagestartdate,
230244
aws.lineitem_resourceid,
231-
4, -- CASE satement
245+
4, -- product_code
232246
aws.product_productfamily,
233247
aws.product_instancetype,
234248
aws.lineitem_availabilityzone,
235249
aws.product_region,
250+
aws.lineitem_usagetype,
236251
aws.resourcetags,
237-
aws.costcategory
252+
aws.costcategory,
253+
lineitem_operation
238254
;
239255

240256
INSERT INTO hive.{{schema | sqlsafe}}.aws_openshift_daily_tag_matched_temp (
@@ -321,7 +337,7 @@ WHERE aws.source = {{aws_source_uuid}}
321337
AND (aws.resource_id_matched = FALSE OR aws.resource_id_matched IS NULL)
322338
GROUP BY aws.lineitem_usagestartdate,
323339
aws.lineitem_resourceid,
324-
4, -- CASE satement
340+
4, -- product_code
325341
aws.product_productfamily,
326342
aws.product_instancetype,
327343
aws.lineitem_availabilityzone,
@@ -448,6 +464,8 @@ SELECT aws.uuid as aws_uuid,
448464
AND aws.ocp_source = {{ocp_source_uuid}}
449465
AND aws.year = {{year}}
450466
AND aws.month = {{month}}
467+
-- Filter out Node Network Costs since they cannot be attributed to a namespace and are accounted for later
468+
AND aws.data_transfer_direction IS NULL
451469
GROUP BY aws.uuid, ocp.namespace, ocp.pod_labels
452470
;
453471

@@ -604,6 +622,7 @@ INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily
604622
region,
605623
unit,
606624
usage_amount,
625+
data_transfer_direction,
607626
currency_code,
608627
unblended_cost,
609628
markup_cost,
@@ -652,6 +671,7 @@ SELECT pds.aws_uuid,
652671
region,
653672
unit,
654673
usage_amount / aws_uuid_count as usage_amount,
674+
NULL AS data_transfer_direction,
655675
currency_code,
656676
CASE WHEN resource_id_matched = TRUE AND data_source = 'Pod'
657677
THEN ({{pod_column | sqlsafe}} / nullif({{node_column | sqlsafe}}, 0)) * unblended_cost
@@ -721,6 +741,118 @@ LEFT JOIN postgres.{{schema | sqlsafe}}.reporting_awsaccountalias AS aa
721741
WHERE pds.ocp_source = {{ocp_source_uuid}} AND year = {{year}} AND month = {{month}}
722742
;
723743

744+
-- Put Node Network Costs into the Network unattributed namespace
745+
INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary (
746+
aws_uuid,
747+
cluster_id,
748+
cluster_alias,
749+
data_source,
750+
namespace,
751+
node,
752+
persistentvolumeclaim,
753+
persistentvolume,
754+
storageclass,
755+
resource_id,
756+
usage_start,
757+
usage_end,
758+
product_code,
759+
product_family,
760+
instance_type,
761+
usage_account_id,
762+
account_alias_id,
763+
availability_zone,
764+
region,
765+
unit,
766+
usage_amount,
767+
data_transfer_direction,
768+
currency_code,
769+
unblended_cost,
770+
markup_cost,
771+
blended_cost,
772+
markup_cost_blended,
773+
savingsplan_effective_cost,
774+
markup_cost_savingsplan,
775+
calculated_amortized_cost,
776+
markup_cost_amortized,
777+
pod_cost,
778+
project_markup_cost,
779+
pod_labels,
780+
tags,
781+
aws_cost_category,
782+
cost_category_id,
783+
aws_source,
784+
ocp_source,
785+
year,
786+
month,
787+
day
788+
)
789+
SELECT
790+
aws.uuid AS aws_uuid,
791+
max(cluster_id),
792+
max(cluster_alias),
793+
max(data_source),
794+
'Network unattributed' AS namespace,
795+
ocp.node AS node,
796+
max(persistentvolumeclaim),
797+
max(persistentvolume),
798+
max(storageclass),
799+
max(aws.resource_id),
800+
max(aws.usage_start),
801+
max(usage_end),
802+
max(product_code),
803+
max(product_family),
804+
max(instance_type),
805+
max(usage_account_id),
806+
max(aa.id) AS account_alias_id,
807+
max(availability_zone),
808+
max(region),
809+
max(unit),
810+
max(usage_amount),
811+
data_transfer_direction,
812+
max(currency_code),
813+
max(unblended_cost),
814+
max(unblended_cost) * cast({{markup}} AS decimal(24,9)),
815+
max(blended_cost),
816+
max(blended_cost) * cast({{markup}} AS decimal(24,9)),
817+
max(savingsplan_effective_cost),
818+
max(savingsplan_effective_cost) * cast({{markup}} AS decimal(24,9)),
819+
max(calculated_amortized_cost),
820+
max(calculated_amortized_cost) * cast({{markup}} AS decimal(33,9)),
821+
max(unblended_cost) AS pod_cost,
822+
max(unblended_cost) * cast({{markup}} AS decimal(24,9)) AS project_markup_cost,
823+
max(ocp.pod_labels),
824+
cast(NULL AS varchar) AS tags,
825+
cast(NULL AS varchar) AS aws_cost_category,
826+
max(cost_category_id),
827+
max({{aws_source_uuid}}) AS aws_source,
828+
max({{ocp_source_uuid}}) AS ocp_source,
829+
max(cast(year(aws.usage_start) AS varchar)) AS year,
830+
max(cast(month(aws.usage_start) AS varchar)) AS month,
831+
max(cast(day(aws.usage_start) AS varchar)) AS day
832+
FROM hive.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS ocp
833+
JOIN hive.{{schema | sqlsafe}}.aws_openshift_daily_resource_matched_temp AS aws
834+
ON aws.usage_start = ocp.usage_start
835+
AND strpos(aws.resource_id, ocp.resource_id) != 0
836+
LEFT JOIN postgres.{{schema | sqlsafe}}.reporting_awsaccountalias AS aa
837+
ON aws.usage_account_id = aa.account_id
838+
WHERE ocp.source = {{ocp_source_uuid}}
839+
AND ocp.year = {{year}}
840+
AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
841+
AND ocp.day IN {{days | inclause}}
842+
AND (ocp.resource_id IS NOT NULL AND ocp.resource_id != '')
843+
AND aws.ocp_source = {{ocp_source_uuid}}
844+
AND aws.year = {{year}}
845+
AND aws.month = {{month}}
846+
-- Network related costs
847+
AND aws.data_transfer_direction IS NOT NULL
848+
-- Storage and Pod can have the same resource_id and we want the Pod
849+
AND ocp.data_source = 'Pod'
850+
GROUP BY
851+
aws.uuid,
852+
ocp.node,
853+
aws.data_transfer_direction
854+
;
855+
724856
INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary_p (
725857
uuid,
726858
report_period_id,
@@ -745,6 +877,9 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_d
745877
region,
746878
unit,
747879
usage_amount,
880+
infrastructure_data_in_gigabytes,
881+
infrastructure_data_out_gigabytes,
882+
data_transfer_direction,
748883
currency_code,
749884
unblended_cost,
750885
markup_cost,
@@ -785,6 +920,15 @@ SELECT uuid(),
785920
region,
786921
unit,
787922
usage_amount,
923+
CASE
924+
WHEN upper(data_transfer_direction) = 'IN' THEN usage_amount
925+
ELSE 0
926+
END AS infrastructure_data_in_gigabytes,
927+
CASE
928+
WHEN upper(data_transfer_direction) = 'OUT' THEN usage_amount
929+
ELSE 0
930+
END AS infrastructure_data_out_gigabytes,
931+
data_transfer_direction,
788932
currency_code,
789933
unblended_cost,
790934
markup_cost,

0 commit comments

Comments
 (0)