diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..169af70
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,15 @@
+volumes:
+  minio-data:
+
+services:
+  minio:
+    image: minio/minio
+    ports:
+      - 9000:9000
+      - 9001:9001
+    command: server /data --console-address ":9001"
+    volumes:
+      - "minio-data:/data"
+    environment:
+      MINIO_ROOT_USER: admin
+      MINIO_ROOT_PASSWORD: 12345678
diff --git a/generate_test_dataset_parquet_influx.py b/generate_test_dataset_parquet_influx.py
new file mode 100644
index 0000000..4ce6c5b
--- /dev/null
+++ b/generate_test_dataset_parquet_influx.py
@@ -0,0 +1,62 @@
+"""Generate an InfluxDB-style Parquet test dataset (one wide table).
+
+Writes ``test.parquet`` with one row per (timestamp, asset) pair: the tag-like
+columns first, followed by one random float column per KPI.
+"""
+import datetime
+import random
+import uuid
+
+import pandas
+
+NUM_OF_ASSETS = 250
+KPIs = ["HeatIn_Q1", "Heat_flow1", "PostProc_Velocity1", "HeatIn_Q2", "Heat_flow2", "PostProc_Velocity2", "HeatIn_Q3", "Heat_flow3", "PostProc_Velocity3", "HeatIn_Q4"]
+START_DATETIME = datetime.datetime.fromisoformat('2020-01-01T00:00:00+00:00')
+END_DATETIME = datetime.datetime.fromisoformat('2021-01-01T00:00:00+00:00')
+RESOLUTION = datetime.timedelta(minutes=15)
+ASSET_CLASSES = ['HeatingDemand', 'Pipe', 'ResidualHeatSource']
+CAPABILITIES = ['Consumer', 'Transport', 'Producer']
+
+CARRIER_ID = str(uuid.uuid4())
+SIMULATION_RUN_ID = str(uuid.uuid4())
+
+asset_ids = [str(uuid.uuid4()) for _ in range(NUM_OF_ASSETS)]
+# Class and capability describe the asset itself, so draw them once per asset.
+# Drawing them per row made a single asset change class between timestamps.
+asset_tags = {
+    asset_id: (random.choice(ASSET_CLASSES), random.choice(CAPABILITIES))
+    for asset_id in asset_ids
+}
+
+
+times = []
+carrier_ids = []
+asset_ids_ = []
+asset_classes = []
+asset_names = []
+capabilities = []
+simulation_runs = []
+simulation_types = []
+kpis = {kpi: [] for kpi in KPIs}
+
+current_time = START_DATETIME
+while current_time < END_DATETIME:
+    for asset_id in asset_ids:
+        asset_class, capability = asset_tags[asset_id]
+        times.append(current_time)
+        carrier_ids.append(CARRIER_ID)
+        asset_ids_.append(asset_id)
+        asset_names.append(asset_id)
+        asset_classes.append(asset_class)
+        capabilities.append(capability)
+        simulation_runs.append(SIMULATION_RUN_ID)
+        simulation_types.append("EndScenarioSizingDiscountedStagedHIGHS")
+
+        for kpi in KPIs:
+            kpis[kpi].append(random.uniform(0, 10))
+
+    current_time += RESOLUTION
+
+df = pandas.DataFrame({'time': times, 'carrier_id': carrier_ids, 'asset_id': asset_ids_, 'asset_class': asset_classes, 'asset_name': asset_names, 'capabilities': capabilities, 'simulation_run': simulation_runs, 'simulation_type': simulation_types, **kpis})
+print(df)
+df.to_parquet('test.parquet')
diff --git a/generate_test_dataset_parquet_single_file_per_asset_part.py b/generate_test_dataset_parquet_single_file_per_asset_part.py
new file mode 100644
index 0000000..7587aba
--- /dev/null
+++ b/generate_test_dataset_parquet_single_file_per_asset_part.py
@@ -0,0 +1,42 @@
+"""Generate a Parquet test dataset partitioned by asset.
+
+Writes one row per (asset, timestamp) pair with a random float per KPI to
+``single_file_per_asset_part/<esdl_id>``, partitioned on ``asset_id``.
+"""
+import datetime
+import random
+import uuid
+
+import pandas
+
+NUM_OF_ASSETS = 250
+KPIs = ["HeatIn_Q1", "Heat_flow1", "PostProc_Velocity1", "HeatIn_Q2", "Heat_flow2", "PostProc_Velocity2", "HeatIn_Q3", "Heat_flow3", "PostProc_Velocity3", "HeatIn_Q4"]
+START_DATETIME = datetime.datetime.fromisoformat('2020-01-01T00:00:00+00:00')
+END_DATETIME = datetime.datetime.fromisoformat('2021-01-01T00:00:00+00:00')
+RESOLUTION = datetime.timedelta(minutes=15)
+
+esdl_id = str(uuid.uuid4())
+asset_ids = [str(uuid.uuid4()) for _ in range(NUM_OF_ASSETS)]
+
+
+df_times = []
+df_kpis = {kpi: [] for kpi in KPIs}
+df_asset_ids = []
+# Count from 1 so the last progress line reads N/N instead of (N-1)/N.
+for asset_number, asset_id in enumerate(asset_ids, start=1):
+    current_time = START_DATETIME
+
+    while current_time < END_DATETIME:
+        df_times.append(current_time)
+
+        for kpi in KPIs:
+            df_kpis[kpi].append(random.uniform(0, 10))
+        df_asset_ids.append(asset_id)
+        current_time += RESOLUTION
+
+    print(f'Done {asset_number}/{len(asset_ids)}')
+
+print('Writing out results')
+df = pandas.DataFrame({'time': df_times, 'asset_id': df_asset_ids, **df_kpis})
+df.to_parquet(f'single_file_per_asset_part/{esdl_id}', partition_cols=['asset_id'])
+print('Done!')
diff --git a/generate_test_dataset_parquet_single_file_per_asset_part_with_carrier.py
b/generate_test_dataset_parquet_single_file_per_asset_part_with_carrier.py
new file mode 100644
index 0000000..2d9161b
--- /dev/null
+++ b/generate_test_dataset_parquet_single_file_per_asset_part_with_carrier.py
@@ -0,0 +1,48 @@
+"""Generate a Parquet test dataset partitioned by asset and carrier.
+
+Writes one row per (carrier, asset, timestamp) triple with a random float per
+KPI to ``single_file_per_asset_part_with_carrier/<esdl_id>``, partitioned on
+``asset_id`` and ``carrier_id``.
+"""
+import datetime
+import random
+import uuid
+
+import polars
+
+NUM_OF_ASSETS = 250
+KPIs = ["HeatIn_Q1", "Heat_flow1", "PostProc_Velocity1", "HeatIn_Q2", "Heat_flow2", "PostProc_Velocity2", "HeatIn_Q3", "Heat_flow3", "PostProc_Velocity3", "HeatIn_Q4"]
+START_DATETIME = datetime.datetime.fromisoformat('2020-01-01T00:00:00+00:00')
+END_DATETIME = datetime.datetime.fromisoformat('2021-01-01T00:00:00+00:00')
+RESOLUTION = datetime.timedelta(minutes=15)
+
+CARRIER_IDS = [str(uuid.uuid4()), str(uuid.uuid4()), str(uuid.uuid4())]
+
+esdl_id = str(uuid.uuid4())
+asset_ids = [str(uuid.uuid4()) for _ in range(NUM_OF_ASSETS)]
+
+
+df_times = []
+df_kpis = {kpi: [] for kpi in KPIs}
+df_asset_ids = []
+df_carrier_ids = []
+for carrier_id in CARRIER_IDS:
+    # Count from 1 so the last progress line reads N/N instead of (N-1)/N.
+    for asset_number, asset_id in enumerate(asset_ids, start=1):
+        current_time = START_DATETIME
+
+        while current_time < END_DATETIME:
+            df_times.append(current_time)
+
+            for kpi in KPIs:
+                df_kpis[kpi].append(random.uniform(0, 10))
+            df_asset_ids.append(asset_id)
+            df_carrier_ids.append(carrier_id)
+            current_time += RESOLUTION
+
+        print(f'Done {asset_number}/{len(asset_ids)}')
+
+print('Writing out results')
+df = polars.DataFrame({'time': df_times, 'asset_id': df_asset_ids, 'carrier_id': df_carrier_ids, **df_kpis})
+df.write_parquet(f'single_file_per_asset_part_with_carrier/{esdl_id}', partition_by=['asset_id', 'carrier_id'])
+print('Done!')
diff --git a/query_influx_style.py b/query_influx_style.py
new file mode 100644
index 0000000..342fe16
--- /dev/null
+++ b/query_influx_style.py
@@ -0,0 +1,29 @@
+"""Time a single-asset DuckDB query against the Influx-style Parquet file on S3."""
+import time
+
+import duckdb
+
+ASSET_ID = '77563ead-7bba-4739-abb4-47b415655062'
+
+con = duckdb.connect(":memory:")
+con.sql("""
+CREATE SECRET secret1 (
+    TYPE S3,
+    PROVIDER CREDENTIAL_CHAIN,
+    CHAIN 'env;config',
+    REGION 'XX-XXXX-X ',
+    ENDPOINT 'localhost:9000',
+    URL_STYLE 'path',
+    USE_SSL false,
+    KEY_ID 'test',
+    SECRET '12345678'
+);
+""")
+
+# perf_counter is the monotonic clock intended for timing; print stays outside the timed region.
+start = time.perf_counter()
+rows = con.sql(f"SELECT time, PostProc_Velocity1 FROM read_parquet(['s3://test-parquet/test.parquet']) WHERE asset_id = '{ASSET_ID}';").fetchall()
+end = time.perf_counter()
+print(len(rows))
+
+print(f'Took {end - start} seconds')
diff --git a/query_single_file_per_asset.py b/query_single_file_per_asset.py
new file mode 100644
index 0000000..cbab412
--- /dev/null
+++ b/query_single_file_per_asset.py
@@ -0,0 +1,300 @@
+asset_ids = ['0201ce34-cb70-4728-be09-e9748de64d6f',
+'020c352a-3d35-4b2f-9150-2387fa82e27b',
+'02ccff7b-4713-4502-a7d5-5561cef5246f',
+'0301d946-166b-406e-a3ee-9ebe50f8da22',
+'0330bd32-36d0-4ea4-bbd0-d2c81907d910',
+'04d37b4b-5761-4f85-8754-796e337b5d57',
+'067cc051-a494-4ec8-a9fb-c85d081f9cd8',
+'0692f115-773a-4b71-a11b-5f58cd92156c',
+'07620812-22a9-4786-8c97-4c635478c8a4',
+'0826b1c4-288b-4c63-afd6-41fd3605f05b',
+'0973c883-0ac0-4d78-b65c-b6d2ceed5d9b',
+'09feca31-3a7b-4e02-87d8-2f2f32233464',
+'0c86e2d4-e53a-4534-ad33-20ba530925a1',
+'0c896f87-1969-48cd-8920-266cd44f3c43',
+'0ce6d9f2-f9d0-4cbb-9e6c-ada598588e6b',
+'0d312d47-42a9-4aa4-9390-03a5312dbdcb',
+'0d61f567-4082-4376-bbb7-ee02d78885e3',
+'0e56849c-fb1f-4879-a512-e41db5c439bf',
+'0f718fb7-4e27-4d31-bc0f-78abc7e9b844',
+'101b59fe-e1e4-4f7a-a35b-acc997a964a5',
+'10c6467d-1834-4cbf-a34a-d065b3d7c213',
+'1284ea34-0aa7-499e-a458-baaf79bd7bdb',
+'137df02d-48a3-45ae-954d-389159dbd57a',
+'13d474e6-7f16-4649-aaed-afc6fb84d360',
+'1428ec4a-bdfa-4dc7-9f16-5d496076d2d8',
+'14355b50-867f-4504-b187-68a56abbc68c',
+'14bd333b-c99f-4156-b3d0-ff2a184f86ed',
+'18b56ec7-b702-4188-a5ff-f2a8e3f8ec04',
+'19c6147c-a1d2-4020-815b-e2d6277f463c',
+'1ab625f0-b132-444d-b6ba-f5ec701a21b6',
+'1b4bbba3-f032-4a79-8c97-09c76a8a2607',
+'1bc54eb9-6dae-47db-91de-5370877b6bf4',
+'1bcf1044-b68c-4a4e-a3e6-8e2f32e389d5',
+'1c68c67a-55f9-430b-bc5f-53db8cd885dd',
+'1d69002e-b061-4b4d-b003-a1e1ca826bb9', +'1e4eda8f-a06a-4056-8d97-f79c8515df9a', +'1f452ffd-05e4-42cf-9b66-9a2d9bf19b73', +'21aa8793-93cf-43cd-a753-b8e5a1b41f77', +'2571fc29-c49b-4072-bf70-72250386857a', +'279564f8-4812-4458-88d9-90fd3e91751e', +'280600b2-5fd6-49cb-a09b-3b2d247fb4cf', +'285d17c4-db90-4322-89dd-c86b139148f8', +'298e46b2-8287-4ac8-b003-2e7e1c87b6a0', +'2b52df81-eaee-42de-b876-916193e4ae43', +'2be1c5f5-9b5e-4c4b-b1c0-9d2fd2214195', +'2eee4013-85f3-4b6d-8906-ac30ca576896', +'2fadf62b-0f9c-472a-9549-7e775705532d', +'2fc0a688-75ea-4841-9d51-dd5050567eb3', +'3099b166-fbea-47d6-b0a7-4e9a16178f86', +'3119f7e8-6711-4007-a208-d2403a332c37', +'31f66447-1ab4-473b-82bf-2eddf9e1e9b6', +'327f913b-0253-4ba9-838f-fdcb68d8d98a', +'3583f4d4-4b99-4a9f-916c-14e4eb491083', +'3608afe0-42bb-4d7c-ab1e-6bb88997b456', +'36dae341-f8cd-4b4e-a503-8b2983967e00', +'379f126b-f33e-4dd1-b1ec-1c6375359ec4', +'38041c32-5053-451e-bb6e-0c7e784658a3', +'387acfef-b279-4014-8409-88e58a84a9ea', +'3912e6c2-fa09-4e42-8759-9a777d743885', +'39fdb989-d6ad-48ca-bde2-40cf2d45d1a6', +'3ab5730d-1d80-4fd0-bfd4-0e65f6559d09', +'3d96376a-47cb-4fc3-987f-b70817eaab5c', +'3f5b27a8-4f29-4006-a90f-5e47823dd377', +'42b3b855-7393-4cde-9ad5-1c15065e5c5d', +'43285246-ebea-4674-95f2-024e975c4cfc', +'44040115-2fbd-43d8-b6df-ed10b2e24356', +'44274ceb-befc-41c4-b510-45ddd47ee1f7', +'456d3d62-efe9-4d22-985e-544a300bdc50', +'4630eb91-1e3a-466d-8856-89dfab75330f', +'46dffd8f-170b-4469-8e0f-1bc5b3ad0167', +'47215ffc-ab96-4850-8a80-215ee8533478', +'48c446e2-64c7-4e1f-b4af-acc1d6780aff', +'49c6e03a-6720-4bf1-8a5e-d22c159fcd6a', +'49f3e26a-fc78-4fee-b0ea-ca9ae179af6a', +'4b20cd55-9f5c-4f0a-8a96-2c2fdb1d5cc8', +'4b2fd6dc-8596-422a-bb51-5179f788db26', +'4bacf0e7-a50c-41a4-bcbb-f2914a16c991', +'4bb48c2f-02a1-43a7-860f-8cc235588dc2', +'4f8cfbed-c07f-4282-88c2-2127f7f2ee8b', +'508e1480-3cb3-4b23-bdbc-b734749cb40f', +'50a21f7b-3917-4842-9390-e8720960c475', +'5187bd9e-f157-4cb3-adb2-9fb0a7b40063', 
+'5212c81b-5949-4b8d-9431-c1414b12547a', +'5280e4c2-4cd1-4876-8b55-149c69f0771a', +'533aba5c-6341-4e7e-8efe-8aecf4915fc6', +'54c4a078-b16b-4d89-a00c-0350e2d9de2a', +'57aa58a5-92e7-4d61-a762-10e29d8ecd24', +'57d7d730-1248-4666-9a3a-2e3815fd25e3', +'5aa4c3ce-bb4d-489f-b1bc-20eb27ce991c', +'5b198c4d-c57f-4d33-aab1-50e31934f3ea', +'5b290f79-f35e-42ed-9f1b-e46232c695e0', +'5bbb6615-52a4-4aa8-a26c-1617433fb27e', +'5ca6f803-7bbb-4ccd-86dc-d8462d73f445', +'5cd153fd-76b7-4b2a-8fd5-3615a3746f38', +'5cdc8526-6109-4307-93f4-16f182419291', +'5d5d3cd3-f1c3-461c-abf0-0570a2e4a912', +'5e269b16-ecd3-4a05-bba8-4a9d3ef53cb2', +'5e4ce256-14e4-45af-acb4-d20176ca5e6f', +'5e814d05-90c9-4f8c-ae23-c3d61026a819', +'60d25f86-171e-4c52-9c5a-6091df9804ea', +'624cdde9-81f9-486f-bc13-da1b876c23d8', +'630e93eb-628f-4f4a-98c8-2bfdb11cb97f', +'63237442-ebcd-4e2d-8ba0-f9cdf1d489bb', +'64b93c80-72e9-4ad3-b88b-94791c252872', +'654f3db4-7241-4a70-bc99-e709a799b4a0', +'6af7bd3a-84af-4345-8873-3a4424d9c775', +'6b7408d9-0360-497f-8fa0-d5da17f4f50a', +'6dba1d23-a19f-4d80-a29a-2c9e29d5c18b', +'6ec11d5c-6306-4eca-b1c6-32820b83f2ea', +'6ee732b7-1f06-45f3-add7-e4c1162a2f34', +'6f572a47-b04b-4b10-84b5-1ba85f33dcf2', +'703c7530-2f98-459f-b859-d9ad30cfd2b9', +'73b36af2-d37b-4daa-94d3-20709599751d', +'73b9cc98-8eb1-4e39-919e-db5f4a254c1e', +'742bfd46-57cb-4c60-8be0-ccc4bd64c1fc', +'7526e863-7f4e-45fc-a800-31c47e362d26', +'76f3728d-58e3-4c8d-8a52-580e87bcfa59', +'77af2970-5733-482a-88af-4f11ab1b8f53', +'78ca0aa5-a564-42f2-8088-9490c245014d', +'79ad76eb-6539-42ab-8ae4-2b78affb54cd', +'79bc8921-9ee9-4571-a794-9da332f9688f', +'7ec74d44-a014-45b8-bc8d-1a22623a8e86', +'7f6a5362-9d95-4b64-9516-f5e35d4dcdbf', +'82008115-575e-4542-8b76-518e611d8fe1', +'835a32c4-4dfa-422d-b3a3-98c9f9c53853', +'841c0c33-84bd-4c50-8eb3-59196ae098d8', +'85904f78-47c0-42f7-a8c5-d09c6be11ae0', +'864ec78d-b4b1-404c-8c44-3dc86d66d60b', +'8892c13e-e87f-446d-abf9-281729e593ab', +'88ae8613-d530-4e0a-8fef-bd36d741a93d', 
+'88d61166-ac6d-4b05-a793-698553288bbc', +'8bd7d616-2666-46d4-ad6e-b99d6b440ef3', +'8cb2d285-449d-41a0-9dde-d4fe91af0442', +'8cc86847-83f0-4a61-a5f0-e51e42e06410', +'8e439f0f-1d59-48f3-aed9-05d302bbf923', +'8e84eed1-3b85-4fb1-a9eb-7cabf4e09422', +'8ef189ad-020f-4ef4-88b2-932ec21e8f5e', +'8f444024-f2a3-4ec9-a780-60bde88b31c4', +'8f78ebad-4a65-4f68-a5b7-72798ef11026', +'90aa4bc3-d23a-4648-bbd0-ef77129487ca', +'9100e3ba-7998-4f90-91f6-8acd17e41418', +'9124e3ba-d59e-4bed-9902-2a95b3d0e4ff', +'917ff3a7-43be-43ef-949e-47d7c58d43c0', +'9292539b-6dad-4584-806d-49bd2c1c8ab1', +'934945c7-0c0e-4b52-9ad9-7043ee8de922', +'934e2108-4db2-441b-a7bf-6d41a818870c', +'940b679e-73f7-4191-b7d6-043f862b3e53', +'94750c88-149f-4b3c-b4cb-67632e1a23a0', +'9579d53c-969d-468c-9f4d-290f13c5a79c', +'968e7a2c-b9ea-4c7e-8fb5-37bf0f1f61b6', +'976dfaae-7238-4cdd-aeca-304a759d1cde', +'979666fc-dd27-4d58-a058-d403853e1982', +'97c7e2f3-57c5-430d-93ab-1f59f56a736d', +'98d6ea89-5002-441f-841e-618f26d7fdb7', +'99a37e69-b124-48e8-9b5b-4296631260c3', +'9a41e94b-5f49-4b47-a703-bd20e7469029', +'9c2b1afd-4b6f-494f-8e5a-08bc2bddff74', +'9d1af260-4a4e-4224-a8b5-81c4c74fb4f1', +'9df02068-efc2-447e-88e8-db6b846f4088', +'9e697651-e430-41e9-b4e0-856b372cf1e9', +'9ef2a43b-7426-45af-aeec-5bc48094e693', +'a1790e39-9370-43de-9f18-d4a07162cc3e', +'a30e8ac9-5591-4ec3-93b0-17a3dc086699', +'a3bbadf2-3acc-4ee3-8fbd-4d9d6573c0a1', +'a4123a0f-b6e5-467a-8707-8a0c5d5264a2', +'a4485a66-7003-4a47-a4c2-5a0a42d83cf5', +'a48c897f-b980-49dc-92f8-a2907b1950b6', +'a4d55625-8248-42b7-b042-4f959a16d3df', +'a66bc6b9-e984-42a7-a83e-87ff349cf0f4', +'a68a3a14-0b33-47cc-aa3a-cd9e30392e96', +'a6d06716-0bae-4db3-b12f-f01ab1a13a31', +'a6d91e12-4fb2-49e5-ae09-7055dacc661b', +'ab2d9f79-5e05-47e7-a0bc-b261535f024b', +'ac7fd783-cf54-4845-a33f-5dcbdafdb526', +'ac96f252-d9aa-4fa6-a3d9-9cfa45a7f621', +'aefbd7d4-7e55-4950-a613-cfd8c00628eb', +'afedf5fb-6e78-4424-ba20-24bac3205886', +'affbc6bf-9485-4ff8-9cef-b956bf7cde82', 
+'b0f1f9b0-d418-48be-893e-a2f385d5ca54', +'b2a0e0f1-2683-4212-a0e7-6fbe571001ad', +'b2bad957-0faa-4db0-b0e1-f9428081a639', +'b31f19cc-91d3-4807-82c7-6c5141e2db0f', +'b45cdb23-9651-460d-a407-07098d579b03', +'b68b5d61-e18e-4bcf-a7df-66e24c361d1c', +'b72d97c2-a277-4a26-afcc-9ecd7cd7d0ce', +'b78be47f-f9b1-4574-81cd-033c2b007b46', +'b97f3248-dc14-4b09-b7e9-965fdfef5db7', +'b9a2ad22-e06b-4d20-b1f0-87ea8e0bf909', +'c12f0b4b-dbb3-45ef-bc99-61d82654e278', +'c25bf7d6-599c-45a7-839d-010864a62385', +'c2789f26-3e4f-4322-8f06-dc6c1ee7ecb5', +'c34c1be5-a825-4d75-b076-173d59fe375e', +'c393f2f1-ace1-455b-99e7-22ced828f070', +'c4904c84-461c-4769-8648-45b0c6be0e4f', +'c5811970-7ba0-4e11-b30a-037bcd6a73a1', +'c6d829ee-5fb6-41cd-938b-cdf5c0cb70f5', +'c7c931df-d0f2-491c-b003-5805a1538aef', +'c978c4df-300d-4f0d-94fd-fb8db183234d', +'c9dcbf8b-b4f1-4725-983f-df01c3da3bc8', +'ca569dcd-ad16-482e-b772-666d650d991f', +'cbbd776f-f48a-4f1a-8bab-566da000021e', +'cfeec40c-af14-4df9-9e15-1bc27297745d', +'d0349351-f28f-4648-aa15-c12c86c3e114', +'d184bfd5-69d0-4086-ac29-5cb5e9399d1f', +'d1c61a2d-4b8f-4850-8501-32a8ee0cde62', +'d3333dc4-f691-4698-a906-6f5cf08694a8', +'d39e4cb9-e86f-4a5c-a393-ba2694580a6d', +'d49534e7-7770-4f6d-aaf9-ce9276424d8d', +'d4f75c14-40aa-474f-8f8d-d2253fa436d0', +'d50ee31a-7beb-4b62-ad49-dc09a32a3c2d', +'d5d39fd6-1918-4b0d-8e35-b5a883f0572b', +'d6ca02fb-1402-4948-a3d5-72db83188266', +'d6f449dc-c443-4636-ae89-ac0762446818', +'d718637b-7ecf-41c7-9eed-dd1422f9c75b', +'d97e4791-9123-43e5-9c3f-a16cbc873662', +'d998b483-059f-4fe5-8dac-aa92ab339b35', +'d99ed4eb-f08f-4472-8db8-f638644dd77e', +'da145bec-7d2f-40e3-9038-5392f31b9f34', +'db268849-bc1f-4993-8934-41a1d4b56102', +'dbab866d-a4a0-48b9-bd4b-d742602d2a49', +'dca70bd5-e9a2-4f9d-ad97-94f3a0b03923', +'de382a95-ede3-4c14-8995-f87ad5ea29f5', +'df00db77-b8fe-4e6c-924d-091c1e964f26', +'e196b6df-e145-4562-9d43-eb29bd914341', +'e22b567a-11b0-4c6c-bd7f-7245f637337e', +'e2e45fc5-dde5-4a96-90e8-b5284df37fe2', 
+'e356dc73-543d-4bc6-ab28-9607f6506721',
+'e80d23cf-1a98-482e-8639-db3ffbd714cf',
+'e910b71d-c830-4d01-92cf-9dea870fb114',
+'e9ecab4a-c4cc-444a-9ca6-33377c71b564',
+'ea663435-a2ed-4d69-85ac-f15af6702b83',
+'ea7826c5-a5c7-4657-a904-7d920a61d733',
+'eca3a237-c046-49b7-b532-f1b868466b40',
+'ed4cfcae-6cf5-4481-a845-2fec82a9c4a5',
+'ed8f5b20-da0e-4121-9e69-d23aeaac790d',
+'ef3e6067-c852-41ba-abe6-9b40f9175fa5',
+'eff892af-73ae-49f7-bc52-c07313a57d54',
+'f26ee2d6-c769-4f79-999b-b43d17366c0c',
+'f28798fd-6b41-460a-8ade-8067a1405008',
+'f497b73e-65c3-4344-9199-0c415f39bbd8',
+'f591e4d4-3aa9-4590-b8f0-e5104c19ba54',
+'f8aa25c2-f504-40a9-a148-1d2caf8458c0',
+'fb523988-b75e-40fb-913c-451f39ca17c8',
+'fbbae02d-3f67-474a-9e08-aeb89330809f',
+'fd2a87de-ba00-4675-a782-496e5d947cc4',
+'fdcc9f15-e984-46c8-b35a-4adbdba86026',
+'febf2f13-991c-4c8e-924f-d0431919d9f8',
+'fedd279e-617d-4724-98af-4aafed359e1c',
+'feef4c70-0a72-4c87-bba7-7207d832615f',
+'ff8a020c-c852-443e-ba07-b62277561544',
+]
+
+import time
+import pandas
+import duckdb
+import pytz
+start_init = time.perf_counter()  # perf_counter: monotonic clock intended for timing
+con = duckdb.connect(":memory:")
+con.sql("""
+CREATE SECRET secret1 (
+    TYPE S3,
+    PROVIDER CREDENTIAL_CHAIN,
+    CHAIN 'env;config',
+    REGION 'XX-XXXX-X ',
+    ENDPOINT 'localhost:9000',
+    URL_STYLE 'path',
+    USE_SSL false,
+    KEY_ID 'test',
+    SECRET '12345678'
+);
+""")
+
+start = time.perf_counter()
+# A single glob query reads every matching per-asset file at once (no per-asset loop).
+print(con.sql("SELECT HeatIn_Q1 FROM read_parquet(['s3://test-parquet/8357896d-dd72-43bb-814c-ed7728ea122b_*.parquet']);").df())
+end = time.perf_counter()
+
+diff = end - start
+per_asset = diff / len(asset_ids)  # amortised; assumes the glob matches one file per asset_ids entry
+print(f'DuckDB took {diff} seconds (with init {end - start_init}), which means {per_asset} seconds per asset')
+
+# Disabled alternative benchmark: read each per-asset file individually with pyarrow.
+# from pyarrow import fs
+# import pyarrow.parquet as pq
+#
+# s3 = fs.S3FileSystem(access_key='test',
+#                      secret_key='12345678',
+#                      scheme='http',
+#                      endpoint_override='localhost:9000')
+#
+# start_arrow = time.time()
+# for asset_id in asset_ids:
+#     len(pq.read_table(f"test-parquet/8357896d-dd72-43bb-814c-ed7728ea122b_{asset_id}.parquet", filesystem=s3, columns=['HeatIn_Q1']).to_pandas())
+# end_arrow = time.time()
+#
+# diff = end_arrow - start_arrow
+# per_q = diff / len(asset_ids)
+# print(f'Pyarrow took {end_arrow - start_arrow} seconds which means {per_q} second per q')
+#
+
diff --git a/query_single_file_per_asset_part.py b/query_single_file_per_asset_part.py
new file mode 100644
index 0000000..9593c2c
--- /dev/null
+++ b/query_single_file_per_asset_part.py
@@ -0,0 +1,311 @@
+asset_ids = [
+'0072f6a9-f1ee-4df4-8784-99502509fb44',
+'01fe3dee-379a-4406-8519-e0c2352067f4',
+'03095e79-dba3-4309-a580-e9a3ac10b12f',
+'034f3ca1-ed79-46b8-a5d2-e8cebe5358ca',
+'03580145-5d8a-4616-a325-02aa028b17f7',
+'037a6610-8eaf-4104-9b51-2fc57b690baf',
+'04726b2a-985f-4ae7-8645-0776168dc148',
+'04802f7f-398f-491e-b467-497a8b585c23',
+'05e00022-02b0-4407-b2d8-cf9026aec440',
+'05f1b916-41bf-451b-b84f-c0eb30da84cd',
+'096162a0-7826-483e-b166-1a2b8b269098',
+'0a9842fc-ab7f-4daf-960a-30bbb13bb06d',
+'0c83dabf-5def-4b74-a6e1-f3d1dfd89fee',
+'0ebe7571-9956-4ba9-9756-78047e37278b',
+'0f3349e2-2e62-4c6a-a4fa-48e27823f6dc',
+'0f8d1ac6-f529-4845-ab1d-e2587456db3d',
+'0ff7309d-2539-4f1b-a9eb-7ac50566630f',
+'0ffc209e-e41d-4429-abea-fcb6b068dff6',
+'10f49bce-f7f4-47cb-9e95-68dc95bd6ee8',
+'112f342d-1959-45f0-a70d-3210f8bb9fba',
+'1301d46d-f816-4bef-b35d-fe4b1e163a4d',
+'141f45c9-c040-41af-872b-602a085f1b76',
+'14fc14b2-68ef-4a50-a232-2adc295de76b',
+'15132dea-555d-4c4d-a436-da658e3a05e8',
+'15f9ad4b-c225-43dc-8ed1-5891d438cbea',
+'161c5d2c-c0cf-4d23-bbca-34c33382f0e0',
+'16797f47-da39-425c-9516-b5700e03172e',
+'173cf6e6-4544-49cf-afa5-c3d70c7ca2fe',
+'1796aac3-7c55-4758-938e-34411789bc90',
+'183f3031-edec-4213-99e6-485d42f8d803',
+'1b632ed0-eb12-42cc-931d-1037d02bee60',
+'1c5d5add-c090-4169-b645-0f8355d63e5b',
+'1c9bebc5-8944-4693-81df-701b6a08bc56',
+'1d6e9814-f3c4-484d-a74c-166271e03978',
+'1eb5555b-2232-4290-9b75-7db481334d7a',
+'1faa070f-c120-4b3b-a9c5-cd824aefa272', +'20fb616e-814f-4543-be49-c0465eab4502', +'2115e8ba-2c87-4682-8998-e1fb77ee23a1', +'21860edc-fca3-4611-9dd8-30847a0d9303', +'223827f1-24df-4f4c-bcae-bae47dae689a', +'229638a5-ae38-4163-881e-436148ccce66', +'22b3b59e-83a2-4e33-a936-dfa941e47b1a', +'22e953cb-265d-4780-b8dc-90850b6c9371', +'22f18fe4-8fcf-4973-8f0b-faacbda8eb3c', +'281cc7d8-e7e7-4d18-9e23-0dcda9117cd0', +'294949c1-0c80-408e-bbe9-3dc576c67dc1', +'2a14cac1-5423-46cd-b9d5-876864b7f2c9', +'2a95b978-47e7-4d6e-838f-0de7da569fd5', +'2b424283-ecb1-4c0d-8a77-f6e5c899104f', +'2c064ed7-ee3b-4506-9e36-6377853a0f22', +'2f06723d-9ac4-44f6-a84c-8f3616305158', +'2f07646d-c482-4101-a701-00c505c095bd', +'300d7a63-aaaa-4240-825f-16154f4c3970', +'3088aba4-ddff-4ff1-9cdc-574acacb7c66', +'30e107f7-4f8b-4b85-b295-b89baa316739', +'319f4d19-26d5-4312-953b-2924a8fb8ca9', +'34a932a6-549b-4df1-8808-067ee1c2e119', +'34fbd087-f869-48e6-bca0-c945ecd06b07', +'35436b05-450a-4e8b-8b0c-ef6ae8793d72', +'3637b12b-07ce-4c20-a746-18690b22377d', +'36be8c1b-8334-4e89-a690-8da7b3f2f58c', +'37efc8e0-370f-4c79-96b2-87debe9a88d9', +'382128ae-cdb8-4f5c-88cc-f0eb4f506bdf', +'38b7d381-9375-4820-99d8-7deffbd29053', +'38fdb513-19a3-4c95-a720-479f7a41ae70', +'3ab93039-1f8c-4788-a25b-ee15ad2085ca', +'3af17951-6d9b-4c68-9260-f89e006c01d9', +'3ba41ac3-abd0-4142-bee1-f999fb5d52a9', +'3c6aa4a9-3281-4521-b102-2f5992759802', +'3c84c106-f456-4fbe-be8d-75c8bb780680', +'3cebf3ef-fa29-4f91-8aa8-fb01daa38227', +'3de6f96a-cdb7-474d-8f76-f6f13e1b53e2', +'3ff472c5-47b5-4243-950c-9f6d3c4c372c', +'40edac8b-632d-4c14-ab40-58763071914a', +'42dbc6fb-39be-423c-856d-ffbdb6fd85b6', +'4316132c-b6bb-40ed-9b36-7fcc26415f98', +'45b757b4-85fc-4608-87ff-9350464f3f78', +'45e5d83a-b198-46ae-af81-257cdfefd7f5', +'46743f0e-baf3-4a31-b9ee-2ab316717794', +'4875ac42-e787-4069-ab71-9dca82adacf2', +'49abe6c2-04b3-43de-970e-d0fd01089d7a', +'4ad3c6d5-4dfe-4e9c-8659-7188adaf7002', +'50d70445-1545-490f-9911-8251c045c579', 
+'5164685f-5c07-4406-9741-e51b193ea68b', +'520ef374-b7cf-4a70-ab40-c9c2b645ec47', +'52cc454a-390b-43ae-a67f-1bd5ec280c0d', +'5483e35c-9445-4df9-af9e-1030a863d621', +'54afb068-3056-446c-965a-8d0df9f5627f', +'55ab067f-679f-432c-a5f9-dcc601197d39', +'55e77ccd-c14c-4e1a-8ccf-182986298c20', +'56a0553f-69df-450e-ae5f-b6fb5142e752', +'5869d251-d4e0-4dfe-8489-d5a9283337b5', +'5873ca8d-c6a1-4346-a3aa-520528ca5bc9', +'599bbe18-f746-49cd-ade6-7c49b58e0b21', +'5a9b6cc4-aaae-4977-a220-d0824cc8a46d', +'5b1b1d56-db4d-40d1-8c6f-7614fe3724b3', +'5d2e83cb-0869-46a1-9420-96fec052b5a1', +'5e928198-5fe9-4017-8773-977c9493b5b6', +'5fc24a75-2bc6-4d14-b9a6-3acc34980a13', +'6165801d-6877-46af-af44-9f50ed06b10b', +'619835d4-307f-40c4-8319-950c1537daba', +'61b7faf3-e2ef-4562-ab32-78edd6b461ff', +'633b55d7-e00b-4f30-8f3f-028c95429618', +'639f662f-61c5-4f14-b69d-832ee38159f5', +'664ea8bb-0ca7-412f-858a-f5ed78ae120e', +'670017a8-6cf8-4a34-84e5-dfb5065cd586', +'67081912-c2ae-4782-be6a-c53c7db63544', +'67614d54-682b-45bc-bab7-486eb07c3bd7', +'680cb99b-4d0c-4482-a165-0dac0eb70d2b', +'695f2050-cdb6-4582-bebf-cbe63912f2dc', +'6b5b511a-8137-4127-9eb4-6b370c980aec', +'6c81af18-6643-483a-872e-52543e4235c9', +'6d1ee846-03d7-404f-a495-b5097934eedd', +'6d50a945-7bb5-4211-983c-cea0c1350f8a', +'6d645457-5afe-43d2-abe9-32f83c1a9508', +'6e3fc504-5a58-4933-896b-4429c9f0a7ba', +'6f9fb918-56ec-4568-913b-94e8e79e7234', +'71aee447-dc9b-4e77-a3f8-62d51a7c0003', +'72f4ca2c-981a-45a0-85f9-00ecaef9a04d', +'7359af99-d27d-4a97-b9fd-996a19a96902', +'738bf4c8-25df-407c-ae9a-e61ae75e43c7', +'739aea80-653f-473d-a2cc-e1d06af8750f', +'74a3532d-91d1-449c-aac1-3a0e90cff7f6', +'74b8dbf4-c1c6-4c92-abb4-9209ea20a461', +'757988fe-e9c6-463a-8e25-cc08e28cd510', +'7706e14c-f530-4f4d-a783-d8f3be3b653c', +'774ae793-ed51-42e5-91ac-8aa1c4ae398f', +'7821b2e1-9bd9-4061-a98e-cff6fad3adcd', +'7c11dc5a-6507-41d4-98b8-7574b9a67157', +'7c99e14d-be2f-4b82-8de4-4969b350db81', +'7d0b6fd9-0491-426b-b7d9-d938e42debba', 
+'7d5d100b-ae6e-4856-937a-a8edba6a4df7', +'7ebe0f0f-793d-4d3d-a025-86874e38b683', +'7edc5a5f-5894-4c84-afb0-dc50c461ca90', +'7f5f15dc-7cb9-48b1-a259-47cefe321a6f', +'7f8cbb47-0153-4a4c-8309-772264d9a6b2', +'7f914d1d-8c85-4fd8-83cb-42862b460526', +'804bbb55-a20c-4b06-8d59-24f42ee2a844', +'82849578-4aea-4f96-85a5-4e751b8608f3', +'82c1a105-3dde-4d54-8205-d9462393fd74', +'82e9d9f4-d3e1-42bd-92bc-a349da269ac5', +'846f154c-d546-4a9a-bbc6-b073bf259909', +'8590fbb1-f722-40ea-abdf-a136b3f18423', +'86d6859f-8115-4620-aac1-6ab90c335245', +'890d3fe2-8c0b-40c8-9472-46cbd7354caa', +'8aea3baa-454c-4879-90ed-e4a69ad7e47e', +'8c366bd0-d4eb-40d6-b047-9944324d2cf2', +'8c9f914a-d765-4904-ac60-f93763702b05', +'8e70ceac-cd8a-4ad6-802c-0a210461b69d', +'8f239165-a14c-42c9-b09d-3cfde306ce0b', +'8f4a58ed-226d-45e6-aa5f-be6fd7353b5b', +'8fd73917-a270-4714-bd7a-6aa405c44880', +'9109cdc1-7b30-4992-bc9c-953c85e60442', +'912c7ee0-7687-4a22-a329-7a8bf73b3181', +'927213ea-bdf1-48f1-8098-548e599f555b', +'928ca3c0-e76d-4b4a-af9b-396fc2dfa7a7', +'946a3fb6-b716-4159-912d-c3bf478776fc', +'954d7320-84af-4c92-b861-ab73e70fd9a3', +'97d970c7-5362-412e-b525-8a7d36ee3eb0', +'98d2e116-98e2-4d53-9245-791e8da15704', +'9a4bcec4-1795-40aa-856d-b2a695ece957', +'9b7b4a79-63ad-4e67-93f2-d5a982832031', +'9c770ecb-3f17-4d1e-8665-902e348f30cc', +'9cae434c-6580-49ba-935a-6ac8b665a825', +'9d105b7d-caea-40eb-b9f7-f945a3ff339d', +'9d5355d7-bf0a-4ba7-9e4b-1036e35c3d44', +'9e97c9fe-2af5-479a-bb2f-bd8b86bcaa11', +'9eb788cd-4e8b-445c-8ecc-4756014d90b3', +'9f05aa4f-d161-4d79-8010-7b7bccf7caaf', +'a0c8fd3d-67ca-41a0-baab-628d7fd88c01', +'a298c7ba-6355-4c52-86b6-7672df411c14', +'a4608e33-e8f2-448e-be32-2bc9d56ebeed', +'a6c14b1c-6129-4067-a1a5-e6721d37a368', +'a7113049-f80b-4aeb-8919-573dbcf230a8', +'a8084ffc-7e0d-4811-95b1-d46877e7497a', +'a8eb6ea0-5e30-41ac-872c-a7089455ef51', +'a9ab102f-8f0a-4b6e-8d23-5771859be120', +'aada0654-d337-4214-b362-14234d03f05e', +'ac8cd7c7-8186-46b3-9852-65e5d89409d4', 
+'acaeb758-5de6-48a1-9c84-adbb69f9fc01', +'aed66a91-2f04-4811-89db-b927ace5649b', +'b1770976-f702-4593-a2cd-beb2e75f9202', +'b2f40060-eca2-4f17-9ea5-68e20abf8d07', +'b4be6a13-b22a-41f4-94b5-8330ae013a75', +'b5c2ac31-f903-4e8a-99b5-b8ceefbd98f4', +'b6639d1e-e19a-4063-8a07-80c395acedcc', +'b67fb1dc-526f-432d-bea8-ab79c5b30210', +'b6bfa952-6f68-45d7-9f84-51238b558a3a', +'b9988a49-3303-4d41-8a3e-1e1c6333b234', +'b9b3c29d-4931-4b23-af19-8ef614e7d576', +'b9ecedd5-321c-44a5-b9eb-4756c1a94cba', +'bacc229f-e4e1-4229-b635-7ae2647f8f8a', +'bbf0c360-a55e-4594-9673-9ad76a187170', +'bc186969-ee31-44f4-a3fe-e6112905df09', +'bc4b711c-15f7-4787-ad05-3dcb80b8031e', +'bd7d4c9f-65c5-4237-a91a-6b9149988d11', +'c077a86c-4b3b-4074-a0b3-7aacc369a6f7', +'c1b1825b-b436-4dbe-8fa2-49a38fa8c1a9', +'c2327af7-131d-4f5c-84a2-0365eecde28a', +'c3cb1850-5e08-48c4-bd39-e94c4f0fbfa2', +'c4347dfb-17ae-4835-8628-9a261815129b', +'c4a465ab-b0c1-49ee-b8cf-c595cc1f6c4a', +'c4bd6138-8d67-4d46-b3f9-de6f0f7db94c', +'c4dcbee5-1adc-48b6-b37f-03f2eef2f159', +'c6ec615c-3eb5-4cef-9ac4-33026861e18a', +'c79cd108-ba69-4c21-9831-826b934432b2', +'ca6ec436-98ca-4c82-96ca-d22b46d775d1', +'cae97d97-970e-4063-a42c-824bcd60f9f5', +'caf8afb2-0962-48ba-9bc1-c16285dcf42c', +'ccc32fd3-3b92-49fc-b163-3bbe41c341df', +'ccc795b1-d899-403a-8016-c6e06ef7cdb3', +'cdb0ea73-f561-4cda-ba1d-d52fcc093159', +'cdfb5e71-755d-42b9-80a9-2fe71fa2132c', +'d164f428-43b4-4d01-ad7b-33c602a89d64', +'d2ded9af-30be-452f-9fbe-025c01d444fa', +'d3ccf0af-edfa-4837-a9fe-18815e2f6227', +'d5735b6f-6df7-4f3c-baf6-272efb8434a4', +'d5fa38ad-1a75-4e53-b88f-bf53c9a46f8e', +'d68f59e3-7f23-42bf-81e4-ff79becee609', +'d726c481-4aea-44c2-94fc-3ec72304d851', +'d7a355c9-b482-4a7b-887a-3294eddd4d8e', +'da678c46-f054-4ee4-a4de-556ee5bcd8c4', +'dca683b8-72e0-42d0-b73f-349e6a1c363c', +'dce36f79-8c2f-40d7-a057-94bcf6473020', +'df791f92-6de5-48b9-aa7b-f6ef54c267b3', +'e20d968f-df7e-4a13-816b-404f88c74de7', +'e3062068-8594-46bf-b6e3-4810ce98368f', 
+'e561cbf9-9833-479c-aecc-4b4e6c13a438',
+'e57a9e22-5d59-4f44-b57d-ad9ea0ae1cb4',
+'e5ee8e60-de1c-4bfd-a02a-9e14b76e5277',
+'e6f6ea2b-3863-4c92-8874-617a64c62c85',
+'e793e723-9fed-49fb-acf0-6002a2fbeee6',
+'e805235f-09a0-4949-9a54-ce8c18061440',
+'ea5c7149-6108-4846-9705-dcc006ee550f',
+'eb28ae31-a89d-42ea-9c26-1cf03fefe8bb',
+'eb44956a-b566-42eb-b3d2-7b25c68f3213',
+'ee610391-80fe-4b30-b985-06037b5bf56b',
+'f252e6c4-4bc1-457c-91bf-fa0b8d5b20bd',
+'f674e6bb-4c62-4d3c-9163-0a3bb6900485',
+'f76bc350-35d0-4cf4-a006-3d0092bde6f3',
+'f7d47d29-d6f8-498d-b45f-85af220dea27',
+'f8b7d520-e7fb-4908-9866-5ed5f6a2f1fb',
+'f8f8abe3-36c3-42db-b60a-96b66be5c86c',
+'f9b95550-1e82-4315-9b05-167301c7eb66',
+'facb6aa3-d210-447b-b9dd-0e97b93a6087',
+'fbacc6e4-25bf-4cbc-8a65-c5b2e758661f',
+'fc6b1d9e-37e2-452e-9103-345ae2397970',
+'fdbaa518-d1bd-40c0-be6b-e6492a9276b0',
+'fe25f3ce-ec4a-40dd-b4a4-1d6e6c48aa8a',
+'fe5cd093-ba48-490d-910f-fa8fc6ea1bc6',
+]
+
+import time
+import pandas
+import duckdb
+import pytz
+from pyarrow import fs
+import pyarrow.parquet as pq
+
+DATASET = 's3://test-parquet/62a079a5-5aff-45cd-930c-eab253700375/*/*.parquet'
+con = duckdb.connect(":memory:")
+con.sql("""
+CREATE SECRET secret1 (
+    TYPE S3,
+    PROVIDER CREDENTIAL_CHAIN,
+    CHAIN 'env;config',
+    REGION 'XX-XXXX-X ',
+    ENDPOINT 'localhost:9000',
+    URL_STYLE 'path',
+    USE_SSL false,
+    KEY_ID 'test',
+    SECRET '12345678'
+);
+""")
+
+# Benchmark 1: one DuckDB query per asset (perf_counter is the clock intended for timing).
+start = time.perf_counter()
+for asset_id in asset_ids:
+    con.sql(f"SELECT HeatIn_Q1 FROM read_parquet(['{DATASET}']) WHERE asset_id='{asset_id}';").df()
+end = time.perf_counter()
+
+diff = end - start
+per_q = diff / len(asset_ids)
+print(f'DuckDB took {diff} seconds which means {per_q} second per q when accessing each profile individually')
+
+# Benchmark 2: a single DuckDB query for all assets. The IN list is built
+# separately: reusing the outer quote type inside an f-string field needs Python 3.12+.
+id_list = ','.join(f"'{asset_id}'" for asset_id in asset_ids)
+start = time.perf_counter()
+con.sql(f"SELECT time, asset_id, HeatIn_Q1 FROM read_parquet(['{DATASET}']) WHERE asset_id IN ({id_list});").df()
+end = time.perf_counter()
+
+diff = end - start
+print(f'DuckDB Took {diff} seconds when accessing the profile for all assets at once')
+
+s3 = fs.S3FileSystem(access_key='test',
+                     secret_key='12345678',
+                     scheme='http',
+                     endpoint_override='localhost:9000')
+
+# Benchmark 3: read the whole partitioned dataset with one pyarrow call.
+start_arrow = time.perf_counter()
+print(pq.read_table("test-parquet/62a079a5-5aff-45cd-930c-eab253700375/", filesystem=s3, columns=['HeatIn_Q1', 'time', 'asset_id']).to_pandas())
+end_arrow = time.perf_counter()
+
+diff = end_arrow - start_arrow
+per_q = diff / len(asset_ids)  # amortised per asset: only one read was issued
+print(f'Pyarrow took {end_arrow - start_arrow} seconds which means {per_q} second per q')
+
+
diff --git a/query_single_file_per_asset_part_with_carrier.py b/query_single_file_per_asset_part_with_carrier.py
new file mode 100644
index 0000000..40ab51b
--- /dev/null
+++ b/query_single_file_per_asset_part_with_carrier.py
@@ -0,0 +1,392 @@
+asset_ids = [
+'02f75d16-d840-4f8e-8f47-09d298711e1e',
+'0485e823-b4e5-4a7f-88fb-e42f36afca6f',
+'05a7dfaf-e429-4fd8-93fd-ebc031e4f84f',
+'063cc768-7e55-49ca-814a-ecb2e4862bd6',
+'07729321-54d0-4099-a9eb-93e2d977ddc6',
+'08365cf3-1ac8-4d63-b73c-2c39e20eb0b7',
+'08ebbd35-bfee-47cc-adbf-c17a8f007113',
+'0a098118-ca01-414d-b964-c6fa6c2e7cd1',
+'0a4b054e-e6f4-43ee-85dd-134c6b25ef8c',
+'0b0da402-3d90-4090-95b9-535f32d4d001',
+'0ce74cde-b60f-4e0c-8764-4766fb9825f7',
+'10e14b61-96f0-4bf3-80a0-7899df16bed5',
+'10ebc951-bf42-44e3-a394-9e7cc0da9c47',
+'1236b14c-8cbc-4767-9849-befa98e2718c',
+'13060e78-8e5d-43dd-9c78-11122ebb6242',
+'1433a976-4c18-4ce2-9820-375b58981b1e',
+'153b589f-5db7-48b1-a590-b69322541a47',
+'174cc624-8990-4320-a15e-1e27890b241d',
+'18224367-44ad-4251-83d0-da03954f14c7',
+'18a01830-d2aa-4d6e-b5d3-3f3e03bb8c23',
+'18cf7674-f6fe-4982-be93-5a5e0ec2ad8f',
+'18eb50c9-a184-4f36-b3e3-4230f0549eeb',
+'1b7acb2f-2877-4a66-895d-70c3a7c8fccf',
+'1c2ae0b8-8216-4f77-9579-c35104a43ac2',
+'1c989eae-1bb8-4fb2-a1d2-07a3bcdc74e2',
+'1e7fb4c8-9abe-4b00-98f5-d0e46d43c0ef',
+'1ed54f66-9560-444c-aac1-a153691334f0', +'1fb8765e-164c-46b1-b7ad-00b4066fbebb', +'2133743e-00e0-4bce-a932-41cf74774e69', +'21e2b6fb-fcf5-4df4-9a15-ac7b8fdd6b14', +'2317d337-e457-44e4-8cf3-b5c48f032db0', +'23b804fe-5f50-4cf8-ae69-69b12eabd4c8', +'25fce21c-1d5b-45d9-af78-ea1ba151f679', +'273fffc5-f0b9-4cbf-aac4-8bf172e75e63', +'277bb433-6182-41ff-8362-68bd1b6a3613', +'280cfd67-1492-4ca5-af27-0969720e2f65', +'297d0e93-d719-42a5-8f8c-d8d909d05d0c', +'2b19eb42-fedd-4751-bba0-462f82c5d8dc', +'2c0937f1-d56f-4da7-ab73-e865c5976ffa', +'2c0f831e-8612-471e-8bcc-aa2b53a3f25c', +'2c2017c9-d1ad-43dc-8606-85e190279d82', +'2c78fa49-5f6e-452f-91f8-9028c21daf01', +'2e60831a-b71e-4158-adf6-ced5fcc537e2', +'2f7d326d-5d70-4d65-83c3-9acc19ed9c04', +'31384981-6d06-45d6-b70a-3ade5b1d8af8', +'313f71d3-23e7-4469-bdda-db037544784d', +'31e9b082-ff9f-4302-9c0c-6e1bea1d6b55', +'31ea683e-f68c-4f2a-9c66-f484ce0c19ca', +'32f6c040-c7c1-4760-8f75-edbe8dc71db0', +'3390dbec-3fe5-4d9e-86bd-5713bb9711bd', +'34171fbf-66a3-4b87-a6fd-8fdb9cb408c7', +'3494feae-b7c8-4040-b717-2fd8e6eaec22', +'34cc583b-4f09-45dc-b28a-3e734f9030cd', +'35755199-d5e8-4c7e-aada-a798eb335cb7', +'35b72ed7-283d-4e3e-a6aa-5fff7382e23c', +'36af591e-da75-4e49-8e60-3131632d22d8', +'36ec8d32-2a5b-4ab8-92ed-70cf90f2d6af', +'3797ff91-0a7f-4d89-a56d-1e40d2c12dc3', +'397c6151-f8ae-4b2b-98bd-bbd752befbbb', +'3a46877d-b329-42ed-93be-7a0be1e37319', +'3aaba43b-5350-4f9a-89f9-e332bd214955', +'3aae0335-644e-4937-a912-18857604b631', +'3b367516-3db8-49dc-b0ac-6c69f4764b9e', +'3b97ba7f-1ddb-4750-b914-c4dbec86234e', +'3e23faf7-00c1-4a37-b33f-a3ab51c780a7', +'3f00bf5d-d3e4-49c2-b2db-293b86c35260', +'3f15631a-0a8e-488f-b856-e643378d35a1', +'402ee185-d88e-4def-ada0-e8022d5d22d4', +'4048cf8e-d552-471e-85da-a078239676aa', +'40c3834a-9ba1-4c12-83d6-9d456e31170a', +'40d6f00e-8709-4e24-b355-fb2003798f4e', +'4269fde9-d803-49a4-a7d6-82022b302f1f', +'42f7f9c1-07ac-4860-b8ea-6c6efb13373e', +'43651d57-ebe6-4108-971d-baf07f452ef9', 
+'448e2522-c74c-4cd0-8f59-ce6e49f91eb3', +'46ba0d8d-d398-4aeb-9698-ece43a54e5b7', +'47bd2557-2900-4ff4-bdce-1407aa1a52d8', +'49f333fa-941a-4afc-bb95-1895a78a6157', +'4a0a0d5a-e51c-4fcd-810c-572e742a9e23', +'4ae343b6-036f-47ee-b8b7-ec1ed744346c', +'4cae40b0-e756-40c1-8d9e-0d7f95bb3d1c', +'4cf8f9df-ab89-44f1-a7df-cf9985b8f4e7', +'4dadc112-d330-4ca5-bc94-126a3c24b56b', +'4e7bf93b-633f-4938-b007-69f8d253291a', +'4f67baeb-d624-4753-b856-ccb76742b9bf', +'52aa069a-c128-4f39-a891-dc6d9fa6a62e', +'52e31617-fba5-42b6-8c4d-551a83fd1c18', +'5756822a-af9e-4d74-83bc-03b6c82a4d6c', +'57f26c2f-af47-4d42-8df7-50622f2a4e56', +'59ec8cbb-b3e2-4e57-b4e1-ad813abd3a52', +'5a7ceafb-cfcd-4109-abcc-a971271ecaf1', +'5d47ac5a-e264-4927-beea-8bf9935422fc', +'610f8a66-d902-4e79-a87b-43ce977ff953', +'629def48-91fa-4b79-9713-fe7ae74bf59d', +'64049c54-7cee-4a92-a195-2e2f22b2c9b6', +'64494fa4-1544-4045-ac1e-d423b0bbeab6', +'64c81ddb-0fd6-47bf-be0a-be380ac8c9f4', +'65b59c4a-0126-43c0-8002-48ad8f2c395b', +'6a693ac8-90c7-40ce-b985-9bd2d620fa92', +'6ae29ec7-23fb-4025-9997-d4d47b640eef', +'6b180805-89d8-492f-b747-60b5980e421c', +'6bd80b62-25cb-4797-940d-7b55959073de', +'6f3b74c6-62ee-49f9-b84c-bd24055d39dd', +'6fa4da3d-52e0-44ec-8d41-be09957fb120', +'7028b09a-b115-4ba7-b247-170d7f1ae56e', +'70e42b4a-3ae8-4583-83e6-aaac13b2a1ea', +'714f07a3-e989-4a12-a7d6-ea8a9b57f28d', +'72704bcd-4fa2-4fda-96b1-c723d5874f7c', +'727b75a4-739c-4d00-98b4-9eba1003e7be', +'72e34c4a-8f71-4ad6-9c4a-05aab0e842a2', +'7321523d-f60b-4a55-ad6d-d23ae0f457ae', +'7383f552-ddb3-4416-a112-7d529889dbb2', +'740e4ae0-31dc-4523-be55-7ff3ca0d23ab', +'743de131-fd8a-4798-b356-e78a1213d29d', +'76e0c92d-6dcc-47d3-bab8-69bdc44dd649', +'77bece92-d531-4a4a-b4ec-d5da2d99cbda', +'77cf3618-07c1-46c2-81e8-032edeb5754e', +'79b10c81-4885-4a2b-8a51-0383e5f940a2', +'79bc5fb7-58a3-45a1-857b-b870359c2c8d', +'7a88f533-4392-4cfd-bbe8-4a2e22db5573', +'7ca9ba42-7367-4c0b-bcd3-19e781a4ec70', +'7da94c32-989b-454d-9134-dcf2d7ef75d2', 
+'7eba670e-f42e-4090-b3c6-b431c787d701', +'7f4b2003-dc5c-4c2e-a636-61e20ccde8cf', +'80646a1d-4dba-43b1-b527-25d1dbcbe2d7', +'80e0fab8-66f8-4874-9606-623b3eec8afb', +'825f382e-e065-457e-842e-01ff03b4b284', +'8286cc53-3f2a-41df-898e-1eabeed26720', +'845bdadc-4768-4f54-bcad-371b6caa0e82', +'880ba8cd-8875-4418-aa5e-be3051917f2e', +'897cde44-51e7-4e19-8f0d-8f5978b3eef5', +'8aa2887d-1c92-483c-9c74-e99b9b51e7d9', +'8ae719e3-bcbe-4e2c-b9ad-192094b26246', +'8c769c95-c13b-4a52-bbff-4dc64a9fcac0', +'8cf41808-53ea-4d9c-9852-45f0f574b650', +'8cf956fb-8ead-488b-9947-c4dcd25af46c', +'8deeb300-03cd-4f52-924b-4ba4aba8599f', +'8e43ff49-e332-47e4-9a45-c1e96b243fc1', +'919dc8e4-f358-49a3-9671-353e1a9a6b95', +'9380f119-c748-4790-bbf4-18dbbb7c56eb', +'93f319c1-691b-4126-854e-5dd073c83edb', +'9420b740-688e-4a4f-a7b7-540eebb5d23e', +'94557b82-a811-4616-a491-f55b14c35b11', +'95131f4a-7de2-4378-a137-c63874830c66', +'95eab690-15c1-47de-8785-bf4d980a87e6', +'98436c8c-75d9-4c74-8962-c1f587bd5810', +'98694246-478a-4d54-9f1d-2159771f918c', +'98e238eb-9589-40d1-9293-c62d9eba4eb4', +'98e5f154-8824-45a8-9dcc-c6974195bae2', +'98e858dd-58e2-494b-bd15-bcc5813b2133', +'999ce16c-4db5-466b-b4b9-9a2beec9aedc', +'9be9e762-64de-48f1-956a-ee956e77764b', +'9c935095-e3cd-4914-8700-8eac8184dd92', +'9ce76654-6555-479a-98a5-976906285d2f', +'9e9f2983-6610-4249-aeca-6a9d54d38ad3', +'9f4f72d8-fee2-4196-91de-cc670bdd5ebd', +'a0a1634f-32f8-4b97-9128-4be8aecabf35', +'a140e523-7135-41df-9653-66708fe0f3a8', +'a24c8574-7414-449a-be9c-e6f0399266b6', +'a2ff83b7-be63-42f9-90d2-9591ce47cd30', +'a4925c63-16d1-4d51-94ed-9d02a35ae2c2', +'a57197a9-2a05-4b76-8aa8-ec2b2e21c476', +'a7418e2e-3f39-4b84-9afd-6e7f3a9d0842', +'a85ef2e1-375b-421d-b66e-626fb0f8c302', +'a959fe7b-6b98-45a7-af94-a5e57a7434d4', +'aa47178d-9406-416a-b80b-366e1d6f662d', +'acdb043a-75d9-4777-84cf-e573066d2d91', +'af6b1234-d542-4df7-b2f8-d1fcc453bb3d', +'b0eed91f-c510-4463-88b2-2844ad58048a', +'b423a0a2-db70-40e0-bb7b-19e82e375b62', 
+'b4640691-4ec0-4bbe-9390-f975407669ab', +'b5c0c1a6-ad0f-45fa-a5dc-fd3203ee0ebb', +'b690b5ce-6dfc-4ecd-97af-8838f948117a', +'b74243ac-5ad6-4b1d-869f-482d7728248b', +'b759f9f5-e9b1-424e-93c2-ef1579dcf448', +'b7e7f626-435f-437c-a8ce-86ebf60926c7', +'b87df7e2-5424-4540-95c7-85800794f6f1', +'b9aa76bb-a593-4c38-8fad-d7a4a9bb1ce8', +'b9b4ca10-06c5-45d1-88db-6b7cf3170b23', +'ba1ac14c-77be-4c8f-9947-d25dbd61ba5a', +'bb94427b-9721-4036-91e7-580792bd12a4', +'bbd1db46-3f19-4e23-8fdf-75eee57d2041', +'be88a8d9-c6d0-4960-bc06-1d1f73ad74e9', +'bf5f81c0-e82d-4801-9a5f-0e47d7ca4665', +'c03f288e-b5fb-4ef2-8279-a87a745281a3', +'c055edc6-e2a4-4b1b-8d5d-73d28bbe0f62', +'c0f41412-b0ac-430a-a14c-bdde8ed08910', +'c0f85428-c363-40b4-ad55-9047e7d9dedb', +'c14a7bb0-f4c4-460c-9ae0-687c130b0166', +'c1c08e2b-0a7a-4a5a-93e5-bde386f3ffb3', +'c2f02942-2df2-4f9a-ade4-cf320903370d', +'c3248b1e-2fdf-4ccb-b1ee-27e4331ad89c', +'c75430ac-7a14-445f-801c-6119d4ab5ba8', +'c84335ee-f253-4760-9a04-f7fed029dfcb', +'c8eeff9c-5e88-41d5-9be4-a5f1307ed46b', +'c9b5c0d9-ef11-4d62-944d-cb9aa1df8236', +'c9dc5b77-f836-480d-8f82-e37d0a2fcd95', +'cb67ceaa-6aed-4912-b6db-a473d69340c7', +'cec3497f-f1c1-454b-b772-db978c167436', +'cf0facdb-6a27-44df-abd3-828917577ee9', +'cfa75cad-7ea5-4ebe-b9a5-c415d269b757', +'d1a0d90c-2a2e-436a-91a9-4a0b06858963', +'d3681cc9-1dbb-489a-ae5c-26ea5529bca3', +'d3aa9385-958b-4a63-a6e5-d6f251534222', +'d5b3ce98-b668-47fa-bb36-b8d44e6c14fe', +'d6546ec2-b009-40ec-9a5a-09cbc37ae29d', +'d78bae9e-d3d2-4f46-a067-01a7fdfb971c', +'d7a76a57-9a54-4c85-a516-5af983413dc4', +'d7ca9369-764c-42cd-9e82-5b4c972eff81', +'d856b022-36e5-43bc-a5aa-d7538f1ab348', +'d963be49-9a9d-4c94-afa9-def7dc7f9e80', +'da4b54ab-f24b-4973-ae73-9c8d731f107d', +'da8b1f2a-1290-4c05-a931-d210ac52fdff', +'dac40bcd-2441-454c-8de7-5aeb7be03226', +'db3dfb3c-93aa-47db-ab88-bedad8d6a123', +'db623cc7-86ae-42ba-a257-c4734a8f41cd', +'dbfe6c78-cfe3-45d3-95e8-eebe51ff6f77', +'ddbf4f32-6af9-43d1-855e-82f36c30cfc7', 
+'ddda8950-de9c-404b-b4f6-7c820b67c1db',
+'df25109b-b2a2-46de-a0ba-88739356f91a',
+'dff7d942-6335-4cdd-ab2a-d22185b0f3b3',
+'e04356a7-e7e5-4ef8-8cbe-c7b699121a45',
+'e0899c13-f795-471e-a363-8193d6844c02',
+'e0adbfbd-51e3-4997-b4b6-3c44a52e25ee',
+'e19b9015-f8cb-4066-a27e-613737fee192',
+'e295be8a-2be7-440d-bddd-a61c4214a60b',
+'e3a8fa70-34d5-4ea4-965a-a29f04816100',
+'e4fd3827-d071-4d7d-b06e-849762700b78',
+'e5f06aff-d055-4c6b-8823-b63bf7dd9254',
+'e68e6165-f22c-4f28-90e7-de76fa82d0ff',
+'e766e0fc-4bfa-4655-99a7-d7c3911f18c7',
+'e9b4f24d-f508-4244-a344-9226c6ad5412',
+'ea5c6042-e082-4359-b70a-a683abab4489',
+'eb01bb4a-b421-4ad6-a902-3a88affbb2b5',
+'ec1f833c-d29b-4ba7-a0d4-d9485c366e81',
+'ed5e4998-baef-4d59-9dd8-1b1112488ac8',
+'eead3ee0-4430-40a4-8e74-9208f06fbcc2',
+'f336766f-b1a9-4ec2-813f-369d61760f90',
+'f371dc03-b4c3-42d8-b972-e8915445e1ae',
+'f660d764-698c-4091-a9d5-12ed518fc413',
+'f6b29410-1692-461d-8f03-2207f0086d99',
+'f88bd87c-986c-469c-b91f-fef28624ef2f',
+'f9a0a3c8-52f4-4c4f-a95e-8fe028a96a20',
+'f9aa5fa5-2af3-4e51-8593-7f1722393b24',
+'f9c6d748-2c59-45be-9d27-cdf00e09cc99',
+'fb58f79c-45de-4f50-9e74-a30190cfbd71',
+'fc64db72-b68a-4b50-bf01-09b0fc939ecb',
+'fda010cf-b803-4349-8d8b-d0454cc0d176',
+'fdd1951a-f014-4f13-bd9a-f5b7ec6fc22f',
+'ff63a396-e738-4942-9768-8c85686e981c',
+]
+
+carrier_ids = [
+    '48950278-7545-4d20-867e-d05392a46aea',
+    '61fb537d-6378-4438-9a2b-8e52b0f0f04f',
+    '79ead452-9994-49a2-b726-cdf29d182a8e',
+]
+
+import time
+import pandas
+import duckdb
+import pytz  # NOTE(review): not listed in requirements.txt -- confirm it gets installed
+import pyarrow as pa
+import pyarrow.dataset
+from pyarrow import fs
+import pyarrow.parquet as pq
+
+# Benchmark: DuckDB vs. PyArrow reads of the HeatIn_Q1 column from an asset_id=/carrier_id= partitioned parquet dataset on S3.
+DATASET_PATH = 'test-parquet/3f59be60-1597-46e0-9f1f-c5aa0a466a96'  # bucket/dataset prefix used by every read below
+DATASET_URI = f's3://{DATASET_PATH}'
+# Quoted, comma-separated asset ids for SQL IN (...). Built outside the query f-strings because
+# reusing the enclosing quote character inside a replacement field is a SyntaxError before Python 3.12.
+ASSET_ID_SQL_LIST = ','.join(f"'{asset_id}'" for asset_id in asset_ids)
+
+# NOTE(review): PROVIDER CREDENTIAL_CHAIN is combined with an explicit KEY_ID/SECRET and REGION looks like a placeholder -- left unchanged, confirm.
+con = duckdb.connect(":memory:")
+con.sql("""
+CREATE SECRET secret1 (
+    TYPE S3,
+    PROVIDER CREDENTIAL_CHAIN,
+    CHAIN 'env;config',
+    REGION 'XX-XXXX-X ',
+    ENDPOINT 'localhost:9000',
+    URL_STYLE 'path',
+    USE_SSL false,
+    KEY_ID 'test',
+    SECRET '12345678'
+);
+""")
+
+# DuckDB: one query per (asset, carrier) pair, file set selected by partition path.
+start = time.time()
+for asset_id in asset_ids:
+    for carrier_id in carrier_ids:
+        con.sql(f"SELECT time, HeatIn_Q1 FROM read_parquet(['{DATASET_URI}/asset_id={asset_id}/carrier_id={carrier_id}/*.parquet']) ;").df()
+end = time.time()
+diff = end - start
+per_q = diff / (len(asset_ids) * len(carrier_ids))
+print(f'DuckDB took {diff} seconds which means {per_q} second per q when accessing a single KPI individually for a single carrier and asset')
+
+# DuckDB: one query per asset; the glob covers every carrier directory.
+start = time.time()
+for asset_id in asset_ids:
+    con.sql(f"SELECT time, carrier_id, HeatIn_Q1 FROM read_parquet(['{DATASET_URI}/asset_id={asset_id}/*/*.parquet']);").df()
+end = time.time()
+diff = end - start
+per_q = diff / len(asset_ids)
+print(f'DuckDB took {diff} seconds which means {per_q} second per q when accessing each profile individually for all carriers')
+
+# DuckDB: one query per carrier over all files, narrowed with a WHERE on carrier_id.
+start = time.time()
+for carrier_id in carrier_ids:
+    con.sql(f"SELECT time, asset_id, HeatIn_Q1 FROM read_parquet(['{DATASET_URI}/*/*/*.parquet']) WHERE carrier_id='{carrier_id}';").df()
+end = time.time()
+diff = end - start
+per_q = diff / len(carrier_ids)
+print(f'DuckDB took {diff} seconds which means {per_q} second per q when accessing each carrier individually for all assets using WHERE')
+
+# DuckDB: one query per carrier, narrowed through the carrier_id= path component.
+start = time.time()
+for carrier_id in carrier_ids:
+    con.sql(f"SELECT time, asset_id, HeatIn_Q1 FROM read_parquet(['{DATASET_URI}/*/carrier_id={carrier_id}/*.parquet']);").df()
+end = time.time()
+diff = end - start
+per_q = diff / len(carrier_ids)
+print(f'DuckDB took {diff} seconds which means {per_q} second per q when accessing each carrier individually for all assets using partition key')
+
+# DuckDB: a single query for every asset and carrier (IN list over asset_id).
+start = time.time()
+con.sql(f"SELECT time, asset_id, carrier_id, HeatIn_Q1 FROM read_parquet(['{DATASET_URI}/*/*/*.parquet']) WHERE asset_id IN ({ASSET_ID_SQL_LIST});").df()
+end = time.time()
+diff = end - start
+print(f'DuckDB Took {diff} seconds when accessing the profile for all assets and all carriers at once')
+
+# DuckDB: a single query for every asset, restricted to the first carrier.
+start = time.time()
+con.sql(f"SELECT time, asset_id, HeatIn_Q1 FROM read_parquet(['{DATASET_URI}/*/*/*.parquet']) WHERE carrier_id='{carrier_ids[0]}' AND asset_id IN ({ASSET_ID_SQL_LIST});").df()
+end = time.time()
+diff = end - start
+print(f'DuckDB Took {diff} seconds when accessing the profile for all assets at once for a single carrier')
+
+s3 = fs.S3FileSystem(access_key='test', secret_key='12345678', scheme='http', endpoint_override='localhost:9000')
+asset_carrier_partitioning = pa.dataset.partitioning(pa.schema([pa.field("asset_id", pa.large_string()), pa.field("carrier_id", pa.large_string())]), flavor='hive')
+asset_partitioning = pa.dataset.partitioning(pa.schema([pa.field("asset_id", pa.large_string())]), flavor='hive')
+carrier_partitioning = pa.dataset.partitioning(pa.schema([pa.field("carrier_id", pa.large_string())]), flavor='hive')
+
+# print(pq.read_table(f"test-parquet/3f59be60-1597-46e0-9f1f-c5aa0a466a96/asset_id=ff63a396-e738-4942-9768-8c85686e981c/carrier_id=48950278-7545-4d20-867e-d05392a46aea/00000000.parquet", filesystem=s3, partitioning=asset_carrier_partitioning))
+# print('')
+# print(pq.read_table(f"test-parquet/62a079a5-5aff-45cd-930c-eab253700375/asset_id=0072f6a9-f1ee-4df4-8784-99502509fb44/ef8a30d975bd44e9b0feeddbb513ea05-0.parquet", filesystem=s3, partitioning=asset_partitioning).to_pandas())
+
+# PyArrow: one read of the whole dataset, hive partition columns included.
+start_arrow = time.time()
+pq.read_table(f"{DATASET_PATH}/", filesystem=s3, columns=['HeatIn_Q1', 'time', 'asset_id', 'carrier_id'], partitioning=asset_carrier_partitioning).to_pandas()
+end_arrow = time.time()
+diff = end_arrow - start_arrow
+print(f'Pyarrow took {diff} seconds to retrieve one kpi for all assets and carriers')
+
+# PyArrow: one read per asset directory; carrier_id comes from the remaining partition level.
+start_arrow = time.time()
+for asset_id in asset_ids:
+    pq.read_table(f"{DATASET_PATH}/asset_id={asset_id}/", filesystem=s3, partitioning=carrier_partitioning, columns=['HeatIn_Q1', 'time', 'carrier_id']).to_pandas()
+end_arrow = time.time()
+diff = end_arrow - start_arrow
+per_q = diff / len(asset_ids)
+print(f'Pyarrow took {diff} seconds which means {per_q} second per q to retrieve one kpi for a single asset and all carriers')
+
+# PyArrow: one filtered read covering all assets and only the first carrier.
+start_arrow = time.time()
+df = pq.read_table(f"{DATASET_PATH}/", filesystem=s3, partitioning=asset_carrier_partitioning, filters=[('asset_id', 'in', asset_ids), ('carrier_id', 'in', carrier_ids[:1])], columns=['HeatIn_Q1', 'time', 'asset_id', 'carrier_id']).to_pandas()
+end_arrow = time.time()
+diff = end_arrow - start_arrow
+per_q = diff / len(asset_ids)
+print(f'Pyarrow took {diff} seconds which means {per_q} second per asset to retrieve one kpi for all assets and one carrier')
+
+# PyArrow: one read per asset from the dataset root, narrowed with filters.
+start_arrow = time.time()
+for asset_id in asset_ids:
+    pq.read_table(f"{DATASET_PATH}/", filesystem=s3, partitioning=asset_carrier_partitioning, filters=[('asset_id', '=', asset_id), ('carrier_id', '=', carrier_ids[0])], columns=['HeatIn_Q1', 'time']).to_pandas()
+end_arrow = time.time()
+diff = end_arrow - start_arrow
+per_q = diff / (len(asset_ids))
+print(f'Pyarrow took {diff} seconds which means {per_q} second per q to retrieve one kpi for one asset and one carrier using filters')
+
+# PyArrow: one read per asset, addressed directly by its partition path prefix.
+start_arrow = time.time()
+for asset_id in asset_ids:
+    pq.read_table(f"{DATASET_PATH}/asset_id={asset_id}/carrier_id={carrier_ids[0]}", filesystem=s3, partitioning=asset_carrier_partitioning, columns=['HeatIn_Q1', 'time']).to_pandas()
+end_arrow = time.time()
+diff = end_arrow - start_arrow
+per_q = diff / (len(asset_ids))
+print(f'Pyarrow took {diff} seconds which means {per_q} second per q to retrieve one kpi for one asset and one carrier using prefix')
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0554cf3
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+pandas
+duckdb
+pyarrow