@@ -492,6 +492,198 @@ bool IcebergMetadata::update(const ContextPtr & local_context)
492
492
return previous_snapshot_schema_id != relevant_snapshot_schema_id;
493
493
}
494
494
495
+ namespace
496
+ {
497
+
498
+ using IdToName = std::unordered_map<Int32, String>;
499
+
500
+ IdToName buildIdToNameMap (const Poco::JSON::Object::Ptr & metadata_obj)
501
+ {
502
+ IdToName map;
503
+ if (!metadata_obj || !metadata_obj->has (" current-schema-id" ) || !metadata_obj->has (" schemas" ))
504
+ return map;
505
+
506
+ const auto current_schema_id = metadata_obj->getValue <Int32>(" current-schema-id" );
507
+ auto schemas = metadata_obj->getArray (" schemas" );
508
+ if (!schemas)
509
+ return map;
510
+
511
+ for (size_t i = 0 ; i < schemas->size (); ++i)
512
+ {
513
+ auto schema = schemas->getObject (i);
514
+ if (!schema || !schema->has (" schema-id" ))
515
+ continue ;
516
+ if (schema->getValue <Int32>(" schema-id" ) != current_schema_id)
517
+ continue ;
518
+
519
+ if (auto fields = schema->getArray (" fields" ))
520
+ {
521
+ for (size_t j = 0 ; j < fields->size (); ++j)
522
+ {
523
+ auto f = fields->getObject (j);
524
+ if (!f || !f->has (" id" ) || !f->has (" name" ))
525
+ continue ;
526
+ map.emplace (f->getValue <Int32>(" id" ), f->getValue <String>(" name" ));
527
+ }
528
+ }
529
+ break ;
530
+ }
531
+ return map;
532
+ }
533
+
534
+ String formatTransform (
535
+ const String & transform,
536
+ const Poco::JSON::Object::Ptr & field_obj,
537
+ const IdToName & id_to_name)
538
+ {
539
+ Int32 source_id = (field_obj && field_obj->has (" source-id" ))
540
+ ? field_obj->getValue <Int32>(" source-id" )
541
+ : -1 ;
542
+
543
+ const auto it = id_to_name.find (source_id);
544
+ const String col = (it != id_to_name.end ()) ? it->second : (" col_" + toString (source_id));
545
+
546
+ String base = transform;
547
+ String param;
548
+ if (const auto lpos = transform.find (' [' ); lpos != String::npos && transform.back () == ' ]' )
549
+ {
550
+ base = transform.substr (0 , lpos);
551
+ param = transform.substr (lpos + 1 , transform.size () - lpos - 2 ); // strip [ and ]
552
+ }
553
+
554
+ String result;
555
+ if (base == " identity" )
556
+ result = col;
557
+ else if (base == " year" || base == " month" || base == " day" || base == " hour" )
558
+ result = base + " (" + col + " )" ;
559
+ else if (base != " void" )
560
+ {
561
+ if (!param.empty ())
562
+ result = base + " (" + param + " , " + col + " )" ;
563
+ else
564
+ result = base + " (" + col + " )" ;
565
+ }
566
+ return result;
567
+ }
568
+
569
+ Poco::JSON::Array::Ptr findActivePartitionFields (const Poco::JSON::Object::Ptr & metadata_obj)
570
+ {
571
+ if (!metadata_obj)
572
+ return nullptr ;
573
+
574
+ if (metadata_obj->has (" partition-spec" ))
575
+ return metadata_obj->getArray (" partition-spec" );
576
+
577
+ // If for some reason there is no partition-spec, try partition-specs + default-
578
+ if (metadata_obj->has (" partition-specs" ) && metadata_obj->has (" default-spec-id" ))
579
+ {
580
+ const auto default_spec_id = metadata_obj->getValue <Int32>(" default-spec-id" );
581
+ if (auto specs = metadata_obj->getArray (" partition-specs" ))
582
+ {
583
+ for (size_t i = 0 ; i < specs->size (); ++i)
584
+ {
585
+ auto spec = specs->getObject (i);
586
+ if (!spec || !spec->has (" spec-id" ))
587
+ continue ;
588
+ if (spec->getValue <Int32>(" spec-id" ) == default_spec_id)
589
+ return spec->has (" fields" ) ? spec->getArray (" fields" ) : nullptr ;
590
+ }
591
+ }
592
+ }
593
+
594
+ return nullptr ;
595
+ }
596
+
597
+ Poco::JSON::Array::Ptr findActiveSortFields (const Poco::JSON::Object::Ptr & metadata_obj)
598
+ {
599
+ if (!metadata_obj || !metadata_obj->has (" default-sort-order-id" ) || !metadata_obj->has (" sort-orders" ))
600
+ return nullptr ;
601
+
602
+ const auto default_sort_order_id = metadata_obj->getValue <Int32>(" default-sort-order-id" );
603
+ auto orders = metadata_obj->getArray (" sort-orders" );
604
+ if (!orders)
605
+ return nullptr ;
606
+
607
+ for (size_t i = 0 ; i < orders->size (); ++i)
608
+ {
609
+ auto order = orders->getObject (i);
610
+ if (!order || !order->has (" order-id" ))
611
+ continue ;
612
+ if (order->getValue <Int32>(" order-id" ) == default_sort_order_id)
613
+ return order->has (" fields" ) ? order->getArray (" fields" ) : nullptr ;
614
+ }
615
+ return nullptr ;
616
+ }
617
+
618
+ String composeList (
619
+ const Poco::JSON::Array::Ptr & fields,
620
+ const IdToName & id_to_name,
621
+ bool lookup_sort_modifiers)
622
+ {
623
+ if (!fields || fields->size () == 0 )
624
+ return {};
625
+
626
+ Strings parts;
627
+ parts.reserve (fields->size ());
628
+
629
+ for (size_t i = 0 ; i < fields->size (); ++i)
630
+ {
631
+ auto field = fields->getObject (i);
632
+ if (!field)
633
+ continue ;
634
+
635
+ const String transform = field->has (" transform" ) ? field->getValue <String>(" transform" ) : " identity" ;
636
+ String expr = formatTransform (transform, field, id_to_name);
637
+ if (expr.empty ())
638
+ continue ;
639
+
640
+ if (lookup_sort_modifiers)
641
+ {
642
+ if (field->has (" direction" ))
643
+ {
644
+ auto d = field->getValue <String>(" direction" );
645
+ expr += (Poco::icompare (d, " desc" ) == 0 ) ? " DESC" : " ASC" ;
646
+ }
647
+ if (field->has (" null-order" ))
648
+ {
649
+ auto n = field->getValue <String>(" null-order" );
650
+ expr += (Poco::icompare (n, " nulls-last" ) == 0 ) ? " NULLS LAST" : " NULLS FIRST" ;
651
+ }
652
+ }
653
+
654
+ parts.push_back (std::move (expr));
655
+ }
656
+
657
+ if (parts.empty ())
658
+ return {};
659
+
660
+ String res;
661
+ for (size_t i = 0 ; i < parts.size (); ++i)
662
+ {
663
+ if (i) res += " , " ;
664
+ res += parts[i];
665
+ }
666
+ return res;
667
+ }
668
+
669
+ std::pair<std::optional<String>, std::optional<String>> extractIcebergKeys (const Poco::JSON::Object::Ptr & metadata_obj)
670
+ {
671
+ std::optional<String> partition_key;
672
+ std::optional<String> sort_key;
673
+
674
+ if (metadata_obj)
675
+ {
676
+ auto id_to_name = buildIdToNameMap (metadata_obj);
677
+
678
+ partition_key = composeList (findActivePartitionFields (metadata_obj), id_to_name, /* lookup_sort_modifiers=*/ false );
679
+ sort_key = composeList (findActiveSortFields (metadata_obj), id_to_name, /* lookup_sort_modifiers=*/ true );
680
+ }
681
+
682
+ return {partition_key, sort_key};
683
+ }
684
+
685
+ }
686
+
495
687
void IcebergMetadata::updateSnapshot (ContextPtr local_context, Poco::JSON::Object::Ptr metadata_object)
496
688
{
497
689
auto configuration_ptr = configuration.lock ();
@@ -526,10 +718,11 @@ void IcebergMetadata::updateSnapshot(ContextPtr local_context, Poco::JSON::Objec
526
718
total_bytes = summary_object->getValue <Int64>(f_total_files_size);
527
719
}
528
720
721
+ auto [partition_key, sorting_key] = extractIcebergKeys (metadata_object);
529
722
relevant_snapshot = IcebergSnapshot{
530
723
getManifestList (local_context, getProperFilePathFromMetadataInfo (
531
724
snapshot->getValue <String>(f_manifest_list), configuration_ptr->getPathForRead ().path , table_location)),
532
- relevant_snapshot_id, total_rows, total_bytes};
725
+ relevant_snapshot_id, total_rows, total_bytes, partition_key, sorting_key };
533
726
534
727
if (!snapshot->has (f_schema_id))
535
728
throw Exception (
@@ -973,6 +1166,29 @@ std::optional<size_t> IcebergMetadata::totalBytes(ContextPtr local_context) cons
973
1166
return result;
974
1167
}
975
1168
1169
+ std::optional<String> IcebergMetadata::partitionKey (ContextPtr) const
1170
+ {
1171
+ SharedLockGuard lock (mutex);
1172
+ if (relevant_snapshot->partition_key .has_value ())
1173
+ {
1174
+ return relevant_snapshot->partition_key ;
1175
+ }
1176
+
1177
+ return std::nullopt ;
1178
+ }
1179
+
1180
+ std::optional<String> IcebergMetadata::sortingKey (ContextPtr) const
1181
+ {
1182
+ SharedLockGuard lock (mutex);
1183
+ if (relevant_snapshot->sorting_key .has_value ())
1184
+ {
1185
+ return relevant_snapshot->sorting_key ;
1186
+ }
1187
+
1188
+ return std::nullopt ;
1189
+ }
1190
+
1191
+
976
1192
ObjectIterator IcebergMetadata::iterate (
977
1193
const ActionsDAG * filter_dag,
978
1194
FileProgressCallback callback,
0 commit comments