Skip to content

Commit

Permalink
[iceberg] Add UUID type support
Browse files Browse the repository at this point in the history
The Iceberg spec lists UUID as a valid schema type. Presto supports the
UUID type, but the Iceberg connector had no support for reading or
writing UUID values.

This commit makes the necessary changes in the connector to create
tables with UUID columns and adds UUID support to the Parquet reader,
including a UUID implementation for the batch reader.
  • Loading branch information
ZacBlanco committed Oct 22, 2024
1 parent 342ceb0 commit 6909c85
Show file tree
Hide file tree
Showing 27 changed files with 1,009 additions and 42 deletions.
41 changes: 23 additions & 18 deletions presto-docs/src/main/sphinx/connector/iceberg.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1737,26 +1737,28 @@ Map of Iceberg types to the relevant PrestoDB types:
- PrestoDB type
* - ``BOOLEAN``
- ``BOOLEAN``
* - ``BINARY``, ``FIXED``
- ``VARBINARY``
* - ``DATE``
- ``DATE``
* - ``DECIMAL``
- ``DECIMAL``
* - ``DOUBLE``
- ``DOUBLE``
* - ``INTEGER``
- ``INTEGER``
* - ``LONG``
- ``BIGINT``
* - ``FLOAT``
- ``REAL``
* - ``INTEGER``
- ``INTEGER``
* - ``DOUBLE``
- ``DOUBLE``
* - ``DECIMAL``
- ``DECIMAL``
* - ``STRING``
- ``VARCHAR``
* - ``BINARY``, ``FIXED``
- ``VARBINARY``
* - ``DATE``
- ``DATE``
* - ``TIME``
- ``TIME``
* - ``TIMESTAMP``
- ``TIMESTAMP``
* - ``STRING``
- ``VARCHAR``
* - ``UUID``
- ``UUID``
* - ``LIST``
- ``ARRAY``
* - ``MAP``
Expand Down Expand Up @@ -1796,17 +1798,20 @@ Map of PrestoDB types to the relevant Iceberg types:
- ``BINARY``
* - ``DATE``
- ``DATE``
* - ``ROW``
- ``STRUCT``
* - ``ARRAY``
- ``LIST``
* - ``MAP``
- ``MAP``
* - ``TIME``
- ``TIME``
* - ``TIMESTAMP``
- ``TIMESTAMP WITHOUT ZONE``
* - ``TIMESTAMP WITH TIME ZONE``
- ``TIMESTAMP WITH ZONE``
* - ``UUID``
- ``UUID``
* - ``ARRAY``
- ``LIST``
* - ``MAP``
- ``MAP``
* - ``ROW``
- ``STRUCT``


No other types are supported.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.common.type.TypeSignature;
import com.facebook.presto.common.type.TypeSignatureParameter;
import com.facebook.presto.common.type.UuidType;
import com.facebook.presto.spi.PrestoException;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
Expand Down Expand Up @@ -47,6 +48,7 @@
import static com.facebook.presto.common.type.IntegerType.INTEGER;
import static com.facebook.presto.common.type.RealType.REAL;
import static com.facebook.presto.common.type.SmallintType.SMALLINT;
import static com.facebook.presto.common.type.StandardTypes.UUID;
import static com.facebook.presto.common.type.TimestampType.TIMESTAMP;
import static com.facebook.presto.common.type.TinyintType.TINYINT;
import static com.facebook.presto.common.type.VarbinaryType.VARBINARY;
Expand All @@ -58,6 +60,7 @@
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.binaryTypeInfo;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.booleanTypeInfo;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.byteTypeInfo;
Expand Down Expand Up @@ -87,6 +90,47 @@ public final class HiveType
public static final HiveType HIVE_TIMESTAMP = new HiveType(timestampTypeInfo);
public static final HiveType HIVE_DATE = new HiveType(dateTypeInfo);
public static final HiveType HIVE_BINARY = new HiveType(binaryTypeInfo);
/**
 * Hive type used to represent UUID columns.
 *
 * <p>Backed by an anonymous {@link TypeInfo} whose category is {@code PRIMITIVE} and whose
 * type name, string form and hash code are all derived from the {@code UUID} type-name
 * constant. Two instances are equal only when they share the same runtime class and
 * report that same type name.
 *
 * <p>NOTE(review): this instance extends {@code TypeInfo} directly and is not a
 * {@code PrimitiveTypeInfo}; code paths that cast every {@code PRIMITIVE}-category info to
 * {@code PrimitiveTypeInfo} (for example {@code getTypeSignature}) should be confirmed to
 * handle it.
 */
public static final HiveType HIVE_UUID = new HiveType(new TypeInfo()
{
    @Override
    public String getTypeName()
    {
        return UUID;
    }

    @Override
    public Category getCategory()
    {
        return PRIMITIVE;
    }

    @Override
    public String toString()
    {
        // Same value as the type name.
        return getTypeName();
    }

    @Override
    public int hashCode()
    {
        // Consistent with equals(): derived solely from the type name.
        return getTypeName().hashCode();
    }

    @Override
    public boolean equals(Object other)
    {
        if (other == this) {
            return true;
        }
        // Only another instance of this exact class can be equal.
        boolean sameClass = other != null && getClass() == other.getClass();
        return sameClass && UUID.equals(((TypeInfo) other).getTypeName());
    }
});

private final HiveTypeName hiveTypeName;
private final TypeInfo typeInfo;
Expand Down Expand Up @@ -223,6 +267,9 @@ private static TypeSignature getTypeSignature(TypeInfo typeInfo)
switch (typeInfo.getCategory()) {
case PRIMITIVE:
Type primitiveType = getPrimitiveType((PrimitiveTypeInfo) typeInfo);
if (primitiveType == null && typeInfo.getTypeName().equals(UUID)) {
return UuidType.UUID.getTypeSignature();
}
if (primitiveType == null) {
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import static com.facebook.presto.common.type.SmallintType.SMALLINT;
import static com.facebook.presto.common.type.TimestampType.TIMESTAMP;
import static com.facebook.presto.common.type.TinyintType.TINYINT;
import static com.facebook.presto.common.type.UuidType.UUID;
import static com.facebook.presto.common.type.VarbinaryType.VARBINARY;
import static com.facebook.presto.hive.HiveType.HIVE_BINARY;
import static com.facebook.presto.hive.HiveType.HIVE_BOOLEAN;
Expand All @@ -52,6 +53,7 @@
import static com.facebook.presto.hive.HiveType.HIVE_SHORT;
import static com.facebook.presto.hive.HiveType.HIVE_STRING;
import static com.facebook.presto.hive.HiveType.HIVE_TIMESTAMP;
import static com.facebook.presto.hive.HiveType.HIVE_UUID;
import static com.facebook.presto.hive.metastore.MetastoreUtil.isArrayType;
import static com.facebook.presto.hive.metastore.MetastoreUtil.isMapType;
import static com.facebook.presto.hive.metastore.MetastoreUtil.isRowType;
Expand Down Expand Up @@ -91,6 +93,9 @@ public TypeInfo translate(Type type, Optional<HiveType> defaultHiveType)
if (DOUBLE.equals(type)) {
return HIVE_DOUBLE.getTypeInfo();
}
if (UUID.equals(type)) {
return HIVE_UUID.getTypeInfo();
}
if (type instanceof VarcharType) {
VarcharType varcharType = (VarcharType) type;
int varcharLength = varcharType.getLength();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.facebook.presto.common.type.TimeType;
import com.facebook.presto.common.type.TimestampType;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.UuidType;
import com.facebook.presto.common.type.VarbinaryType;
import com.facebook.presto.common.type.VarcharType;
import com.google.common.base.VerifyException;
Expand All @@ -41,6 +42,7 @@
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import static com.facebook.presto.common.predicate.Marker.Bound.ABOVE;
import static com.facebook.presto.common.predicate.Marker.Bound.BELOW;
Expand Down Expand Up @@ -220,6 +222,12 @@ private static Object getIcebergLiteralValue(Type type, Marker marker)
return new BigDecimal(Decimals.decodeUnscaledValue((Slice) value), decimalType.getScale());
}

if (type instanceof UuidType) {
UuidType uuidType = (UuidType) type;
return marker.getValueBlock()
.map(block -> UUID.fromString((String) uuidType.getObjectValue(null, block, 0)))
.orElseThrow(NullPointerException::new);
}
return marker.getValue();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.common.type.TypeSignature;
import com.facebook.presto.common.type.TypeSignatureParameter;
import com.facebook.presto.common.type.UuidType;
import com.facebook.presto.common.type.VarbinaryType;
import com.facebook.presto.common.type.VarcharType;
import com.facebook.presto.hive.HiveType;
Expand Down Expand Up @@ -120,6 +121,8 @@ public static Type toPrestoType(org.apache.iceberg.types.Type type, TypeManager
return TimestampType.TIMESTAMP;
case STRING:
return VarcharType.createUnboundedVarcharType();
case UUID:
return UuidType.UUID;
case LIST:
Types.ListType listType = (Types.ListType) type;
return new ArrayType(toPrestoType(listType.elementType(), typeManager));
Expand Down Expand Up @@ -203,6 +206,9 @@ public static org.apache.iceberg.types.Type toIcebergType(Type type)
if (type instanceof TimestampWithTimeZoneType) {
return Types.TimestampType.withZone();
}
if (type instanceof UuidType) {
return Types.UUIDType.get();
}
throw new PrestoException(NOT_SUPPORTED, "Type not supported for Iceberg: " + type.getDisplayName());
}

Expand Down
Loading

0 comments on commit 6909c85

Please sign in to comment.