Skip to content

Commit

Permalink
Add UPDATE sql support in Presto Engine
Browse files Browse the repository at this point in the history
Cherry-pick of trinodb/trino@af17e51

Co-authored-by: djsstarburst
  • Loading branch information
agrawalreetika authored and yingsu00 committed Dec 4, 2023
1 parent d865192 commit 76ae3ed
Show file tree
Hide file tree
Showing 73 changed files with 1,711 additions and 166 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ public void testDelete()
// Deletes are not supported by the connector
}

@Override
public void testUpdate()
{
// Updates are not supported by the connector
}

@Override
public void testInsert()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import com.facebook.presto.common.transaction.TransactionId;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ColumnMetadata;
import com.facebook.presto.spi.TableHandle;
import com.facebook.presto.spi.analyzer.AccessControlInfo;
import com.facebook.presto.spi.analyzer.AccessControlInfoForTable;
Expand Down Expand Up @@ -168,6 +169,8 @@ public class Analysis
private Optional<RefreshMaterializedViewAnalysis> refreshMaterializedViewAnalysis = Optional.empty();
private Optional<TableHandle> analyzeTarget = Optional.empty();

private Optional<List<ColumnMetadata>> updatedColumns = Optional.empty();

// for describe input and describe output
private final boolean isDescribe;

Expand Down Expand Up @@ -684,6 +687,16 @@ public Optional<Insert> getInsert()
return insert;
}

public void setUpdatedColumns(List<ColumnMetadata> updatedColumns)
{
this.updatedColumns = Optional.of(updatedColumns);
}

public Optional<List<ColumnMetadata>> getUpdatedColumns()
{
return updatedColumns;
}

public void setRefreshMaterializedViewAnalysis(RefreshMaterializedViewAnalysis refreshMaterializedViewAnalysis)
{
this.refreshMaterializedViewAnalysis = Optional.of(refreshMaterializedViewAnalysis);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
import com.facebook.presto.sql.tree.StartTransaction;
import com.facebook.presto.sql.tree.Statement;
import com.facebook.presto.sql.tree.TruncateTable;
import com.facebook.presto.sql.tree.Update;
import com.facebook.presto.sql.tree.Use;
import com.google.common.collect.ImmutableMap;

Expand Down Expand Up @@ -98,6 +99,7 @@ private StatementUtils() {}
builder.put(RefreshMaterializedView.class, QueryType.INSERT);

builder.put(Delete.class, QueryType.DELETE);
builder.put(Update.class, QueryType.UPDATE);

builder.put(ShowCatalogs.class, QueryType.DESCRIBE);
builder.put(ShowCreate.class, QueryType.DESCRIBE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ public void testDelete()
// Cassandra connector currently does not support delete
}

@Override
public void testUpdate()
{
// Updates are not supported by the connector
}

@Override
public void testShowColumns()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ public void testDelete()
throw new SkipException("TODO: test not implemented yet");
}

@Override
public void testUpdate()
{
// Updates are not supported by the connector
}

@Test
@Override
public void testInsert()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public enum QueryType
INSERT(6),
SELECT(7),
CONTROL(8),
UPDATE(9)
/**/;

private final int value;
Expand Down
1 change: 1 addition & 0 deletions presto-docs/src/main/sphinx/develop.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ This guide is intended for Presto contributors and plugin developers.
develop/spi-overview
develop/connectors
develop/example-http
develop/delete-and-update
develop/types
develop/functions
develop/system-access-control
Expand Down
181 changes: 181 additions & 0 deletions presto-docs/src/main/sphinx/develop/delete-and-update.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
====================================
Supporting ``DELETE`` and ``UPDATE``
====================================

The Presto engine provides APIs to support row-level SQL ``DELETE`` and ``UPDATE``.
To implement ``DELETE`` or ``UPDATE``, a connector must:

* Layer an ``UpdatablePageSource`` on top of the connector's ``ConnectorPageSource``
* Define ``ConnectorMetadata`` methods to get a rowId column handle
* Start the operation using ``beginUpdate()`` or ``beginDelete()``
* Finish the operation using ``finishUpdate()`` or ``finishDelete()``

``DELETE`` and ``UPDATE`` Data Flow
===================================

``DELETE`` and ``UPDATE`` have a similar flow:

* For each split, the connector will create an ``UpdatablePageSource`` instance, layered over the
connector's ``ConnectorPageSource``, to read pages on behalf of the Presto engine, and to
write deletions or updates to the underlying data store.
* The connector's ``UpdatablePageSource.getNextPage()`` implementation fetches the next page
from the underlying ``ConnectorPageSource``, optionally reformats the page, and returns it
to the Presto engine.
* The Presto engine performs filtering and projection on the page read, producing a page of filtered,
projected results.
* The Presto engine passes that filtered, projected page of results to the connector's
``UpdatablePageSource`` ``deleteRows()`` or ``updateRows()`` method. Those methods persist
the deletions or updates in the underlying data store.
* When all the pages for a specific split have been processed, the Presto engine calls
``UpdatablePageSource.finish()``, which returns a ``Collection<Slice>`` of fragments
representing connector-specific information about the rows processed by the calls to
``deleteRows`` or ``updateRows``.
* When all pages for all splits have been processed, the Presto engine calls ``ConnectorMetadata.finishDelete()`` or
``finishUpdate``, passing a collection containing all the fragments from all the splits. The connector
does what is required to finalize the operation, for example, committing the transaction.

The rowId Column Handle Abstraction
===================================

The Presto engine and connectors use a rowId column handle abstraction to agree on the identities of rows
to be updated or deleted. The rowId column handle is opaque to the Presto engine. Depending on the connector,
the rowId column handle abstraction could represent several physical columns.

The rowId Column Handle for ``DELETE``
--------------------------------------

The Presto engine identifies the rows to be deleted using a connector-specific
rowId column handle, returned by the connector's ``ConnectorMetadata.getDeleteRowIdColumnHandle()``
method, whose full signature is::

ColumnHandle getDeleteRowIdColumnHandle(
ConnectorSession session,
ConnectorTableHandle tableHandle)

The rowId Column Handle for ``UPDATE``
--------------------------------------

The Presto engine identifies rows to be updated using a connector-specific rowId column handle,
returned by the connector's ``ConnectorMetadata.getUpdateRowIdColumnHandle()``
method. In addition to the columns that identify the row, for ``UPDATE`` the rowId column will contain
any columns that the connector requires in order to perform the ``UPDATE`` operation.

UpdatablePageSource API
=======================

As mentioned above, to support ``DELETE`` or ``UPDATE``, the connector must define a subclass of
``UpdatablePageSource``, layered over the connector's ``ConnectorPageSource``. The interesting methods are:

* ``Page getNextPage()``. When the Presto engine calls ``getNextPage()``, the ``UpdatablePageSource`` calls
its underlying ``ConnectorPageSource.getNextPage()`` method to get a page. Some connectors will reformat
the page before returning it to the Presto engine.

* ``void deleteRows(Block rowIds)``. The Presto engine calls the ``deleteRows()`` method of the same ``UpdatablePageSource``
instance that supplied the original page, passing a block of rowIds, created by the Presto engine based on the column
handle returned by ``ConnectorMetadata.getDeleteRowIdColumnHandle()``

* ``void updateRows(Page page, List<Integer> columnValueAndRowIdChannels)``. The Presto engine calls the ``updateRows()``
method of the same ``UpdatablePageSource`` instance that supplied the original page, passing a page of projected columns,
one for each updated column and the last one for the rowId column. The order of projected columns is defined by the Presto engine,
and that order is reflected in the ``columnValueAndRowIdChannels`` argument. The job of ``updateRows()`` is to:

* Extract the updated column blocks and the rowId block from the projected page.
* Assemble them in whatever order is required by the connector for storage.
* Store the update result in the underlying file store.

* ``CompletableFuture<Collection<Slice>> finish()``. The Presto engine calls ``finish()`` when all the pages
of a split have been processed. The connector returns a future containing a collection of ``Slice``, representing
connector-specific information about the rows processed. Usually this will include the row count, and might
include information like the files or partitions created or changed.

``ConnectorMetadata`` ``DELETE`` API
====================================

A connector implementing ``DELETE`` must specify three ``ConnectorMetadata`` methods.

* ``getDeleteRowIdColumnHandle()``::

ColumnHandle getDeleteRowIdColumnHandle(
ConnectorSession session,
ConnectorTableHandle tableHandle)

The ColumnHandle returned by this method provides the rowId column handle used by the connector to identify rows
to be deleted, as well as any other fields of the row that the connector will need to complete the ``DELETE`` operation.
For a JDBC connector, that rowId is usually the primary key for the table and no other fields are required.
For other connectors, the information needed to identify a row usually consists of multiple physical columns.

* ``beginDelete()``::

ConnectorTableHandle beginDelete(
ConnectorSession session,
ConnectorTableHandle tableHandle)

As the last step in creating the ``DELETE`` execution plan, the connector's ``beginDelete()`` method is called,
passing the ``session`` and ``tableHandle``.

``beginDelete()`` performs any orchestration needed in the connector to start processing the ``DELETE``.
This orchestration varies from connector to connector.

``beginDelete()`` returns a ``ConnectorTableHandle`` with any added information the connector needs when the handle
is passed back to ``finishDelete()`` and the split generation machinery. For most connectors, the returned table
handle contains a flag identifying the table handle as a table handle for a ``DELETE`` operation.

* ``finishDelete()``::

void finishDelete(
ConnectorSession session,
ConnectorTableHandle tableHandle,
Collection<Slice> fragments)

During ``DELETE`` processing, the Presto engine accumulates the ``Slice`` collections returned by ``UpdatablePageSource.finish()``.
After all splits have been processed, the engine calls ``finishDelete()``, passing the table handle and that
collection of ``Slice`` fragments. In response, the connector takes appropriate actions to complete the ``Delete`` operation.
Those actions might include committing the transaction, assuming the connector supports a transaction paradigm.

``ConnectorMetadata`` ``UPDATE`` API
====================================

A connector implementing ``UPDATE`` must specify three ``ConnectorMetadata`` methods.

* ``getUpdateRowIdColumnHandle``::

ColumnHandle getUpdateRowIdColumnHandle(
ConnectorSession session,
ConnectorTableHandle tableHandle,
List<ColumnHandle> updatedColumns)

The ``updatedColumns`` list contains column handles for all columns updated by the ``UPDATE`` operation in table column order.

The ColumnHandle returned by this method provides the rowId used by the connector to identify rows to be updated, as
well as any other fields of the row that the connector will need to complete the ``UPDATE`` operation.

* ``beginUpdate``::

ConnectorTableHandle beginUpdate(
ConnectorSession session,
ConnectorTableHandle tableHandle,
List<ColumnHandle> updatedColumns)

As the last step in creating the ``UPDATE`` execution plan, the connector's ``beginUpdate()`` method is called,
passing arguments that define the ``UPDATE`` to the connector. In addition to the ``session``
and ``tableHandle``, the arguments includes the list of the updated columns handles, in table column order.

``beginUpdate()`` performs any orchestration needed in the connector to start processing the ``UPDATE``.
This orchestration varies from connector to connector.

``beginUpdate`` returns a ``ConnectorTableHandle`` with any added information the connector needs when the handle
is passed back to ``finishUpdate()`` and the split generation machinery. For most connectors, the returned table
handle contains a flag identifying the table handle as a table handle for a ``UPDATE`` operation. For some connectors
that support partitioning, the table handle will reflect that partitioning.

* ``finishUpdate``::

void finishUpdate(
ConnectorSession session,
ConnectorTableHandle tableHandle,
Collection<Slice> fragments)

During ``UPDATE`` processing, the Presto engine accumulates the ``Slice`` collections returned by ``UpdatablePageSource.finish()``.
After all splits have been processed, the engine calls ``finishUpdate()``, passing the table handle and that
collection of ``Slice`` fragments. In response, the connector takes appropriate actions to complete the ``UPDATE`` operation.
Those actions might include committing the transaction, assuming the connector supports a transaction paradigm.
1 change: 1 addition & 0 deletions presto-docs/src/main/sphinx/sql.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,6 @@ This chapter describes the SQL syntax used in Presto.
sql/show-tables
sql/start-transaction
sql/truncate
sql/update
sql/use
sql/values
33 changes: 33 additions & 0 deletions presto-docs/src/main/sphinx/sql/update.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
======
UPDATE
======

Synopsis
--------

.. code-block:: text
UPDATE table_name SET [ ( column = expression [, ... ] ) ] [ WHERE condition ]
Description
-----------

Update selected columns values in existing rows in a table.

The columns named in the ``column = expression`` assignments will be updated
for all rows that match the ``WHERE`` condition. The values of all column update
expressions for a matching row are evaluated before any column value is changed.
When the type of the expression and the type of the column differ, the usual implicit
CASTs, such as widening numeric fields, are applied to the ``UPDATE`` expression values.


Examples
--------

Update the status of all purchases that haven't been assigned a ship date::

UPDATE purchases SET status = 'OVERDUE' WHERE ship_date IS NULL;

Update the account manager and account assign date for all customers::

UPDATE customers SET
account_manager = 'John Henry',
assign_date = DATE '2007-01-01';
Original file line number Diff line number Diff line change
Expand Up @@ -2484,7 +2484,7 @@ public ConnectorTableHandle beginDelete(ConnectorSession session, ConnectorTable
}

@Override
public ColumnHandle getUpdateRowIdColumnHandle(ConnectorSession session, ConnectorTableHandle tableHandle)
public ColumnHandle getDeleteRowIdColumnHandle(ConnectorSession session, ConnectorTableHandle tableHandle)
{
return updateRowIdHandle();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,11 @@ public void checkCanDeleteFromTable(ConnectorTransactionHandle transaction, Conn
{
}

@Override
public void checkCanUpdateTableColumns(ConnectorTransactionHandle transaction, ConnectorIdentity identity, AccessControlContext context, SchemaTableName tableName, Set<String> updatedColumns)
{
}

@Override
public void checkCanCreateView(ConnectorTransactionHandle transaction, ConnectorIdentity identity, AccessControlContext context, SchemaTableName viewName)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.INSERT;
import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP;
import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.SELECT;
import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.UPDATE;
import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.toHivePrivilege;
import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.isRoleApplicable;
import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.isRoleEnabled;
Expand Down Expand Up @@ -73,6 +74,7 @@
import static com.facebook.presto.spi.security.AccessDeniedException.denySetRole;
import static com.facebook.presto.spi.security.AccessDeniedException.denyShowRoles;
import static com.facebook.presto.spi.security.AccessDeniedException.denyTruncateTable;
import static com.facebook.presto.spi.security.AccessDeniedException.denyUpdateTableColumns;
import static com.facebook.presto.spi.security.PrincipalType.ROLE;
import static com.facebook.presto.spi.security.PrincipalType.USER;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
Expand Down Expand Up @@ -239,6 +241,15 @@ public void checkCanTruncateTable(ConnectorTransactionHandle transaction, Connec
}
}

@Override
public void checkCanUpdateTableColumns(ConnectorTransactionHandle transaction, ConnectorIdentity identity, AccessControlContext context, SchemaTableName tableName, Set<String> updatedColumns)
{
MetastoreContext metastoreContext = new MetastoreContext(identity, context.getQueryId().getId(), context.getClientInfo(), context.getSource(), Optional.empty(), false, HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
if (!checkTablePermission(transaction, identity, metastoreContext, tableName, UPDATE, false)) {
denyUpdateTableColumns(tableName.toString(), updatedColumns);
}
}

@Override
public void checkCanCreateView(ConnectorTransactionHandle transaction, ConnectorIdentity identity, AccessControlContext context, SchemaTableName viewName)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,12 @@ public void checkCanDeleteFromTable(ConnectorTransactionHandle transactionHandle
delegate.checkCanDeleteFromTable(transactionHandle, identity, context, tableName);
}

@Override
public void checkCanUpdateTableColumns(ConnectorTransactionHandle transactionHandle, ConnectorIdentity identity, AccessControlContext context, SchemaTableName tableName, Set<String> updatedColumns)
{
delegate.checkCanUpdateTableColumns(transactionHandle, identity, context, tableName, updatedColumns);
}

@Override
public void checkCanCreateView(ConnectorTransactionHandle transactionHandle, ConnectorIdentity identity, AccessControlContext context, SchemaTableName viewName)
{
Expand Down
Loading

0 comments on commit 76ae3ed

Please sign in to comment.