-
Notifications
You must be signed in to change notification settings - Fork 436
[Flink] Support Partial Updates to the Flink Sink #2042
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
polyzos
wants to merge
3
commits into
apache:main
Choose a base branch
from
polyzos:partial-update-ds
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,7 @@ | |
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| import java.util.Arrays; | ||
| import java.util.HashMap; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
|
|
@@ -73,6 +74,9 @@ public class FlussSinkBuilder<InputT> { | |
| private final Map<String, String> configOptions = new HashMap<>(); | ||
| private FlussSerializationSchema<InputT> serializationSchema; | ||
| private boolean shuffleByBucketId = true; | ||
| // Optional list of columns for partial update. When set, upsert will only update these columns. | ||
| // The primary key columns must be fully specified in this list. | ||
| private List<String> partialUpdateColumns; | ||
|
|
||
| /** Set the bootstrap server for the sink. */ | ||
| public FlussSinkBuilder<InputT> setBootstrapServers(String bootstrapServers) { | ||
|
|
@@ -98,6 +102,23 @@ public FlussSinkBuilder<InputT> setShuffleByBucketId(boolean shuffleByBucketId) | |
| return this; | ||
| } | ||
|
|
||
| /** | ||
| * Enable partial update by specifying the column names to update for upsert tables. Primary key | ||
| * columns must be included in this list. | ||
| */ | ||
| public FlussSinkBuilder<InputT> setPartialUpdateColumns(List<String> columns) { | ||
| this.partialUpdateColumns = columns; | ||
| return this; | ||
| } | ||
|
|
||
| /** | ||
| * Enable partial update by specifying the column names to update for upsert tables. Convenience | ||
| * varargs overload. | ||
| */ | ||
| public FlussSinkBuilder<InputT> setPartialUpdateColumns(String... columns) { | ||
| return setPartialUpdateColumns(columns == null ? null : Arrays.asList(columns)); | ||
| } | ||
|
|
||
| /** Set a configuration option. */ | ||
| public FlussSinkBuilder<InputT> setOption(String key, String value) { | ||
| configOptions.put(key, value); | ||
|
|
@@ -153,12 +174,17 @@ public FlussSink<InputT> build() { | |
|
|
||
| if (isUpsert) { | ||
| LOG.info("Initializing Fluss upsert sink writer ..."); | ||
| int[] targetColumnIndexes = | ||
| computeTargetColumnIndexes( | ||
| tableRowType.getFieldNames(), | ||
| tableInfo.getPrimaryKeys(), | ||
| partialUpdateColumns); | ||
| writerBuilder = | ||
| new FlinkSink.UpsertSinkWriterBuilder<>( | ||
| tablePath, | ||
| flussConfig, | ||
| tableRowType, | ||
| null, // not support partialUpdateColumns yet | ||
| targetColumnIndexes, | ||
| numBucket, | ||
| bucketKeys, | ||
| partitionKeys, | ||
|
|
@@ -193,4 +219,48 @@ private void validateConfiguration() { | |
| checkNotNull(tableName, "Table name is required but not provided."); | ||
| checkArgument(!tableName.isEmpty(), "Table name cannot be empty."); | ||
| } | ||
|
|
||
| // -------------- Test-visible helper methods -------------- | ||
| /** | ||
| * Computes target column indexes for partial updates. If {@code specifiedColumns} is null or | ||
| * empty, returns null indicating full update. Validates that all primary key columns are | ||
| * included in the specified columns. | ||
| * | ||
| * @param allFieldNames the list of all field names in table row type order | ||
| * @param primaryKeyNames the list of primary key column names | ||
| * @param specifiedColumns the optional list of columns specified for partial update | ||
| * @return the indexes into {@code allFieldNames} corresponding to {@code specifiedColumns}, or | ||
| * null for full update | ||
| * @throws IllegalArgumentException if a specified column does not exist or primary key coverage | ||
| * is incomplete | ||
| */ | ||
| static int[] computeTargetColumnIndexes( | ||
| List<String> allFieldNames, | ||
| List<String> primaryKeyNames, | ||
| List<String> specifiedColumns) { | ||
| if (specifiedColumns == null || specifiedColumns.isEmpty()) { | ||
| return null; // full update | ||
| } | ||
|
|
||
| // Map specified column names to indexes | ||
| int[] indexes = new int[specifiedColumns.size()]; | ||
| for (int i = 0; i < specifiedColumns.size(); i++) { | ||
| String col = specifiedColumns.get(i); | ||
| int idx = allFieldNames.indexOf(col); | ||
| checkArgument( | ||
| idx >= 0, "Column '%s' not found in table schema: %s", col, allFieldNames); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Slightly more context can be provided in the message e.g. "Partial update column '%s' not found in table schema: %s' |
||
| indexes[i] = idx; | ||
| } | ||
|
|
||
| // Validate that all primary key columns are covered | ||
| for (String pk : primaryKeyNames) { | ||
| checkArgument( | ||
| specifiedColumns.contains(pk), | ||
| "Partial updates must include all primary key columns. Missing primary key column: %s. Provided columns: %s", | ||
| pk, | ||
| specifiedColumns); | ||
| } | ||
|
|
||
| return indexes; | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.