Remove the Connection.getTable(TablePath, SchemaInfo) overload.

swuferhong · loserwang1024 · commit 9742f897e4f3 · 2025-11-30T18:01:03.000+08:00
diff --git a/fluss-client/src/main/java/org/apache/fluss/client/Connection.java b/fluss-client/src/main/java/org/apache/fluss/client/Connection.java
@@ -21,7 +21,6 @@
 import org.apache.fluss.client.admin.Admin;
 import org.apache.fluss.client.table.Table;
 import org.apache.fluss.config.Configuration;
-import org.apache.fluss.metadata.SchemaInfo;
 import org.apache.fluss.metadata.TablePath;
 
 import javax.annotation.concurrent.ThreadSafe;
@@ -57,16 +56,6 @@ public interface Connection extends AutoCloseable {
     /** Retrieve a new Table client to operate data in table. */
     Table getTable(TablePath tablePath);
 
-    /**
-     * Retrieve a new Table client to operate data in table with specific schema. When performing
-     * read, write and loop operations, this schema will be used.
-     *
-     * @param tablePath the path of the table to operate on
-     * @param schemaInfo the schema information to be used for the table operations
-     * @return a new Table client instance for the specified table and schema
-     */
-    Table getTable(TablePath tablePath, SchemaInfo schemaInfo);
-
     /** Close the connection and release all resources. */
     @Override
     void close() throws Exception;
diff --git a/fluss-client/src/main/java/org/apache/fluss/client/FlussConnection.java b/fluss-client/src/main/java/org/apache/fluss/client/FlussConnection.java
@@ -33,8 +33,6 @@
 import org.apache.fluss.config.Configuration;
 import org.apache.fluss.exception.FlussRuntimeException;
 import org.apache.fluss.fs.FileSystem;
-import org.apache.fluss.metadata.SchemaInfo;
-import org.apache.fluss.metadata.TableInfo;
 import org.apache.fluss.metadata.TablePath;
 import org.apache.fluss.metrics.registry.MetricRegistry;
 import org.apache.fluss.rpc.GatewayClientProxy;
@@ -106,15 +104,6 @@ public Table getTable(TablePath tablePath) {
         return new FlussTable(this, tablePath, admin.getTableInfo(tablePath).join());
     }
 
-    @Override
-    public Table getTable(TablePath tablePath, SchemaInfo schemaInfo) {
-        // force to update the table info from server to avoid stale data in cache.
-        metadataUpdater.updateTableOrPartitionMetadata(tablePath, null);
-        Admin admin = getOrCreateAdmin();
-        TableInfo tableInfo = admin.getTableInfo(tablePath).join();
-        return new FlussTable(this, tablePath, tableInfo.withNewSchema(schemaInfo));
-    }
-
     public MetadataUpdater getMetadataUpdater() {
         return metadataUpdater;
     }
diff --git a/fluss-client/src/test/java/org/apache/fluss/client/table/FlussTableITCase.java b/fluss-client/src/test/java/org/apache/fluss/client/table/FlussTableITCase.java
@@ -39,7 +39,6 @@
 import org.apache.fluss.metadata.LogFormat;
 import org.apache.fluss.metadata.MergeEngineType;
 import org.apache.fluss.metadata.Schema;
-import org.apache.fluss.metadata.SchemaInfo;
 import org.apache.fluss.metadata.TableBucket;
 import org.apache.fluss.metadata.TableChange;
 import org.apache.fluss.metadata.TableDescriptor;
@@ -79,7 +78,6 @@
 import static org.apache.fluss.record.TestData.DATA1_TABLE_DESCRIPTOR_PK;
 import static org.apache.fluss.record.TestData.DATA1_TABLE_PATH;
 import static org.apache.fluss.record.TestData.DATA1_TABLE_PATH_PK;
-import static org.apache.fluss.record.TestData.DATA2_SCHEMA;
 import static org.apache.fluss.record.TestData.DATA3_SCHEMA_PK;
 import static org.apache.fluss.testutils.DataTestUtils.assertRowValueEquals;
 import static org.apache.fluss.testutils.DataTestUtils.compactedRow;
@@ -278,8 +276,7 @@ void testPutAndLookup() throws Exception {
                         false)
                 .get();
         waitAllSchemaSync(tablePath, 2);
-        Table newSchemaTable =
-                conn.getTable(tableInfo.getTablePath(), new SchemaInfo(DATA2_SCHEMA, 2));
+        Table newSchemaTable = conn.getTable(tableInfo.getTablePath());
         // schema change case1: read new data with new schema.
         verifyPutAndLookup(newSchemaTable, new Object[] {2, "b", "bb"});
         // schema change case2: read new data with old schema.
@@ -368,9 +365,7 @@ void testPutAndPrefixLookup() throws Exception {
                 .get();
         waitAllSchemaSync(tablePath, 2);
         try (Connection connection = ConnectionFactory.createConnection(clientConf);
-                Table newSchemaTable =
-                        connection.getTable(
-                                tableInfo.getTablePath(), new SchemaInfo(newSchema, 2))) {
+                Table newSchemaTable = connection.getTable(tableInfo.getTablePath())) {
             // schema change case1: read new data with new schema.
             verifyPutAndLookup(
                     newSchemaTable, new Object[] {1, "a", 4L, "value4", "add_column_value"});
diff --git a/fluss-client/src/test/java/org/apache/fluss/client/table/scanner/batch/KvSnapshotBatchScannerITCase.java b/fluss-client/src/test/java/org/apache/fluss/client/table/scanner/batch/KvSnapshotBatchScannerITCase.java
@@ -126,6 +126,7 @@ void testScanSnapshotDuringSchemaChange() throws Exception {
 
         // put into values with old schema.
         Map<TableBucket, List<InternalRow>> oldSchemaRowByBuckets = putRows(tableId, tablePath, 10);
+        waitUntilAllSnapshotFinished(oldSchemaRowByBuckets.keySet(), 0);
 
         // add a new column and rename an existing column
         admin.alterTable(
@@ -175,7 +176,7 @@ void testScanSnapshotDuringSchemaChange() throws Exception {
         }
 
         // wait snapshot finish
-        waitUntilAllSnapshotFinished(expectedRowByBuckets.keySet(), 0);
+        waitUntilAllSnapshotFinished(expectedRowByBuckets.keySet(), 1);
 
         // test read snapshot with new Schema
         testSnapshotRead(tablePath, expectedRowByBuckets);
diff --git a/fluss-common/src/main/java/org/apache/fluss/metadata/Schema.java b/fluss-common/src/main/java/org/apache/fluss/metadata/Schema.java
@@ -261,6 +261,12 @@ public Builder fromColumns(List<Column> inputColumns) {
 
             if (allMatchColumnId) {
                 columns.addAll(inputColumns);
+                highestFieldId =
+                        new AtomicInteger(
+                                inputColumns.stream()
+                                        .mapToInt(Column::getColumnId)
+                                        .max()
+                                        .orElse(-1));
             } else {
                 // if all columnId is not set, this maybe from old version schema. Just use its
                 // position as columnId.
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/source/FlinkSource.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/source/FlinkSource.java
@@ -17,9 +17,6 @@
 
 package org.apache.fluss.flink.source;
 
-import org.apache.fluss.client.Connection;
-import org.apache.fluss.client.ConnectionFactory;
-import org.apache.fluss.client.table.Table;
 import org.apache.fluss.config.Configuration;
 import org.apache.fluss.flink.source.deserializer.DeserializerInitContextImpl;
 import org.apache.fluss.flink.source.deserializer.FlussDeserializationSchema;
@@ -35,9 +32,6 @@
 import org.apache.fluss.flink.source.state.SourceEnumeratorState;
 import org.apache.fluss.lake.source.LakeSource;
 import org.apache.fluss.lake.source.LakeSplit;
-import org.apache.fluss.metadata.Schema;
-import org.apache.fluss.metadata.SchemaInfo;
-import org.apache.fluss.metadata.TableInfo;
 import org.apache.fluss.metadata.TablePath;
 import org.apache.fluss.predicate.Predicate;
 import org.apache.fluss.types.RowType;
@@ -56,8 +50,6 @@
 
 import javax.annotation.Nullable;
 
-import java.util.List;
-
 /** Flink source for Fluss. */
 public class FlinkSource<OUT>
         implements Source<OUT, SourceSplitBase, SourceEnumeratorState>, ResultTypeQueryable {
@@ -184,14 +176,6 @@ public SourceReader<OUT, SourceSplitBase> createReader(SourceReaderContext conte
         FlinkSourceReaderMetrics flinkSourceReaderMetrics =
                 new FlinkSourceReaderMetrics(context.metricGroup());
 
-        TableInfo tableInfo;
-        try (Connection connection = ConnectionFactory.createConnection(flussConf);
-                Table table = connection.getTable(tablePath)) {
-            tableInfo = table.getTableInfo();
-        }
-
-        Schema schema = tableInfo.getSchema();
-
         deserializationSchema.open(
                 new DeserializerInitContextImpl(
                         context.metricGroup().addGroup("deserializer"),
@@ -200,16 +184,12 @@ public SourceReader<OUT, SourceSplitBase> createReader(SourceReaderContext conte
         FlinkRecordEmitter<OUT> recordEmitter = new FlinkRecordEmitter<>(deserializationSchema);
         // recall to projectedFields
 
-        int[] projectedFields = reCalculateProjectedFields(sourceOutputType, schema.getRowType());
-
         return new FlinkSourceReader<>(
                 elementsQueue,
                 flussConf,
                 tablePath,
                 sourceOutputType,
-                new SchemaInfo(schema, tableInfo.getSchemaId()),
                 context,
-                projectedFields,
                 flinkSourceReaderMetrics,
                 recordEmitter,
                 lakeSource);
@@ -219,31 +199,4 @@ public SourceReader<OUT, SourceSplitBase> createReader(SourceReaderContext conte
     public TypeInformation<OUT> getProducedType() {
         return deserializationSchema.getProducedType(sourceOutputType);
     }
-
-    /**
-     * The projected fields for the fluss table from the source output types. Mapping based on
-     * column name rather thn column id.
-     *
-     * @return
-     */
-    private static int[] reCalculateProjectedFields(
-            RowType sourceOutputType, RowType flussRowType) {
-        if (sourceOutputType.copy(false).equals(flussRowType.copy(false))) {
-            return null;
-        }
-
-        List<String> fieldNames = sourceOutputType.getFieldNames();
-        int[] projectedFlussFields = new int[fieldNames.size()];
-        for (int i = 0; i < fieldNames.size(); i++) {
-            int fieldIndex = flussRowType.getFieldIndex(fieldNames.get(i));
-            if (fieldIndex == -1) {
-                throw new IllegalArgumentException(
-                        String.format(
-                                "The field %s is not found in the fluss table.",
-                                fieldNames.get(i)));
-            }
-            projectedFlussFields[i] = fieldIndex;
-        }
-        return projectedFlussFields;
-    }
 }
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/source/reader/FlinkSourceReader.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/source/reader/FlinkSourceReader.java
@@ -31,7 +31,6 @@
 import org.apache.fluss.flink.source.split.SourceSplitState;
 import org.apache.fluss.lake.source.LakeSource;
 import org.apache.fluss.lake.source.LakeSplit;
-import org.apache.fluss.metadata.SchemaInfo;
 import org.apache.fluss.metadata.TableBucket;
 import org.apache.fluss.metadata.TablePath;
 import org.apache.fluss.types.RowType;
@@ -41,8 +40,6 @@
 import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds;
 import org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue;
 
-import javax.annotation.Nullable;
-
 import java.util.Map;
 import java.util.Set;
 import java.util.function.Consumer;
@@ -57,9 +54,7 @@ public FlinkSourceReader(
             Configuration flussConfig,
             TablePath tablePath,
             RowType sourceOutputType,
-            SchemaInfo schemaInfo,
             SourceReaderContext context,
-            @Nullable int[] projectedFields,
             FlinkSourceReaderMetrics flinkSourceReaderMetrics,
             FlinkRecordEmitter<OUT> recordEmitter,
             LakeSource<LakeSplit> lakeSource) {
@@ -72,8 +67,6 @@ public FlinkSourceReader(
                                         flussConfig,
                                         tablePath,
                                         sourceOutputType,
-                                        schemaInfo,
-                                        projectedFields,
                                         flinkSourceReaderMetrics,
                                         lakeSource),
                         (ignore) -> {}),
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/source/reader/FlinkSourceSplitReader.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/source/reader/FlinkSourceSplitReader.java
@@ -17,6 +17,7 @@
 
 package org.apache.fluss.flink.source.reader;
 
+import org.apache.fluss.annotation.VisibleForTesting;
 import org.apache.fluss.client.Connection;
 import org.apache.fluss.client.ConnectionFactory;
 import org.apache.fluss.client.table.Table;
@@ -36,7 +37,6 @@
 import org.apache.fluss.flink.source.split.SourceSplitBase;
 import org.apache.fluss.lake.source.LakeSource;
 import org.apache.fluss.lake.source.LakeSplit;
-import org.apache.fluss.metadata.SchemaInfo;
 import org.apache.fluss.metadata.TableBucket;
 import org.apache.fluss.metadata.TablePath;
 import org.apache.fluss.types.RowType;
@@ -58,7 +58,6 @@
 import java.time.Duration;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -120,21 +119,18 @@ public FlinkSourceSplitReader(
             Configuration flussConf,
             TablePath tablePath,
             RowType sourceOutputType,
-            SchemaInfo schemaInfo,
-            @Nullable int[] projectedFields,
             FlinkSourceReaderMetrics flinkSourceReaderMetrics,
             @Nullable LakeSource<LakeSplit> lakeSource) {
         this.flinkMetricRegistry =
                 new FlinkMetricRegistry(flinkSourceReaderMetrics.getSourceReaderMetricGroup());
         this.connection = ConnectionFactory.createConnection(flussConf, flinkMetricRegistry);
-        this.table = connection.getTable(tablePath, schemaInfo);
+        this.table = connection.getTable(tablePath);
         this.sourceOutputType = sourceOutputType;
         this.boundedSplits = new ArrayDeque<>();
         this.subscribedBuckets = new HashMap<>();
         this.flinkSourceReaderMetrics = flinkSourceReaderMetrics;
-
-        sanityCheck(table.getTableInfo().getRowType(), projectedFields);
-        this.projectedFields = projectedFields;
+        this.projectedFields =
+                reCalculateProjectedFields(sourceOutputType, table.getTableInfo().getRowType());
         this.logScanner = table.newScan().project(projectedFields).createLogScanner();
         this.stoppingOffsets = new HashMap<>();
         this.emptyLogSplits = new HashSet<>();
@@ -568,35 +564,33 @@ public void close() throws Exception {
         flinkMetricRegistry.close();
     }
 
-    private void sanityCheck(RowType flussTableRowType, @Nullable int[] projectedFields) {
-        RowType tableRowType =
-                projectedFields != null
-                        ? flussTableRowType.project(projectedFields)
-                        : flussTableRowType;
-        if (!sourceOutputType.copy(false).equals(tableRowType.copy(false))) {
-            // The default nullability of Flink row type and Fluss row type might be not the same,
-            // thus we need to compare the row type without nullability here.
-
-            final String flussSchemaMsg;
-            if (projectedFields == null) {
-                flussSchemaMsg = "\nFluss table schema: " + tableRowType;
-            } else {
-                flussSchemaMsg =
-                        "\nFluss table schema: "
-                                + tableRowType
-                                + " (projection "
-                                + Arrays.toString(projectedFields)
-                                + ")";
+    /**
+     * The projected fields for the fluss table from the source output types. Mapping based on
+     * column name rather than column id.
+     */
+    private static int[] reCalculateProjectedFields(
+            RowType sourceOutputType, RowType flussRowType) {
+        if (sourceOutputType.copy(false).equals(flussRowType.copy(false))) {
+            return null;
+        }
+
+        List<String> fieldNames = sourceOutputType.getFieldNames();
+        int[] projectedFlussFields = new int[fieldNames.size()];
+        for (int i = 0; i < fieldNames.size(); i++) {
+            int fieldIndex = flussRowType.getFieldIndex(fieldNames.get(i));
+            if (fieldIndex == -1) {
+                throw new ValidationException(
+                        String.format(
+                                "The field %s is not found in the fluss table.",
+                                fieldNames.get(i)));
             }
-            // Throw exception if the schema is the not same, this should rarely happen because we
-            // only allow fluss tables derived from fluss catalog. But this can happen if an ALTER
-            // TABLE command executed on the fluss table, after the job is submitted but before the
-            // SinkFunction is opened.
-            throw new ValidationException(
-                    "The Flink query schema is not matched to Fluss table schema. "
-                            + "\nFlink query schema: "
-                            + sourceOutputType
-                            + flussSchemaMsg);
+            projectedFlussFields[i] = fieldIndex;
         }
+        return projectedFlussFields;
+    }
+
+    @VisibleForTesting
+    public int[] getProjectedFields() {
+        return projectedFields;
     }
 }
diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/source/reader/FlinkSourceReaderTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/source/reader/FlinkSourceReaderTest.java
@@ -26,8 +26,6 @@
 import org.apache.fluss.flink.source.metrics.FlinkSourceReaderMetrics;
 import org.apache.fluss.flink.source.split.LogSplit;
 import org.apache.fluss.flink.utils.FlinkTestBase;
-import org.apache.fluss.metadata.Schema;
-import org.apache.fluss.metadata.SchemaInfo;
 import org.apache.fluss.metadata.TableBucket;
 import org.apache.fluss.metadata.TableDescriptor;
 import org.apache.fluss.metadata.TablePath;
@@ -179,9 +177,7 @@ private FlinkSourceReader createReader(
                 flussConf,
                 tablePath,
                 sourceOutputType,
-                new SchemaInfo(Schema.newBuilder().fromRowType(sourceOutputType).build(), 1),
                 context,
-                null,
                 new FlinkSourceReaderMetrics(context.metricGroup()),
                 recordEmitter,
                 null);
diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/source/reader/FlinkSourceSplitReaderTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/source/reader/FlinkSourceSplitReaderTest.java