diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml index 0cec849579a6..13a4f4695bdb 100644 --- a/hudi-cli/pom.xml +++ b/hudi-cli/pom.xml @@ -169,6 +169,13 @@ test test-jar + + org.apache.hudi + hudi-hadoop-common + ${project.version} + test + test-jar + org.apache.hudi hudi-client-common diff --git a/hudi-client/hudi-client-common/pom.xml b/hudi-client/hudi-client-common/pom.xml index 3a21bb3b41c5..6e47816f62ac 100644 --- a/hudi-client/hudi-client-common/pom.xml +++ b/hudi-client/hudi-client-common/pom.xml @@ -43,6 +43,16 @@ hudi-common ${project.version} + + org.apache.hudi + hudi-io + ${project.version} + + + org.apache.hudi + hudi-hadoop-common + ${project.version} + org.apache.hudi hudi-aws @@ -111,6 +121,21 @@ test-jar test + + org.apache.hudi + hudi-io + ${project.version} + tests + test + + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi hudi-tests-common diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java index 06f959758e0a..524901dd9bb0 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java @@ -67,8 +67,8 @@ import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.exception.HoodieUpsertException; import org.apache.hudi.exception.SchemaCompatibilityException; -import org.apache.hudi.hadoop.fs.ConsistencyGuard; -import org.apache.hudi.hadoop.fs.ConsistencyGuard.FileVisibility; +import org.apache.hudi.common.fs.ConsistencyGuard; +import org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.metadata.HoodieTableMetadata; diff --git a/hudi-client/hudi-flink-client/pom.xml b/hudi-client/hudi-flink-client/pom.xml index d774078d5d68..8f25e3cfd8c7 100644 --- a/hudi-client/hudi-flink-client/pom.xml +++ b/hudi-client/hudi-flink-client/pom.xml @@ -55,6 +55,11 @@ hudi-client-common ${project.parent.version} + + org.apache.hudi + hudi-hadoop-common + ${project.version} + @@ -132,6 +137,14 @@ test-jar test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi hudi-hadoop-mr diff --git a/hudi-client/hudi-java-client/pom.xml b/hudi-client/hudi-java-client/pom.xml index 3fb62e2fa504..c6b36bd6bed6 100644 --- a/hudi-client/hudi-java-client/pom.xml +++ b/hudi-client/hudi-java-client/pom.xml @@ -37,6 +37,11 @@ + + org.apache.hudi + hudi-hadoop-common + ${project.version} + org.apache.hudi hudi-client-common @@ -64,6 +69,14 @@ test-jar test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi hudi-client-common diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml index 4e7361d651a8..13e3e825f241 100644 --- a/hudi-client/hudi-spark-client/pom.xml +++ b/hudi-client/hudi-spark-client/pom.xml @@ -55,6 +55,18 @@ ${project.parent.version} + + org.apache.hudi + hudi-io + ${project.version} + + + + org.apache.hudi + hudi-hadoop-common + ${project.version} + + org.apache.spark @@ -90,6 +102,22 @@ test-jar test + + org.apache.hudi + hudi-io + ${project.version} + tests + test-jar + test + + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi hudi-client-common diff --git 
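Beyond the pom additions, the HoodieTable hunk above relocates ConsistencyGuard (and its nested FileVisibility enum) from org.apache.hudi.hadoop.fs to org.apache.hudi.common.fs, so engine-agnostic code stops needing the Hadoop module at compile time just for visibility checks. A minimal caller-side sketch after the move; the waitTillFileAppears/waitTillFileDisappears signatures are assumptions carried over from the pre-move interface, not something this hunk shows:

```java
import org.apache.hudi.common.fs.ConsistencyGuard;                // was org.apache.hudi.hadoop.fs.ConsistencyGuard
import org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility; // the nested enum moves with it
import org.apache.hudi.storage.StoragePath;

import java.io.IOException;
import java.util.concurrent.TimeoutException;

final class VisibilityCheck {
  // Blocks until the guard agrees the file reached the desired visibility state.
  static void waitTill(ConsistencyGuard guard, StoragePath path, FileVisibility target)
      throws IOException, TimeoutException {
    if (target == FileVisibility.APPEAR) {
      guard.waitTillFileAppears(path);
    } else {
      guard.waitTillFileDisappears(path);
    }
  }
}
```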
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestMultiFS.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestMultiFS.java index 1bb4b9ff70e3..2d29e936d158 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestMultiFS.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestMultiFS.java @@ -32,6 +32,7 @@ import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; +import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.testutils.HoodieClientTestUtils; import org.apache.hudi.testutils.HoodieSparkClientTestHarness; @@ -133,7 +134,7 @@ public void readLocalWriteHDFS() throws Exception { // Read from hdfs FileSystem fs = HadoopFSUtils.getFs(dfsBasePath, HoodieTestUtils.getDefaultStorageConf()); - HoodieTableMetaClient metaClient = HoodieTestUtils.createMetaClient(fs.getConf(), dfsBasePath); + HoodieTableMetaClient metaClient = HoodieTestUtils.createMetaClient(HoodieStorageUtils.getStorageConf(fs.getConf()), dfsBasePath); HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); Dataset readRecords = HoodieClientTestUtils.readCommit(dfsBasePath, sqlContext, timeline, readCommitTime); assertEquals(readRecords.count(), records.size()); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java index 072b88b1f6c6..9461e7819009 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java @@ -23,7 +23,7 @@ import org.apache.hudi.common.fs.OptimisticConsistencyGuard; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.testutils.FileCreateUtils; -import org.apache.hudi.hadoop.fs.ConsistencyGuard; +import org.apache.hudi.common.fs.ConsistencyGuard; import org.apache.hudi.storage.StoragePath; import org.apache.hudi.testutils.HoodieSparkClientTestHarness; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestDirectWriteMarkers.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestDirectWriteMarkers.java index ac80e61db282..ec503a8774c6 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestDirectWriteMarkers.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestDirectWriteMarkers.java @@ -19,7 +19,7 @@ package org.apache.hudi.table.marker; import org.apache.hudi.client.common.HoodieSparkEngineContext; -import org.apache.hudi.common.testutils.FileSystemTestUtils; +import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StoragePath; @@ -61,7 +61,7 @@ public void cleanup() { @Override void verifyMarkersInFileSystem(boolean isTablePartitioned) throws IOException { - List markerFiles = FileSystemTestUtils.listRecursive(storage, markerFolderPath) + List markerFiles = HoodieTestTable.listRecursive(storage, markerFolderPath) .stream().filter(status -> status.getPath().getName().contains(".marker")) .sorted().collect(Collectors.toList()); assertEquals(3, markerFiles.size()); diff --git 
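The marker tests above (TestDirectWriteMarkers, and TestWriteMarkersBase just below) swap FileSystemTestUtils.listRecursive for HoodieTestTable.listRecursive. A small sketch of the call pattern, assuming the new helper keeps the List<StoragePathInfo> shape of the method it replaces:

```java
import org.apache.hudi.common.testutils.HoodieTestTable;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.storage.StoragePathInfo;

import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;

final class MarkerListing {
  // Lists every marker file under the marker folder, sorted for stable test assertions.
  static List<StoragePathInfo> listMarkers(HoodieStorage storage, StoragePath markerFolderPath)
      throws IOException {
    return HoodieTestTable.listRecursive(storage, markerFolderPath).stream()
        .filter(info -> info.getPath().getName().contains(".marker")) // drop MARKERS.type etc.
        .sorted()
        .collect(Collectors.toList());
  }
}
```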
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersBase.java index 882bfb2f8f81..5121a05ba907 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersBase.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersBase.java @@ -21,8 +21,8 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.IOType; -import org.apache.hudi.common.testutils.FileSystemTestUtils; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; +import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.MarkerUtils; import org.apache.hudi.exception.HoodieException; @@ -107,7 +107,7 @@ public void testDataPathsWhenCreatingOrMerging(boolean isTablePartitioned) throw createSomeMarkers(isTablePartitioned); // add invalid file createInvalidFile(isTablePartitioned ? "2020/06/01" : "", "invalid_file3"); - long fileSize = FileSystemTestUtils.listRecursive(storage, markerFolderPath).stream() + long fileSize = HoodieTestTable.listRecursive(storage, markerFolderPath).stream() .filter(fileStatus -> !fileStatus.getPath().getName().contains(MarkerUtils.MARKER_TYPE_FILENAME)) .count(); assertEquals(fileSize, 4); diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml index d4a82e859c5b..1b1f2f15a2a4 100644 --- a/hudi-common/pom.xml +++ b/hudi-common/pom.xml @@ -109,12 +109,6 @@ ${project.version} - - org.apache.hudi - hudi-hadoop-common - ${project.version} - - org.openjdk.jol jol-core diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java index 40b894de8802..f04cd7ed10ce 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java @@ -240,7 +240,7 @@ private static HFileReader createReader(String hFilePath, HoodieStorage storage) LOG.info("Opening HFile for reading :" + hFilePath); StoragePath path = new StoragePath(hFilePath); long fileSize = storage.getPathInfo(path).getLength(); - SeekableDataInputStream stream = storage.openSeekable(path); + SeekableDataInputStream stream = storage.openSeekable(path, true); return new HFileReaderImpl(stream, fileSize); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java index 85d00ecb18d7..a77ca8466921 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java @@ -33,6 +33,7 @@ import java.util.Properties; import static org.apache.hudi.common.util.ConfigUtils.getRawValueWithAltKeys; +import static org.apache.hudi.common.util.ConfigUtils.loadGlobalProperties; /** * This class deals with {@link ConfigProperty} and provides get/set functionalities. 
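HoodieConfig (the getProps hunk that follows) now resolves global properties through ConfigUtils.loadGlobalProperties() instead of referencing DFSPropertiesConfiguration directly, removing the compile-time edge from config handling to the Hadoop-backed loader. A sketch of the resulting merge semantics, using only what the hunk below shows; local values override globals:

```java
import org.apache.hudi.common.config.TypedProperties;

import static org.apache.hudi.common.util.ConfigUtils.loadGlobalProperties;

final class GlobalPropsExample {
  // Mirrors HoodieConfig.getProps(true): start from the reflectively loaded
  // global properties, then let the instance's own properties win on conflict.
  static TypedProperties mergeWithGlobals(TypedProperties localProps) {
    TypedProperties merged = loadGlobalProperties();
    merged.putAll(localProps);
    return merged;
  }
}
```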
@@ -238,7 +239,7 @@ public TypedProperties getProps() { public TypedProperties getProps(boolean includeGlobalProps) { if (includeGlobalProps) { - TypedProperties mergedProps = DFSPropertiesConfiguration.getGlobalProps(); + TypedProperties mergedProps = loadGlobalProperties(); mergedProps.putAll(props); return mergedProps; } else { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/PropertiesConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/PropertiesConfig.java new file mode 100644 index 000000000000..6edbe5bb5609 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/PropertiesConfig.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.config; + +/** + * Used for loading filesystem-specific configs. + */ +public abstract class PropertiesConfig { + /** + * Returns any global properties for the filesystem. + */ + public TypedProperties getGlobalProperties() { + return new TypedProperties(); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java index 4160e099d44d..5ad9753489f2 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java @@ -34,17 +34,13 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieValidationException; import org.apache.hudi.exception.InvalidHoodiePathException; -import org.apache.hudi.hadoop.fs.CachingPath; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; -import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; -import org.apache.hudi.hadoop.fs.inline.InLineFSUtils; -import org.apache.hudi.hadoop.fs.inline.InLineFileSystem; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.StorageConfiguration; import org.apache.hudi.storage.StoragePath; import org.apache.hudi.storage.StoragePathFilter; import org.apache.hudi.storage.StoragePathInfo; +import org.apache.hudi.storage.inline.InLineFSUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -75,6 +71,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.storage.HoodieStorageUtils.getStorageConfWithCopy; + /** * Utility functions related to accessing the file storage. */ @@ -93,20 +91,6 @@ public class FSUtils { private static final StoragePathFilter ALLOW_ALL_FILTER = file -> true; - public static Configuration buildInlineConf(Configuration conf) { - Configuration inlineConf = new Configuration(conf); - inlineConf.set("fs."
+ InLineFileSystem.SCHEME + ".impl", InLineFileSystem.class.getName()); - inlineConf.setClassLoader(InLineFileSystem.class.getClassLoader()); - return inlineConf; - } - - public static StorageConfiguration buildInlineConf(StorageConfiguration storageConf) { - StorageConfiguration inlineConf = storageConf.newInstance(); - inlineConf.set("fs." + InLineFileSystem.SCHEME + ".impl", InLineFileSystem.class.getName()); - (inlineConf.unwrapAs(Configuration.class)).setClassLoader(InLineFileSystem.class.getClassLoader()); - return inlineConf; - } - /** * Check if table already exists in the given path. * @@ -209,21 +193,7 @@ public static List getAllPartitionFoldersThreeLevelsDown(HoodieStorage s * Given a base partition and a partition path, return relative path of partition path to the base path. */ public static String getRelativePartitionPath(Path basePath, Path fullPartitionPath) { - basePath = CachingPath.getPathWithoutSchemeAndAuthority(basePath); - fullPartitionPath = CachingPath.getPathWithoutSchemeAndAuthority(fullPartitionPath); - - String fullPartitionPathStr = fullPartitionPath.toString(); - - if (!fullPartitionPathStr.startsWith(basePath.toString())) { - throw new IllegalArgumentException("Partition path \"" + fullPartitionPathStr - + "\" does not belong to base-path \"" + basePath + "\""); - } - - int partitionStartIndex = fullPartitionPathStr.indexOf(basePath.getName(), - basePath.getParent() == null ? 0 : basePath.getParent().toString().length()); - // Partition-Path could be empty for non-partitioned tables - return partitionStartIndex + basePath.getName().length() == fullPartitionPathStr.length() ? "" - : fullPartitionPathStr.substring(partitionStartIndex + basePath.getName().length() + 1); + return getRelativePartitionPath(new StoragePath(basePath.toUri()), new StoragePath(fullPartitionPath.toUri())); } public static String getRelativePartitionPath(StoragePath basePath, StoragePath fullPartitionPath) { @@ -489,14 +459,12 @@ public static boolean isBaseFile(StoragePath path) { public static boolean isLogFile(StoragePath logPath) { String scheme = logPath.toUri().getScheme(); - return isLogFile(InLineFileSystem.SCHEME.equals(scheme) + return isLogFile(InLineFSUtils.SCHEME.equals(scheme) ? InLineFSUtils.getOuterFilePathFromInlinePath(logPath).getName() : logPath.getName()); } public static boolean isLogFile(Path logPath) { - String scheme = logPath.toUri().getScheme(); - return isLogFile(InLineFileSystem.SCHEME.equals(scheme) - ? InLineFSUtils.getOuterFilePathFromInlinePath(logPath).getName() : logPath.getName()); + return isLogFile(new StoragePath(logPath.getName())); } public static boolean isLogFile(String fileName) { @@ -635,16 +603,7 @@ public static Long getSizeInMB(long sizeInBytes) { } public static Path constructAbsolutePathInHadoopPath(String basePath, String relativePartitionPath) { - if (StringUtils.isNullOrEmpty(relativePartitionPath)) { - return new Path(basePath); - } - - // NOTE: We have to chop leading "/" to make sure Hadoop does not treat it like - // absolute path - String properPartitionPath = relativePartitionPath.startsWith(PATH_SEPARATOR) - ? 
relativePartitionPath.substring(1) - : relativePartitionPath; - return constructAbsolutePath(new CachingPath(basePath), properPartitionPath); + return new Path(constructAbsolutePath(basePath, relativePartitionPath).toUri()); } public static StoragePath constructAbsolutePath(String basePath, String relativePartitionPath) { @@ -660,11 +619,6 @@ public static StoragePath constructAbsolutePath(String basePath, String relative return constructAbsolutePath(new StoragePath(basePath), properPartitionPath); } - public static Path constructAbsolutePath(Path basePath, String relativePartitionPath) { - // For non-partitioned table, return only base-path - return StringUtils.isNullOrEmpty(relativePartitionPath) ? basePath : new CachingPath(basePath, relativePartitionPath); - } - public static StoragePath constructAbsolutePath(StoragePath basePath, String relativePartitionPath) { // For non-partitioned table, return only base-path return StringUtils.isNullOrEmpty(relativePartitionPath) ? basePath : new StoragePath(basePath, relativePartitionPath); @@ -695,14 +649,6 @@ public static String getDFSFullPartitionPath(FileSystem fs, Path fullPartitionPa return fs.getUri() + fullPartitionPath.toUri().getRawPath(); } - public static Configuration registerFileSystem(StoragePath file, Configuration conf) { - Configuration returnConf = new Configuration(conf); - String scheme = HadoopFSUtils.getFs(file.toString(), conf).getScheme(); - returnConf.set("fs." + HoodieWrapperFileSystem.getHoodieScheme(scheme) + ".impl", - HoodieWrapperFileSystem.class.getName()); - return returnConf; - } - /** * Helper to filter out paths under metadata folder when running fs.globStatus. * @@ -756,7 +702,7 @@ public static Map parallelizeFilesProcess( List subPaths) { Map result = new HashMap<>(); if (subPaths.size() > 0) { - StorageConfiguration conf = HadoopFSUtils.getStorageConfWithCopy(fs.getConf()); + StorageConfiguration conf = getStorageConfWithCopy(fs.getConf()); int actualParallelism = Math.min(subPaths.size(), parallelism); hoodieEngineContext.setJobStatus(FSUtils.class.getSimpleName(), diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FailSafeConsistencyGuard.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FailSafeConsistencyGuard.java index decd1099daca..f873d8d22a54 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FailSafeConsistencyGuard.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FailSafeConsistencyGuard.java @@ -19,7 +19,6 @@ package org.apache.hudi.common.fs; import org.apache.hudi.common.util.ValidationUtils; -import org.apache.hudi.hadoop.fs.ConsistencyGuard; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.StoragePath; import org.apache.hudi.storage.StoragePathInfo; diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/NoOpConsistencyGuard.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/NoOpConsistencyGuard.java similarity index 97% rename from hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/NoOpConsistencyGuard.java rename to hudi-common/src/main/java/org/apache/hudi/common/fs/NoOpConsistencyGuard.java index 1f8401a0b881..f47a180b8569 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/NoOpConsistencyGuard.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/NoOpConsistencyGuard.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hudi.hadoop.fs; +package org.apache.hudi.common.fs; import org.apache.hudi.storage.StoragePath; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/OptimisticConsistencyGuard.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/OptimisticConsistencyGuard.java index 8e0f9a0dc41a..dfe7047999f1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/OptimisticConsistencyGuard.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/OptimisticConsistencyGuard.java @@ -18,7 +18,6 @@ package org.apache.hudi.common.fs; -import org.apache.hudi.hadoop.fs.ConsistencyGuard; import org.apache.hudi.storage.StoragePath; import org.apache.hudi.storage.HoodieStorage; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java index aa288adece8e..38bf3e43d45d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java @@ -22,9 +22,11 @@ import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.config.HoodieMetaserverConfig; import org.apache.hudi.common.config.HoodieTimeGeneratorConfig; +import org.apache.hudi.common.fs.ConsistencyGuard; import org.apache.hudi.common.fs.ConsistencyGuardConfig; import org.apache.hudi.common.fs.FailSafeConsistencyGuard; import org.apache.hudi.common.fs.FileSystemRetryConfig; +import org.apache.hudi.common.fs.NoOpConsistencyGuard; import org.apache.hudi.common.model.BootstrapIndexType; import org.apache.hudi.common.model.HoodieFunctionalIndexDefinition; import org.apache.hudi.common.model.HoodieFunctionalIndexMetadata; @@ -49,8 +51,6 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.TableNotFoundException; -import org.apache.hudi.hadoop.fs.ConsistencyGuard; -import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; import org.apache.hudi.keygen.constant.KeyGeneratorType; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.storage.HoodieStorage; @@ -79,7 +79,6 @@ import static org.apache.hudi.common.util.ConfigUtils.containsConfigProperty; import static org.apache.hudi.common.util.ConfigUtils.getStringWithAltKeys; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; -import static org.apache.hudi.hadoop.fs.HadoopFSUtils.getStorageWithWrapperFS; /** * HoodieTableMetaClient allows to access meta-data about a hoodie table It returns meta-data about @@ -390,7 +389,7 @@ public HoodieStorage getStorage() { consistencyGuardConfig) : new NoOpConsistencyGuard(); - storage = getStorageWithWrapperFS( + storage = HoodieStorageUtils.getStorage( metaPath, getStorageConf(), fileSystemRetryConfig.isFileSystemActionRetryEnable(), diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java index 3294f7b8d8c9..8b24367bd8ad 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java @@ -474,7 +474,7 @@ public static SeekableDataInputStream getDataInputStream(HoodieStorage storage, HoodieLogFile logFile, int bufferSize) { try { - return storage.openSeekable(logFile.getPath(), bufferSize); + return 
storage.openSeekable(logFile.getPath(), bufferSize, true); } catch (IOException e) { throw new HoodieIOException("Unable to get seekable input stream for " + logFile, e); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java index efb3b5fa55f8..a379e305d0eb 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java @@ -22,7 +22,6 @@ import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.config.HoodieReaderConfig; import org.apache.hudi.common.engine.HoodieReaderContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; @@ -30,7 +29,6 @@ import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.collection.CloseableMappingIterator; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.hadoop.fs.inline.InLineFSUtils; import org.apache.hudi.io.SeekableDataInputStream; import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase; import org.apache.hudi.io.storage.HoodieFileReader; @@ -40,6 +38,7 @@ import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StorageConfiguration; import org.apache.hudi.storage.StoragePath; +import org.apache.hudi.storage.inline.InLineFSUtils; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; @@ -189,8 +188,7 @@ protected byte[] serializeRecords(List records) throws IOException protected ClosableIterator> deserializeRecords(byte[] content, HoodieRecordType type) throws IOException { checkState(readerSchema != null, "Reader's schema has to be non-null"); - StorageConfiguration storageConf = - FSUtils.buildInlineConf(getBlockContentLocation().get().getStorageConf()); + StorageConfiguration storageConf = getBlockContentLocation().get().getStorageConf().getInline(); HoodieStorage storage = HoodieStorageUtils.getStorage(pathForReader, storageConf); // Read the content try (HoodieFileReader reader = @@ -206,8 +204,7 @@ protected ClosableIterator> deserializeRecords(byte[] conten protected ClosableIterator deserializeRecords(HoodieReaderContext readerContext, byte[] content) throws IOException { checkState(readerSchema != null, "Reader's schema has to be non-null"); - StorageConfiguration storageConf = - FSUtils.buildInlineConf(getBlockContentLocation().get().getStorageConf()); + StorageConfiguration storageConf = getBlockContentLocation().get().getStorageConf().getInline(); HoodieStorage storage = HoodieStorageUtils.getStorage(pathForReader, storageConf); // Read the content try (HoodieAvroHFileReaderImplBase reader = (HoodieAvroHFileReaderImplBase) @@ -225,7 +222,7 @@ protected ClosableIterator> lookupRecords(List sorte // NOTE: It's important to extend Hadoop configuration here to make sure configuration // is appropriately carried over - StorageConfiguration inlineConf = FSUtils.buildInlineConf(blockContentLoc.getStorageConf()); + StorageConfiguration inlineConf = blockContentLoc.getStorageConf().getInline(); StoragePath inlinePath = InLineFSUtils.getInlineFilePath( blockContentLoc.getLogFile().getPath(), diff --git 
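The HFile block above (and the Parquet block that follows) replace FSUtils.buildInlineConf with StorageConfiguration.getInline(), so each configuration derives its own inline-filesystem variant. A hedged sketch of the combined read-path setup; the four-argument getInlineFilePath signature is an assumption carried over from the pre-move InLineFSUtils API:

```java
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.HoodieStorageUtils;
import org.apache.hudi.storage.StorageConfiguration;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.storage.inline.InLineFSUtils;

final class InlineReadSetup {
  // Builds a HoodieStorage that reads a log block embedded at
  // [offset, offset + length) inside the outer log file, without
  // touching FSUtils or Hadoop classes directly.
  static HoodieStorage inlineStorageFor(StoragePath outerLogFilePath,
                                        StorageConfiguration outerConf,
                                        long contentOffset,
                                        long blockLength) {
    StorageConfiguration inlineConf = outerConf.getInline(); // replaces FSUtils.buildInlineConf
    StoragePath inlinePath = InLineFSUtils.getInlineFilePath(
        outerLogFilePath, outerLogFilePath.toUri().getScheme(), contentOffset, blockLength);
    return HoodieStorageUtils.getStorage(inlinePath, inlineConf);
  }
}
```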
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java index 4d65839b2eb1..4d7f3f838f29 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java @@ -20,20 +20,19 @@ import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.engine.HoodieReaderContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ClosableIterator; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; -import org.apache.hudi.hadoop.fs.inline.InLineFSUtils; import org.apache.hudi.io.SeekableDataInputStream; import org.apache.hudi.io.storage.HoodieFileReaderFactory; import org.apache.hudi.io.storage.HoodieFileWriter; import org.apache.hudi.io.storage.HoodieFileWriterFactory; +import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StorageConfiguration; import org.apache.hudi.storage.StoragePath; +import org.apache.hudi.storage.inline.InLineFSUtils; import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; @@ -123,7 +122,7 @@ protected byte[] serializeRecords(List records) throws IOException parquetWriter = HoodieFileWriterFactory.getFileWriter( HoodieFileFormat.PARQUET, outputStream, - HadoopFSUtils.getStorageConf(new Configuration()), + HoodieStorageUtils.getStorageConf(new Configuration()), config, writerSchema, recordType); @@ -153,7 +152,7 @@ protected ClosableIterator> readRecordsFromBlockPayload(Hood // NOTE: It's important to extend Hadoop configuration here to make sure configuration // is appropriately carried over - StorageConfiguration inlineConf = FSUtils.buildInlineConf(blockContentLoc.getStorageConf()); + StorageConfiguration inlineConf = blockContentLoc.getStorageConf().getInline(); StoragePath inlineLogFilePath = InLineFSUtils.getInlineFilePath( blockContentLoc.getLogFile().getPath(), @@ -175,7 +174,7 @@ protected ClosableIterator readRecordsFromBlockPayload(HoodieReaderContex // NOTE: It's important to extend Hadoop configuration here to make sure configuration // is appropriately carried over - StorageConfiguration inlineConf = FSUtils.buildInlineConf(blockContentLoc.getStorageConf()); + StorageConfiguration inlineConf = blockContentLoc.getStorageConf().getInline(); StoragePath inlineLogFilePath = InLineFSUtils.getInlineFilePath( blockContentLoc.getLogFile().getPath(), diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java index 2a4bb17f66e5..c22b7f6a087f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java @@ -51,12 +51,13 @@ * Utils for Hudi base file. 
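Heads-up for the BaseFileUtils hunk that follows: ParquetUtils is still constructed directly, but OrcUtils is loaded reflectively by class name, since OrcUtils moves to hudi-hadoop-common later in this patch. A hedged caller-side sketch of what the indirection means at runtime; the HoodieException wrapping matches how ReflectionUtils.loadClass failures are caught elsewhere in this diff:

```java
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.util.BaseFileUtils;
import org.apache.hudi.exception.HoodieException;

final class OrcSupportProbe {
  // With the reflective load, a missing hudi-hadoop-common jar now surfaces as a
  // runtime HoodieException instead of a compile-time error in hudi-common.
  static BaseFileUtils orcUtilsOrExplain() {
    try {
      return BaseFileUtils.getInstance(HoodieFileFormat.ORC);
    } catch (HoodieException e) {
      throw new IllegalStateException(
          "ORC support requires hudi-hadoop-common on the classpath", e);
    }
  }
}
```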
*/ public abstract class BaseFileUtils { + public static final String ORC_UTILS = "org.apache.hudi.common.util.OrcUtils"; public static BaseFileUtils getInstance(String path) { if (path.endsWith(HoodieFileFormat.PARQUET.getFileExtension())) { return new ParquetUtils(); } else if (path.endsWith(HoodieFileFormat.ORC.getFileExtension())) { - return new OrcUtils(); + return ReflectionUtils.loadClass(ORC_UTILS); } throw new UnsupportedOperationException("The format for file " + path + " is not supported yet."); } @@ -65,7 +66,7 @@ public static BaseFileUtils getInstance(HoodieFileFormat fileFormat) { if (HoodieFileFormat.PARQUET.equals(fileFormat)) { return new ParquetUtils(); } else if (HoodieFileFormat.ORC.equals(fileFormat)) { - return new OrcUtils(); + return ReflectionUtils.loadClass(ORC_UTILS); } throw new UnsupportedOperationException(fileFormat.name() + " format not supported yet."); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ConfigUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ConfigUtils.java index 3eeee52bd9ad..fdaa1f140163 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ConfigUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ConfigUtils.java @@ -20,6 +20,7 @@ import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.common.config.PropertiesConfig; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodiePayloadProps; import org.apache.hudi.common.model.RecordPayloadType; @@ -627,4 +628,8 @@ public static HoodieConfig getReaderConfigs(StorageConfiguration storageConf) Boolean.toString(storageConf.getBoolean(USE_NATIVE_HFILE_READER.key(), USE_NATIVE_HFILE_READER.defaultValue()))); return config; } + + public static TypedProperties loadGlobalProperties() { + return ((PropertiesConfig) ReflectionUtils.loadClass("org.apache.hudi.common.config.DFSPropertiesConfiguration")).getGlobalProperties(); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java index 9e2024a1d839..1c8966545102 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java @@ -28,9 +28,9 @@ import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.MetadataNotFoundException; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.keygen.BaseKeyGenerator; import org.apache.hudi.storage.HoodieStorage; +import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StorageConfiguration; import org.apache.hudi.storage.StoragePath; @@ -100,7 +100,8 @@ public static ParquetMetadata readMetadata(StorageConfiguration conf, Storage ParquetMetadata footer; try { // TODO(vc): Should we use the parallel reading version here? 
- footer = ParquetFileReader.readFooter(HadoopFSUtils.getFs(parquetFileHadoopPath.toString(), conf).getConf(), parquetFileHadoopPath); + footer = ParquetFileReader.readFooter(HoodieStorageUtils.getStorage( + parquetFileHadoopPath.toString(), conf).getConf().unwrapAs(Configuration.class), parquetFileHadoopPath); } catch (IOException e) { throw new HoodieIOException("Failed to read footer for parquet " + parquetFileHadoopPath, e); } @@ -125,7 +126,7 @@ private static Set> filterParquetRowKeys(StorageConfiguration filterFunction = Option.of(new RecordKeysFilterFunction(filter)); } Configuration conf = configuration.unwrapCopyAs(Configuration.class); - conf.addResource(HadoopFSUtils.getFs(filePath.toString(), conf).getConf()); + conf.addResource(HoodieStorageUtils.getStorage(filePath.toString(), configuration).getConf().unwrapAs(Configuration.class)); AvroReadSupport.setAvroReadSchema(conf, readSchema); AvroReadSupport.setRequestedProjection(conf, readSchema); Set> rowKeys = new HashSet<>(); @@ -179,7 +180,7 @@ public ClosableIterator getHoodieKeyIterator(StorageConfiguration public ClosableIterator getHoodieKeyIterator(StorageConfiguration configuration, StoragePath filePath, Option keyGeneratorOpt) { try { Configuration conf = configuration.unwrapCopyAs(Configuration.class); - conf.addResource(HadoopFSUtils.getFs(filePath.toString(), conf).getConf()); + conf.addResource(HoodieStorageUtils.getStorage(filePath.toString(), configuration).getConf().unwrapAs(Configuration.class)); Schema readSchema = keyGeneratorOpt .map(keyGenerator -> { List fields = new ArrayList<>(); diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriterFactory.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriterFactory.java index 9b137ce5d9d1..2a727158e178 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriterFactory.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriterFactory.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ReflectionUtils; +import org.apache.hudi.exception.HoodieException; import org.apache.hudi.storage.StorageConfiguration; import org.apache.hudi.storage.StoragePath; @@ -47,6 +48,11 @@ import static org.apache.hudi.io.storage.HoodieHFileConfig.PREFETCH_ON_OPEN; public class HoodieAvroFileWriterFactory extends HoodieFileWriterFactory { + //hardcoded classes to remove at a later time + public static final String HOODIE_AVRO_PARQUET_WRITER = "org.apache.hudi.io.storage.HoodieAvroParquetWriter"; + public static final String HOODIE_AVRO_HFILE_WRITER = "org.apache.hudi.io.storage.HoodieAvroHFileWriter"; + public static final String HOODIE_AVRO_ORC_WRITER = "org.apache.hudi.io.storage.HoodieAvroOrcWriter"; + @Override protected HoodieFileWriter newParquetFileWriter( String instantTime, StoragePath path, StorageConfiguration conf, HoodieConfig config, Schema schema, @@ -66,7 +72,14 @@ protected HoodieFileWriter newParquetFileWriter( config.getLongOrDefault(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE), conf.unwrapAs(Configuration.class), config.getDoubleOrDefault(HoodieStorageConfig.PARQUET_COMPRESSION_RATIO_FRACTION), config.getBooleanOrDefault(HoodieStorageConfig.PARQUET_DICTIONARY_ENABLED)); - return new HoodieAvroParquetWriter(path, parquetConfig, instantTime, taskContextSupplier, populateMetaFields); + try { + return (HoodieFileWriter) 
ReflectionUtils.loadClass(HOODIE_AVRO_PARQUET_WRITER, + new Class[] {StoragePath.class, HoodieParquetConfig.class, String.class, TaskContextSupplier.class, boolean.class}, + path, parquetConfig, instantTime, taskContextSupplier, populateMetaFields); + } catch (HoodieException e) { + throw (IOException) e.getCause().getCause(); + } + } protected HoodieFileWriter newParquetFileWriter( @@ -94,7 +107,13 @@ protected HoodieFileWriter newHFileFileWriter( HoodieAvroHFileReaderImplBase.KEY_FIELD_NAME, PREFETCH_ON_OPEN, CACHE_DATA_IN_L1, DROP_BEHIND_CACHE_COMPACTION, filter, HFILE_COMPARATOR); - return new HoodieAvroHFileWriter(instantTime, path, hfileConfig, schema, taskContextSupplier, config.getBoolean(HoodieTableConfig.POPULATE_META_FIELDS)); + try { + return (HoodieFileWriter) ReflectionUtils.loadClass(HOODIE_AVRO_HFILE_WRITER, + new Class[] {String.class, StoragePath.class, HoodieHFileConfig.class, Schema.class, TaskContextSupplier.class, boolean.class}, + instantTime, path, hfileConfig, schema, taskContextSupplier, config.getBoolean(HoodieTableConfig.POPULATE_META_FIELDS)); + } catch (HoodieException e) { + throw (IOException) e.getCause().getCause(); + } } protected HoodieFileWriter newOrcFileWriter( @@ -106,7 +125,13 @@ protected HoodieFileWriter newOrcFileWriter( config.getInt(HoodieStorageConfig.ORC_STRIPE_SIZE), config.getInt(HoodieStorageConfig.ORC_BLOCK_SIZE), config.getLong(HoodieStorageConfig.ORC_FILE_MAX_SIZE), filter); - return new HoodieAvroOrcWriter(instantTime, path, orcConfig, schema, taskContextSupplier); + try { + return (HoodieFileWriter) ReflectionUtils.loadClass(HOODIE_AVRO_ORC_WRITER, + new Class[] {String.class, StoragePath.class, HoodieOrcConfig.class, Schema.class, TaskContextSupplier.class}, + instantTime, path, orcConfig, schema, taskContextSupplier); + } catch (HoodieException e) { + throw (IOException) e.getCause().getCause(); + } } private HoodieAvroWriteSupport getHoodieAvroWriteSupport(StorageConfiguration conf, Schema schema, diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java index e59b1635fcaa..fef595d7923c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java @@ -31,7 +31,6 @@ import org.apache.hudi.common.util.io.ByteBufferBackedInputStream; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.hadoop.fs.HadoopSeekableDataInputStream; import org.apache.hudi.io.ByteArraySeekableDataInputStream; import org.apache.hudi.io.SeekableDataInputStream; import org.apache.hudi.io.hfile.HFileReader; @@ -47,7 +46,6 @@ import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.fs.FSDataInputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -265,7 +263,7 @@ private HFileReader newHFileReader() throws IOException { if (path.isPresent()) { HoodieStorage storage = HoodieStorageUtils.getStorage(path.get(), conf); fileSize = storage.getPathInfo(path.get()).getLength(); - inputStream = new HadoopSeekableDataInputStream((FSDataInputStream) storage.open(path.get())); + inputStream = storage.openSeekable(path.get(), false); } else { fileSize = bytesContent.get().length; inputStream = new ByteArraySeekableDataInputStream(new 
ByteBufferBackedInputStream(bytesContent.get())); diff --git a/hudi-common/src/main/java/org/apache/hudi/metrics/Metrics.java b/hudi-common/src/main/java/org/apache/hudi/metrics/Metrics.java index 33ae1b751992..af32248eea17 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metrics/Metrics.java +++ b/hudi-common/src/main/java/org/apache/hudi/metrics/Metrics.java @@ -23,7 +23,6 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.config.metrics.HoodieMetricsConfig; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StoragePath; @@ -99,8 +98,7 @@ public static synchronized void shutdownAllMetrics() { private List addAdditionalMetricsExporters(HoodieMetricsConfig metricConfig) { List reporterList = new ArrayList<>(); List propPathList = StringUtils.split(metricConfig.getMetricReporterFileBasedConfigs(), ","); - try (HoodieStorage storage = HoodieStorageUtils.getStorage( - propPathList.get(0), HadoopFSUtils.getStorageConf(new Configuration()))) { + try (HoodieStorage storage = HoodieStorageUtils.getStorage(propPathList.get(0), new Configuration())) { for (String propPath : propPathList) { HoodieMetricsConfig secondarySourceConfig = HoodieMetricsConfig.newBuilder().fromInputStream( storage.open(new StoragePath(propPath))).withPath(metricConfig.getBasePath()).build(); diff --git a/hudi-common/src/main/java/org/apache/hudi/storage/HoodieStorageUtils.java b/hudi-common/src/main/java/org/apache/hudi/storage/HoodieStorageUtils.java index da6efc3e9253..64bcde90d71c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/storage/HoodieStorageUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/storage/HoodieStorageUtils.java @@ -19,14 +19,15 @@ package org.apache.hudi.storage; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; -import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; -import org.apache.hudi.storage.hadoop.HoodieHadoopStorage; +import org.apache.hudi.common.fs.ConsistencyGuard; +import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; public class HoodieStorageUtils { + public static final String HUDI_HADOOP_STORAGE = "org.apache.hudi.storage.hadoop.HoodieHadoopStorage"; + public static final String HADOOP_STORAGE_CONF = "org.apache.hudi.storage.hadoop.HadoopStorageConfiguration"; public static final String DEFAULT_URI = "file:///"; public static HoodieStorage getStorage(StorageConfiguration conf) { @@ -34,22 +35,47 @@ public static HoodieStorage getStorage(StorageConfiguration conf) { } public static HoodieStorage getStorage(FileSystem fs) { - return new HoodieHadoopStorage(fs); + return (HoodieStorage) ReflectionUtils.loadClass(HUDI_HADOOP_STORAGE, new Class[] {FileSystem.class}, fs); } public static HoodieStorage getStorage(String basePath, StorageConfiguration conf) { - return getStorage(HadoopFSUtils.getFs(basePath, conf)); + return (HoodieStorage) ReflectionUtils.loadClass(HUDI_HADOOP_STORAGE, new Class[] {String.class, StorageConfiguration.class}, basePath, conf); + } + + public static HoodieStorage getStorage(String basePath, Configuration conf) { + return (HoodieStorage) ReflectionUtils.loadClass(HUDI_HADOOP_STORAGE, new Class[] {String.class, Configuration.class}, basePath, conf); } public static HoodieStorage getStorage(StoragePath path, StorageConfiguration conf) { - return 
getStorage(HadoopFSUtils.getFs(path, conf.unwrapAs(Configuration.class))); + return (HoodieStorage) ReflectionUtils.loadClass(HUDI_HADOOP_STORAGE, new Class[] {StoragePath.class, StorageConfiguration.class}, path, conf); + } + + public static HoodieStorage getStorage(StoragePath path, + StorageConfiguration conf, + boolean enableRetry, + long maxRetryIntervalMs, + int maxRetryNumbers, + long initialRetryIntervalMs, + String retryExceptions, + ConsistencyGuard consistencyGuard) { + return (HoodieStorage) ReflectionUtils.loadClass(HUDI_HADOOP_STORAGE, + new Class[] {StoragePath.class, StorageConfiguration.class, boolean.class, long.class, int.class, long.class, + String.class, ConsistencyGuard.class}, + path, conf, enableRetry, maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptions, + consistencyGuard); } public static HoodieStorage getRawStorage(HoodieStorage storage) { - FileSystem fs = (FileSystem) storage.getFileSystem(); - if (fs instanceof HoodieWrapperFileSystem) { - return getStorage(((HoodieWrapperFileSystem) fs).getFileSystem()); - } - return storage; + return (HoodieStorage) ReflectionUtils.loadClass(HUDI_HADOOP_STORAGE, new Class[] {HoodieStorage.class}, storage); + } + + public static StorageConfiguration getStorageConf(Configuration conf) { + return (StorageConfiguration) ReflectionUtils.loadClass(HADOOP_STORAGE_CONF, + new Class[] {Configuration.class}, conf); + } + + public static StorageConfiguration getStorageConfWithCopy(Configuration conf) { + return (StorageConfiguration) ReflectionUtils.loadClass(HADOOP_STORAGE_CONF, + new Class[] {Configuration.class, boolean.class}, conf, true); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java index 459b7de30c32..4139f1fa3963 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java @@ -36,8 +36,8 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.storage.HoodieStorage; +import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StorageConfiguration; import org.apache.hudi.storage.StoragePath; @@ -48,8 +48,6 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericFixed; import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.orc.TypeDescription; import org.slf4j.Logger; @@ -578,8 +576,8 @@ private static void createMetadataFile(String f, String basePath, StorageConfigu basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + f); OutputStream os = null; try { - FileSystem fs = HadoopFSUtils.getFs(basePath, configuration); - os = fs.create(commitFile, true); + HoodieStorage storage = HoodieStorageUtils.getStorage(basePath, configuration); + os = storage.create(new StoragePath(commitFile.toUri()), true); // Write empty commit metadata os.write(content); } catch (IOException ioe) { @@ -628,8 +626,8 @@ public static void createEmptyCleanRequestedFile(String basePath, String instant } private static void createEmptyFile(String basePath, Path filePath, StorageConfiguration configuration) 
throws IOException { - FileSystem fs = HadoopFSUtils.getFs(basePath, configuration); - OutputStream os = fs.create(filePath, true); + HoodieStorage storage = HoodieStorageUtils.getStorage(basePath, configuration); + OutputStream os = storage.create(new StoragePath(filePath.toUri()), true); os.close(); } @@ -644,8 +642,8 @@ public static void createCompactionAuxiliaryMetadata(String basePath, HoodieInst StorageConfiguration configuration) throws IOException { Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + "/" + instant.getFileName()); - FileSystem fs = HadoopFSUtils.getFs(basePath, configuration); - try (OutputStream os = fs.create(commitFile, true)) { + HoodieStorage storage = HoodieStorageUtils.getStorage(basePath, configuration); + try (OutputStream os = storage.create(new StoragePath(commitFile.toUri()), true)) { HoodieCompactionPlan workload = HoodieCompactionPlan.newBuilder().setVersion(1).build(); // Write empty commit metadata os.write(TimelineMetadataUtils.serializeCompactionPlan(workload).get()); @@ -656,11 +654,11 @@ public static void createSavepointFile(String basePath, String instantTime, Stor throws IOException { Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeSavePointFileName(instantTime + "_" + InProcessTimeGenerator.createNewInstantTime())); - FileSystem fs = HadoopFSUtils.getFs(basePath, configuration); - try (FSDataOutputStream os = fs.create(commitFile, true)) { + HoodieStorage storage = HoodieStorageUtils.getStorage(basePath, configuration); + try (OutputStream os = storage.create(new StoragePath(commitFile.toUri()), true)) { HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); // Write empty commit metadata - os.writeBytes(new String(serializeCommitMetadata(commitMetadata).get())); + os.write(serializeCommitMetadata(commitMetadata).get()); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java index 2708a797ec32..5cc687b18c31 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java @@ -28,8 +28,8 @@ import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; @@ -56,6 +56,8 @@ import java.util.UUID; import java.util.stream.Collectors; +import static org.apache.hudi.storage.HoodieStorageUtils.HADOOP_STORAGE_CONF; + /** * A utility class for testing. 
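The HoodieTestDataGenerator changes above replace direct FileSystem.create calls with the HoodieStorage abstraction. Condensed into one hedged helper (class and method names are illustrative):

```java
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.HoodieStorageUtils;
import org.apache.hudi.storage.StorageConfiguration;
import org.apache.hudi.storage.StoragePath;

import java.io.IOException;
import java.io.OutputStream;

final class EmptyFileWriter {
  // Creates (or overwrites) an empty file purely through HoodieStorage; the storage
  // instance itself is resolved reflectively to the Hadoop-backed implementation.
  static void createEmptyFile(String basePath, StoragePath filePath, StorageConfiguration conf)
      throws IOException {
    HoodieStorage storage = HoodieStorageUtils.getStorage(basePath, conf);
    try (OutputStream os = storage.create(filePath, true)) { // overwrite = true, like fs.create
      // presence of the file is the signal; nothing to write
    }
  }
}
```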
*/ @@ -68,7 +70,13 @@ public class HoodieTestUtils { public static final String[] DEFAULT_PARTITION_PATHS = {"2016/03/15", "2015/03/16", "2015/03/17"}; public static StorageConfiguration getDefaultStorageConf() { - return HadoopFSUtils.getStorageConf(new Configuration(false)); + return (StorageConfiguration) ReflectionUtils.loadClass(HADOOP_STORAGE_CONF, + new Class[] {Boolean.class}, false); + } + + public static StorageConfiguration getDefaultStorageConfWithDefaults() { + return (StorageConfiguration) ReflectionUtils.loadClass(HADOOP_STORAGE_CONF, + new Class[] {Boolean.class}, true); } public static HoodieStorage getStorage(String path) { @@ -215,7 +223,7 @@ public static HoodieTableMetaClient createMetaClient(StorageConfiguration sto */ public static HoodieTableMetaClient createMetaClient(Configuration conf, String basePath) { - return createMetaClient(HadoopFSUtils.getStorageConfWithCopy(conf), basePath); + return createMetaClient(HoodieStorageUtils.getStorageConfWithCopy(conf), basePath); } /** diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/InProcessTimeGenerator.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/InProcessTimeGenerator.java index 31ac8a7ac403..265047d07479 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/InProcessTimeGenerator.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/InProcessTimeGenerator.java @@ -23,9 +23,6 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.TimeGenerator; import org.apache.hudi.common.table.timeline.TimeGenerators; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; - -import org.apache.hadoop.conf.Configuration; /** * An in-process time generator that always use in-process lock for time generation. 
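HoodieTestUtils above now builds its default StorageConfiguration reflectively. One detail worth flagging: this call site passes Boolean.class as the constructor-parameter type, while HoodieStorageUtils.getStorageConfWithCopy uses boolean.class; whichever matches HadoopStorageConfiguration's actual constructor (not shown in this patch) is the correct one. A sketch of the pattern as written:

```java
import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.storage.StorageConfiguration;

import static org.apache.hudi.storage.HoodieStorageUtils.HADOOP_STORAGE_CONF;

final class DefaultConfFactory {
  // Reflectively invokes what is assumed to be a HadoopStorageConfiguration(boolean)
  // constructor; loadDefaults = false mimics new Configuration(false) in the old code.
  static StorageConfiguration defaultConf(boolean loadDefaults) {
    return (StorageConfiguration) ReflectionUtils.loadClass(
        HADOOP_STORAGE_CONF, new Class[] {Boolean.class}, loadDefaults);
  }
}
```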
@@ -35,8 +32,7 @@ public class InProcessTimeGenerator { private static final TimeGenerator TIME_GENERATOR = TimeGenerators.getTimeGenerator( - HoodieTimeGeneratorConfig.defaultConfig(""), - HadoopFSUtils.getStorageConf(new Configuration())); + HoodieTimeGeneratorConfig.defaultConfig(""), HoodieTestUtils.getDefaultStorageConfWithDefaults()); public static String createNewInstantTime() { return createNewInstantTime(0L); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/reader/HoodieFileSliceTestUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/reader/HoodieFileSliceTestUtils.java index 06d351155109..43002a723ef4 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/reader/HoodieFileSliceTestUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/reader/HoodieFileSliceTestUtils.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.config.HoodieReaderConfig; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.LocalTaskContextSupplier; +import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.model.DeleteRecord; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieAvroIndexedRecord; @@ -44,8 +45,9 @@ import org.apache.hudi.common.table.log.block.HoodieParquetDataBlock; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.common.util.collection.Pair; -import org.apache.hudi.io.storage.HoodieAvroParquetWriter; +import org.apache.hudi.io.storage.HoodieAvroFileWriter; import org.apache.hudi.io.storage.HoodieParquetConfig; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.StoragePath; @@ -76,6 +78,7 @@ import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.AVRO_SCHEMA; import static org.apache.hudi.common.testutils.reader.DataGenerationPlan.OperationType.DELETE; import static org.apache.hudi.common.testutils.reader.DataGenerationPlan.OperationType.INSERT; +import static org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.HOODIE_AVRO_PARQUET_WRITER; public class HoodieFileSliceTestUtils { public static final String FORWARD_SLASH = "/"; @@ -267,7 +270,8 @@ public static HoodieBaseFile createBaseFile( 0.1, true); - try (HoodieAvroParquetWriter writer = new HoodieAvroParquetWriter( + try (HoodieAvroFileWriter writer = (HoodieAvroFileWriter) ReflectionUtils.loadClass(HOODIE_AVRO_PARQUET_WRITER, + new Class[] {StoragePath.class, HoodieParquetConfig.class, String.class, TaskContextSupplier.class, boolean.class}, new StoragePath(baseFilePath), parquetConfig, baseInstantTime, diff --git a/hudi-examples/hudi-examples-common/pom.xml b/hudi-examples/hudi-examples-common/pom.xml index caec2fadbade..53876ca055a7 100644 --- a/hudi-examples/hudi-examples-common/pom.xml +++ b/hudi-examples/hudi-examples-common/pom.xml @@ -94,6 +94,11 @@ hudi-common ${project.version} + + org.apache.hudi + hudi-hadoop-common + ${project.version} + diff --git a/hudi-examples/hudi-examples-flink/pom.xml b/hudi-examples/hudi-examples-flink/pom.xml index 590349bb02bd..8d1a7d398f2d 100644 --- a/hudi-examples/hudi-examples-flink/pom.xml +++ b/hudi-examples/hudi-examples-flink/pom.xml @@ -79,6 +79,11 @@ hudi-common ${project.version} + + org.apache.hudi + hudi-hadoop-common + ${project.version} + org.apache.hudi hudi-client-common @@ -272,6 +277,14 @@ test-jar test + + 
org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi hudi-client-common diff --git a/hudi-flink-datasource/hudi-flink/pom.xml b/hudi-flink-datasource/hudi-flink/pom.xml index e0ba8c5675f6..edbab588b1f0 100644 --- a/hudi-flink-datasource/hudi-flink/pom.xml +++ b/hudi-flink-datasource/hudi-flink/pom.xml @@ -95,6 +95,11 @@ hudi-common ${project.version} + + org.apache.hudi + hudi-hadoop-common + ${project.version} + org.apache.hudi hudi-client-common @@ -349,6 +354,14 @@ test-jar test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi hudi-client-common diff --git a/hudi-hadoop-common/pom.xml b/hudi-hadoop-common/pom.xml index 2ae9c6107035..912f2b075b90 100644 --- a/hudi-hadoop-common/pom.xml +++ b/hudi-hadoop-common/pom.xml @@ -68,6 +68,11 @@ + + org.apache.hudi + hudi-common + ${project.version} + org.apache.hudi hudi-io @@ -92,6 +97,17 @@ provided + + org.apache.parquet + parquet-avro + + + + + com.esotericsoftware + kryo-shaded + + org.apache.hudi hudi-tests-common @@ -106,5 +122,19 @@ ${project.version} test + + org.apache.hudi + hudi-common + ${project.version} + tests + test-jar + test + + + com.github.stefanbirkner + system-rules + 1.17.2 + test + diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java similarity index 98% rename from hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java index f7987b870d11..cc706dfd7193 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java @@ -55,7 +55,7 @@ * * Note: Not reusing commons-configuration since it has too many conflicting runtime deps. 
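The rename below makes DFSPropertiesConfiguration a hudi-hadoop-common class extending the new PropertiesConfig hook, closing the loop on the reflective bridge: ConfigUtils.loadGlobalProperties (earlier in this patch) instantiates it by name and calls the override. Spelled out end to end, using only calls shown in this diff:

```java
import org.apache.hudi.common.config.PropertiesConfig;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.ReflectionUtils;

final class GlobalPropsBridge {
  // Same resolution path as ConfigUtils.loadGlobalProperties: hudi-common names the
  // implementation class, hudi-hadoop-common supplies it on the classpath at runtime.
  static TypedProperties resolveGlobalProps() {
    PropertiesConfig impl = (PropertiesConfig) ReflectionUtils.loadClass(
        "org.apache.hudi.common.config.DFSPropertiesConfiguration");
    return impl.getGlobalProperties(); // delegates to DFSPropertiesConfiguration.getGlobalProps()
  }
}
```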
*/ -public class DFSPropertiesConfiguration { +public class DFSPropertiesConfiguration extends PropertiesConfig { private static final Logger LOG = LoggerFactory.getLogger(DFSPropertiesConfiguration.class); @@ -202,6 +202,11 @@ public void addPropsFromStream(BufferedReader reader, StoragePath cfgFilePath) t } } + @Override + public TypedProperties getGlobalProperties() { + return getGlobalProps(); + } + public static TypedProperties getGlobalProps() { final TypedProperties globalProps = new TypedProperties(); globalProps.putAll(GLOBAL_PROPS); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java similarity index 93% rename from hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java index 213068632b75..c37d3118c4dc 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java @@ -38,7 +38,6 @@ import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.orc.OrcFile; @@ -63,6 +62,7 @@ import static org.apache.hudi.common.util.BinaryUtil.toBytes; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToHadoopPath; /** * Utility functions for ORC files. @@ -81,7 +81,7 @@ public ClosableIterator getHoodieKeyIterator(StorageConfiguration try { Configuration conf = configuration.unwrapCopyAs(Configuration.class); conf.addResource(HadoopFSUtils.getFs(filePath.toString(), conf).getConf()); - Reader reader = OrcFile.createReader(new Path(filePath.toUri()), OrcFile.readerOptions(conf)); + Reader reader = OrcFile.createReader(convertToHadoopPath(filePath), OrcFile.readerOptions(conf)); Schema readSchema = HoodieAvroUtils.getRecordKeyPartitionPathSchema(); TypeDescription orcSchema = AvroOrcUtils.createOrcSchema(readSchema); @@ -152,7 +152,7 @@ public ClosableIterator getHoodieKeyIterator(StorageConfiguration public List readAvroRecords(StorageConfiguration configuration, StoragePath filePath) { Schema avroSchema; try (Reader reader = OrcFile.createReader( - new Path(filePath.toUri()), OrcFile.readerOptions(configuration.unwrapAs(Configuration.class)))) { + convertToHadoopPath(filePath), OrcFile.readerOptions(configuration.unwrapAs(Configuration.class)))) { avroSchema = AvroOrcUtils.createAvroSchema(reader.getSchema()); } catch (IOException io) { throw new HoodieIOException("Unable to read Avro records from an ORC file:" + filePath, io); @@ -167,7 +167,7 @@ public List readAvroRecords(StorageConfiguration configuration public List readAvroRecords(StorageConfiguration configuration, StoragePath filePath, Schema avroSchema) { List records = new ArrayList<>(); try (Reader reader = OrcFile.createReader( - new Path(filePath.toUri()), OrcFile.readerOptions(configuration.unwrapAs(Configuration.class)))) { + convertToHadoopPath(filePath), OrcFile.readerOptions(configuration.unwrapAs(Configuration.class)))) { TypeDescription orcSchema = reader.getSchema(); try (RecordReader recordReader = reader.rows( new Options(configuration.unwrapAs(Configuration.class)).schema(orcSchema))) { @@ -197,7 
+197,7 @@ public Set> filterRowKeys(StorageConfiguration conf, Stora throws HoodieIOException { long rowPosition = 0; try (Reader reader = OrcFile.createReader( - new Path(filePath.toUri()), OrcFile.readerOptions(conf.unwrapAs(Configuration.class)))) { + convertToHadoopPath(filePath), OrcFile.readerOptions(conf.unwrapAs(Configuration.class)))) { TypeDescription schema = reader.getSchema(); try (RecordReader recordReader = reader.rows(new Options(conf.unwrapAs(Configuration.class)).schema(schema))) { Set> filteredRowKeys = new HashSet<>(); @@ -236,7 +236,7 @@ public Set> filterRowKeys(StorageConfiguration conf, Stora public Map readFooter(StorageConfiguration conf, boolean required, StoragePath filePath, String... footerNames) { try (Reader reader = OrcFile.createReader( - new Path(filePath.toUri()), OrcFile.readerOptions(conf.unwrapAs(Configuration.class)))) { + convertToHadoopPath(filePath), OrcFile.readerOptions(conf.unwrapAs(Configuration.class)))) { Map footerVals = new HashMap<>(); List metadataItemList = reader.getFileTail().getFooter().getMetadataList(); Map metadata = metadataItemList.stream().collect(Collectors.toMap( @@ -259,7 +259,7 @@ public Map readFooter(StorageConfiguration conf, boolean requ @Override public Schema readAvroSchema(StorageConfiguration conf, StoragePath filePath) { try (Reader reader = OrcFile.createReader( - new Path(filePath.toUri()), OrcFile.readerOptions(conf.unwrapAs(Configuration.class)))) { + convertToHadoopPath(filePath), OrcFile.readerOptions(conf.unwrapAs(Configuration.class)))) { if (reader.hasMetadataValue("orc.avro.schema")) { ByteBuffer metadataValue = reader.getMetadataValue("orc.avro.schema"); byte[] bytes = toBytes(metadataValue); @@ -281,7 +281,7 @@ public HoodieFileFormat getFormat() { @Override public long getRowCount(StorageConfiguration conf, StoragePath filePath) { try (Reader reader = OrcFile.createReader( - new Path(filePath.toUri()), OrcFile.readerOptions(conf.unwrapAs(Configuration.class)))) { + convertToHadoopPath(filePath), OrcFile.readerOptions(conf.unwrapAs(Configuration.class)))) { return reader.getNumberOfRows(); } catch (IOException io) { throw new HoodieIOException("Unable to get row count for ORC file:" + filePath, io); @@ -296,7 +296,7 @@ public void writeMetaFile(HoodieStorage storage, StoragePath filePath, Propertie OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(storage.unwrapConfAs(Configuration.class)) .fileSystem((FileSystem) storage.getFileSystem()) .setSchema(AvroOrcUtils.createOrcSchema(schema)); - try (Writer writer = OrcFile.createWriter(new Path(filePath.toUri()), writerOptions)) { + try (Writer writer = OrcFile.createWriter(convertToHadoopPath(filePath), writerOptions)) { for (String key : props.stringPropertyNames()) { writer.addUserMetadata(key, ByteBuffer.wrap(getUTF8Bytes(props.getProperty(key)))); } diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopFSUtils.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopFSUtils.java index 3aa66e6c2de3..3119ee8c0c08 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopFSUtils.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopFSUtils.java @@ -20,13 +20,11 @@ package org.apache.hudi.hadoop.fs; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.StorageConfiguration; import org.apache.hudi.storage.StoragePath; import org.apache.hudi.storage.StoragePathInfo; import 
org.apache.hudi.storage.StorageSchemes; import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration; -import org.apache.hudi.storage.hadoop.HoodieHadoopStorage; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BufferedFSInputStream; @@ -42,8 +40,6 @@ import java.io.IOException; import java.util.Map; -import static org.apache.hudi.common.util.ValidationUtils.checkArgument; - /** * Utility functions related to accessing the file storage on Hadoop. */ @@ -88,7 +84,7 @@ public static FileSystem getFs(String pathStr, Configuration conf) { } public static FileSystem getFs(StoragePath path, Configuration conf) { - return getFs(new Path(path.toUri()), conf); + return getFs(convertToHadoopPath(path), conf); } public static FileSystem getFs(Path path, Configuration conf) { @@ -109,25 +105,6 @@ public static FileSystem getFs(String pathStr, Configuration conf, boolean local return getFs(pathStr, conf); } - public static HoodieStorage getStorageWithWrapperFS(StoragePath path, - StorageConfiguration conf, - boolean enableRetry, - long maxRetryIntervalMs, - int maxRetryNumbers, - long initialRetryIntervalMs, - String retryExceptions, - ConsistencyGuard consistencyGuard) { - FileSystem fileSystem = getFs(path, conf.unwrapCopyAs(Configuration.class)); - - if (enableRetry) { - fileSystem = new HoodieRetryWrapperFileSystem(fileSystem, - maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptions); - } - checkArgument(!(fileSystem instanceof HoodieWrapperFileSystem), - "File System not expected to be that of HoodieWrapperFileSystem"); - return new HoodieHadoopStorage(new HoodieWrapperFileSystem(fileSystem, consistencyGuard)); - } - public static Path addSchemeIfLocalPath(String path) { Path providedPath = new Path(path); File localFile = new File(path); @@ -190,11 +167,13 @@ public static FileStatus convertToHadoopFileStatus(StoragePathInfo pathInfo) { * @param fs instance of {@link FileSystem} in use. * @param filePath path of the file. * @param bufferSize buffer size to be used. + * @param wrapStream if false, don't attempt to wrap the stream * @return the right {@link FSDataInputStream} as required. */ public static FSDataInputStream getFSDataInputStream(FileSystem fs, StoragePath filePath, - int bufferSize) { + int bufferSize, + boolean wrapStream) { FSDataInputStream fsDataInputStream = null; try { fsDataInputStream = fs.open(convertToHadoopPath(filePath), bufferSize); @@ -202,6 +181,10 @@ public static FSDataInputStream getFSDataInputStream(FileSystem fs, throw new HoodieIOException(String.format("Exception creating input stream from file: %s", filePath), e); } + if (!wrapStream) { + return fsDataInputStream; + } + if (isGCSFileSystem(fs)) { // in GCS FS, we might need to interceptor seek offsets as we might get EOF exception return new SchemeAwareFSDataInputStream(getFSDataInputStreamForGCS(fsDataInputStream, filePath, bufferSize), true); @@ -273,4 +256,12 @@ public static boolean isCHDFileSystem(FileSystem fs) { private static StorageConfiguration getStorageConf(Configuration conf, boolean copy) { return new HadoopStorageConfiguration(conf, copy); } + + public static Configuration registerFileSystem(StoragePath file, Configuration conf) { + Configuration returnConf = new Configuration(conf); + String scheme = HadoopFSUtils.getFs(file.toString(), conf).getScheme(); + returnConf.set("fs." 
+ HoodieWrapperFileSystem.getHoodieScheme(scheme) + ".impl", + HoodieWrapperFileSystem.class.getName()); + return returnConf; + } } diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieWrapperFileSystem.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieWrapperFileSystem.java index 927849fea79f..b2a3a97d3bd1 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieWrapperFileSystem.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieWrapperFileSystem.java @@ -19,6 +19,8 @@ package org.apache.hudi.hadoop.fs; +import org.apache.hudi.common.fs.ConsistencyGuard; +import org.apache.hudi.common.fs.NoOpConsistencyGuard; import org.apache.hudi.common.metrics.Registry; import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.exception.HoodieException; @@ -61,6 +63,9 @@ import java.util.concurrent.ConcurrentMap; import java.util.concurrent.TimeoutException; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToHadoopPath; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToStoragePath; + /** * HoodieWrapperFileSystem wraps the default file system. It holds state about the open streams in the file system to * support getting the written size to each of the open streams. @@ -142,7 +147,7 @@ public HoodieWrapperFileSystem(FileSystem fileSystem, ConsistencyGuard consisten public static Path convertToHoodiePath(StoragePath file, Configuration conf) { try { String scheme = HadoopFSUtils.getFs(file.toString(), conf).getScheme(); - return convertPathWithScheme(new Path(file.toUri()), getHoodieScheme(scheme)); + return convertPathWithScheme(convertToHadoopPath(file), getHoodieScheme(scheme)); } catch (HoodieIOException e) { throw e; } @@ -357,7 +362,7 @@ public boolean delete(Path f, boolean recursive) throws IOException { if (success) { try { - consistencyGuard.waitTillFileDisappears(new StoragePath(f.toUri())); + consistencyGuard.waitTillFileDisappears(convertToStoragePath(f)); } catch (TimeoutException e) { throw new HoodieException("Timed out waiting for " + f + " to disappear", e); } @@ -969,7 +974,7 @@ private Path convertToDefaultPath(Path oldPath) { } private StoragePath convertToDefaultStoragePath(Path oldPath) { - return new StoragePath(convertPathWithScheme(oldPath, getScheme()).toUri()); + return convertToStoragePath(convertPathWithScheme(oldPath, getScheme())); } private Path convertToLocalPath(Path oldPath) { diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/SizeAwareFSDataOutputStream.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/SizeAwareFSDataOutputStream.java index 3665c2a69a26..e2851a35084a 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/SizeAwareFSDataOutputStream.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/SizeAwareFSDataOutputStream.java @@ -19,8 +19,8 @@ package org.apache.hudi.hadoop.fs; +import org.apache.hudi.common.fs.ConsistencyGuard; import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.storage.StoragePath; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; @@ -29,6 +29,8 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicLong; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToStoragePath; + /** * Wrapper over FSDataOutputStream to keep track of the size of the written bytes. This gives a cheap way * to check on the underlying file size. 
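Aside, not part of the patch: the hunks above relocate ConsistencyGuard into org.apache.hudi.common.fs and add registerFileSystem to HadoopFSUtils. A minimal sketch of the writer-side call pattern this enables, mirroring the HoodieAvroHFileWriter hunk further down; the class name and file path are hypothetical:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hudi.hadoop.fs.HadoopFSUtils;
    import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem;
    import org.apache.hudi.storage.StoragePath;

    public class WrapperFsSketch {
      public static void main(String[] args) throws Exception {
        // Hypothetical target file on the local file system.
        StoragePath file = new StoragePath("file:///tmp/hudi/p1/f1_1-0-1_100.parquet");
        // registerFileSystem copies the conf and maps the derived scheme
        // ("hoodie-file") onto HoodieWrapperFileSystem.
        Configuration conf = HadoopFSUtils.registerFileSystem(file, new Configuration());
        // convertToHoodiePath re-schemes file:// to hoodie-file:// so that
        // Path#getFileSystem resolves the wrapper registered above.
        Path hoodiePath = HoodieWrapperFileSystem.convertToHoodiePath(file, conf);
        HoodieWrapperFileSystem fs = (HoodieWrapperFileSystem) hoodiePath.getFileSystem(conf);
        System.out.println(fs.getUri());
      }
    }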
@@ -76,7 +78,7 @@ public void write(byte[] b) throws IOException { public void close() throws IOException { super.close(); try { - consistencyGuard.waitTillFileAppears(new StoragePath(path.toUri())); + consistencyGuard.waitTillFileAppears(convertToStoragePath(path)); } catch (TimeoutException e) { throw new HoodieException(e); }
diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/HadoopInLineFSUtils.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/HadoopInLineFSUtils.java new file mode 100644 index 000000000000..ba252ef45ba0 --- /dev/null +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/HadoopInLineFSUtils.java
@@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.hadoop.fs.inline; + +import org.apache.hudi.storage.StorageConfiguration; +import org.apache.hudi.storage.StoragePath; +import org.apache.hudi.storage.inline.InLineFSUtils; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; + +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToHadoopPath; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToStoragePath; + +/** + * Utils to parse InLineFileSystem paths. + * Inline FS format: + * "inlinefs://<path_to_outer_file>/<outer_file_scheme>/?start_offset=<start_offset>&length=<length>" + * Eg: "inlinefs://<path_to_outer_file>/s3a/?start_offset=20&length=40" + */ +public class HadoopInLineFSUtils extends InLineFSUtils { + + public static StorageConfiguration buildInlineConf(StorageConfiguration storageConf) { + StorageConfiguration inlineConf = storageConf.newInstance(); + inlineConf.set("fs." + InLineFileSystem.SCHEME + ".impl", InLineFileSystem.class.getName()); + (inlineConf.unwrapAs(Configuration.class)).setClassLoader(InLineFileSystem.class.getClassLoader()); + return inlineConf; + } + + /** + * InlineFS Path format: + * "inlinefs://path/to/outer/file/outer_file_scheme/?start_offset=<start_offset>&length=<length>" + * <p> + * Outer File Path format: + * "outer_file_scheme://path/to/outer/file" + * <p> + * Example + * Input: "inlinefs://file1/s3a/?start_offset=20&length=40". + * Output: "s3a://file1" + * + * @param inlineFSPath InLineFS Path to get the outer file Path + * @return Outer file Path from the InLineFS Path + */ + public static Path getOuterFilePathFromInlinePath(Path inlineFSPath) { + StoragePath storagePath = convertToStoragePath(inlineFSPath); + StoragePath outerFilePath = getOuterFilePathFromInlinePath(storagePath); + return convertToHadoopPath(outerFilePath); + } +}
diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/InLineFileSystem.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/InLineFileSystem.java index 9d7d187b807e..9296b7178999 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/InLineFileSystem.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/InLineFileSystem.java
@@ -20,6 +20,7 @@ package org.apache.hudi.hadoop.fs.inline; import org.apache.hudi.storage.StoragePath; +import org.apache.hudi.storage.inline.InLineFSUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream;
@@ -33,6 +34,8 @@ import java.io.IOException; import java.net.URI; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToStoragePath; + /** * Enables reading any inline file at a given offset and length. This {@link FileSystem} is used only in read path and does not support * any write apis.
@@ -46,7 +49,7 @@ */ public class InLineFileSystem extends FileSystem { - public static final String SCHEME = "inlinefs"; + public static final String SCHEME = InLineFSUtils.SCHEME; private Configuration conf = null; @Override
@@ -67,11 +70,11 @@ public String getScheme() { @Override public FSDataInputStream open(Path inlinePath, int bufferSize) throws IOException { - Path outerPath = InLineFSUtils.getOuterFilePathFromInlinePath(inlinePath); + Path outerPath = HadoopInLineFSUtils.getOuterFilePathFromInlinePath(inlinePath); FileSystem outerFs = outerPath.getFileSystem(conf); FSDataInputStream outerStream = outerFs.open(outerPath, bufferSize); - StoragePath inlineStoragePath = new StoragePath(inlinePath.toUri()); - return new InLineFsDataInputStream(InLineFSUtils.startOffset(inlineStoragePath), outerStream, InLineFSUtils.length(inlineStoragePath)); + StoragePath inlineStoragePath = convertToStoragePath(inlinePath); + return new InLineFsDataInputStream(HadoopInLineFSUtils.startOffset(inlineStoragePath), outerStream, HadoopInLineFSUtils.length(inlineStoragePath)); } @Override
@@ -85,10 +88,10 @@ public boolean exists(Path f) { @Override public FileStatus getFileStatus(Path inlinePath) throws IOException { - Path outerPath = InLineFSUtils.getOuterFilePathFromInlinePath(inlinePath); + Path outerPath = HadoopInLineFSUtils.getOuterFilePathFromInlinePath(inlinePath); FileSystem outerFs = outerPath.getFileSystem(conf); FileStatus status = outerFs.getFileStatus(outerPath); - FileStatus toReturn = new FileStatus(InLineFSUtils.length(new StoragePath(inlinePath.toUri())), status.isDirectory(), status.getReplication(), status.getBlockSize(), + FileStatus toReturn = new FileStatus(HadoopInLineFSUtils.length(convertToStoragePath(inlinePath)), status.isDirectory(), status.getReplication(), status.getBlockSize(), status.getModificationTime(), status.getAccessTime(), status.getPermission(), status.getOwner(), status.getGroup(), inlinePath); return toReturn;
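Aside, not part of the patch: a round trip through the relocated inline-FS helpers. getInlineFilePath and the SCHEME constant are inherited from the scheme-agnostic InLineFSUtils in hudi-io; the Hadoop subclass above only adds the Path-based conversions. The class name and outer file path are hypothetical:

    import org.apache.hadoop.fs.Path;
    import org.apache.hudi.hadoop.fs.inline.HadoopInLineFSUtils;
    import org.apache.hudi.storage.StoragePath;

    public class InlinePathSketch {
      public static void main(String[] args) {
        // Hypothetical outer file holding an embedded (inlined) payload.
        StoragePath outer = new StoragePath("file:/tmp/outer.hfile");
        // Address a 100-byte slice starting at offset 20 via the inlinefs scheme.
        StoragePath inline = HadoopInLineFSUtils.getInlineFilePath(outer, "file", 20, 100);
        // Convert back: strips the inlinefs wrapper and restores the outer scheme.
        Path recovered = HadoopInLineFSUtils.getOuterFilePathFromInlinePath(new Path(inline.toUri()));
        System.out.println(inline + " -> " + recovered);
      }
    }

diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java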
b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java similarity index 98% rename from hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java index 5d7366ac9399..6de6b24868b5 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java @@ -21,10 +21,10 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.engine.TaskContextSupplier; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieDuplicateKeyException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.storage.StoragePath; @@ -80,7 +80,7 @@ public class HoodieAvroHFileWriter public HoodieAvroHFileWriter(String instantTime, StoragePath file, HoodieHFileConfig hfileConfig, Schema schema, TaskContextSupplier taskContextSupplier, boolean populateMetaFields) throws IOException { - Configuration conf = FSUtils.registerFileSystem(file, hfileConfig.getHadoopConf()); + Configuration conf = HadoopFSUtils.registerFileSystem(file, hfileConfig.getHadoopConf()); this.file = HoodieWrapperFileSystem.convertToHoodiePath(file, conf); this.fs = (HoodieWrapperFileSystem) this.file.getFileSystem(conf); this.hfileConfig = hfileConfig; diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java similarity index 94% rename from hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java index 3346816125bf..07e7bc7f1223 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java @@ -18,13 +18,14 @@ package org.apache.hudi.io.storage; +import org.apache.hudi.avro.HoodieAvroWriteSupport; import org.apache.hudi.avro.HoodieBloomFilterWriteSupport; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.bloom.HoodieDynamicBoundedBloomFilter; import org.apache.hudi.common.engine.TaskContextSupplier; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.util.AvroOrcUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.storage.StoragePath; @@ -45,7 +46,6 @@ import java.util.List; import java.util.concurrent.atomic.AtomicLong; -import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; public class HoodieAvroOrcWriter implements HoodieAvroFileWriter, Closeable { @@ -70,7 +70,7 @@ public class HoodieAvroOrcWriter implements HoodieAvroFileWriter, Closeable { public HoodieAvroOrcWriter(String instantTime, StoragePath file, HoodieOrcConfig config, Schema schema, TaskContextSupplier taskContextSupplier) throws IOException { - Configuration conf = 
FSUtils.registerFileSystem(file, config.getHadoopConf()); + Configuration conf = HadoopFSUtils.registerFileSystem(file, config.getHadoopConf()); this.file = HoodieWrapperFileSystem.convertToHoodiePath(file, conf); this.fs = (HoodieWrapperFileSystem) this.file.getFileSystem(conf); this.instantTime = instantTime; @@ -152,7 +152,7 @@ public void close() throws IOException { if (orcConfig.useBloomFilter()) { final BloomFilter bloomFilter = orcConfig.getBloomFilter(); - writer.addUserMetadata(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, ByteBuffer.wrap(getUTF8Bytes(bloomFilter.serializeToString()))); + writer.addUserMetadata(HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, ByteBuffer.wrap(getUTF8Bytes(bloomFilter.serializeToString()))); if (minRecordKey != null && maxRecordKey != null) { writer.addUserMetadata(HoodieBloomFilterWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER, ByteBuffer.wrap(getUTF8Bytes(minRecordKey))); writer.addUserMetadata(HoodieBloomFilterWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER, ByteBuffer.wrap(getUTF8Bytes(maxRecordKey))); diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetWriter.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetWriter.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetWriter.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetWriter.java diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java similarity index 97% rename from hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java index f237db139ab4..06f1e513055f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java @@ -18,8 +18,8 @@ package org.apache.hudi.io.storage; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.VisibleForTesting; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.storage.StoragePath; @@ -73,7 +73,7 @@ protected WriteSupport getWriteSupport(Configuration conf) { parquetWriterbuilder.withDictionaryEncoding(parquetConfig.dictionaryEnabled()); parquetWriterbuilder.withValidation(ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED); parquetWriterbuilder.withWriterVersion(ParquetWriter.DEFAULT_WRITER_VERSION); - parquetWriterbuilder.withConf(FSUtils.registerFileSystem(file, parquetConfig.getHadoopConf())); + parquetWriterbuilder.withConf(HadoopFSUtils.registerFileSystem(file, parquetConfig.getHadoopConf())); handleParquetBloomFilters(parquetWriterbuilder, parquetConfig.getHadoopConf()); parquetWriter = parquetWriterbuilder.build(); diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HadoopStorageConfiguration.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HadoopStorageConfiguration.java index f272f8333eb7..ed7b24052472 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HadoopStorageConfiguration.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HadoopStorageConfiguration.java @@ -20,6 +20,7 @@ package org.apache.hudi.storage.hadoop; import 
org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.fs.inline.HadoopInLineFSUtils; import org.apache.hudi.storage.StorageConfiguration; import org.apache.hadoop.conf.Configuration; @@ -37,8 +38,8 @@ public class HadoopStorageConfiguration extends StorageConfiguration getString(String key) { return Option.ofNullable(configuration.get(key)); } + @Override + public StorageConfiguration getInline() { + return HadoopInLineFSUtils.buildInlineConf(this); + } + @Override public String toString() { StringBuilder stringBuilder = new StringBuilder(); diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java index 1e1ba67ae66f..126b17617eb2 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java @@ -19,8 +19,11 @@ package org.apache.hudi.storage.hadoop; +import org.apache.hudi.common.fs.ConsistencyGuard; import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.fs.HadoopSeekableDataInputStream; +import org.apache.hudi.hadoop.fs.HoodieRetryWrapperFileSystem; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.io.SeekableDataInputStream; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.StorageConfiguration; @@ -43,9 +46,11 @@ import java.util.List; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToHadoopPath; import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToStoragePath; import static org.apache.hudi.hadoop.fs.HadoopFSUtils.convertToStoragePathInfo; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.getFs; /** * Implementation of {@link HoodieStorage} using Hadoop's {@link FileSystem} @@ -53,6 +58,46 @@ public class HoodieHadoopStorage extends HoodieStorage { private final FileSystem fs; + public HoodieHadoopStorage(HoodieStorage storage) { + FileSystem fs = (FileSystem) storage.getFileSystem(); + if (fs instanceof HoodieWrapperFileSystem) { + this.fs = ((HoodieWrapperFileSystem) fs).getFileSystem(); + } else { + this.fs = fs; + } + } + + public HoodieHadoopStorage(String basePath, Configuration conf) { + this(HadoopFSUtils.getFs(basePath, conf)); + } + + public HoodieHadoopStorage(StoragePath path, StorageConfiguration conf) { + this(HadoopFSUtils.getFs(path, conf.unwrapAs(Configuration.class))); + } + + public HoodieHadoopStorage(String basePath, StorageConfiguration conf) { + this(HadoopFSUtils.getFs(basePath, conf)); + } + + public HoodieHadoopStorage(StoragePath path, + StorageConfiguration conf, + boolean enableRetry, + long maxRetryIntervalMs, + int maxRetryNumbers, + long initialRetryIntervalMs, + String retryExceptions, + ConsistencyGuard consistencyGuard) { + FileSystem fileSystem = getFs(path, conf.unwrapCopyAs(Configuration.class)); + + if (enableRetry) { + fileSystem = new HoodieRetryWrapperFileSystem(fileSystem, + maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptions); + } + checkArgument(!(fileSystem instanceof HoodieWrapperFileSystem), + "File System not expected to be that of HoodieWrapperFileSystem"); + this.fs = new HoodieWrapperFileSystem(fileSystem, consistencyGuard); + } + public HoodieHadoopStorage(FileSystem fs) { this.fs = fs; } @@ -98,9 +143,9 @@ public 
InputStream open(StoragePath path) throws IOException { } @Override - public SeekableDataInputStream openSeekable(StoragePath path, int bufferSize) throws IOException { + public SeekableDataInputStream openSeekable(StoragePath path, int bufferSize, boolean wrapStream) throws IOException { return new HadoopSeekableDataInputStream( - HadoopFSUtils.getFSDataInputStream(fs, path, bufferSize)); + HadoopFSUtils.getFSDataInputStream(fs, path, bufferSize, wrapStream)); } @Override
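Aside, not part of the patch: the retry and consistency-guard wiring that an earlier hunk removed from HadoopFSUtils.getStorageWithWrapperFS now lives in the HoodieHadoopStorage constructor above. A sketch with illustrative retry values and a hypothetical base path and class name:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hudi.common.fs.NoOpConsistencyGuard;
    import org.apache.hudi.storage.HoodieStorage;
    import org.apache.hudi.storage.StoragePath;
    import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration;
    import org.apache.hudi.storage.hadoop.HoodieHadoopStorage;

    public class RetryStorageSketch {
      public static void main(String[] args) {
        HoodieStorage storage = new HoodieHadoopStorage(
            new StoragePath("file:///tmp/hudi_table"),                  // hypothetical base path
            new HadoopStorageConfiguration(new Configuration(), true),  // copied Hadoop conf
            true,                                                       // enableRetry
            2000L,                                                      // maxRetryIntervalMs
            3,                                                          // maxRetryNumbers
            100L,                                                       // initialRetryIntervalMs
            "",                                                         // retryExceptions (illustrative)
            new NoOpConsistencyGuard());                                // guard now in common.fs
        System.out.println(storage.getClass().getName());
      }
    }

diff --git a/hudi-common/src/test/java/org/apache/hudi/client/transaction/lock/TestInProcessLockProvider.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/client/transaction/lock/TestInProcessLockProvider.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/client/transaction/lock/TestInProcessLockProvider.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/client/transaction/lock/TestInProcessLockProvider.java
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/bloom/TestBloomFilter.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/bloom/TestBloomFilter.java similarity index 98% rename from hudi-common/src/test/java/org/apache/hudi/common/bloom/TestBloomFilter.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/bloom/TestBloomFilter.java index 2e72b3737a0d..cb7d78476940 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/bloom/TestBloomFilter.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/bloom/TestBloomFilter.java @@ -31,7 +31,7 @@ import java.util.UUID; import java.util.stream.Collectors; -import static org.apache.hudi.common.testutils.FileSystemTestUtils.readLastLineFromResourceFile; +import static org.apache.hudi.common.testutils.HoodieTestTable.readLastLineFromResourceFile; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue;
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java similarity index 97% rename from hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java index b3949f468956..a9f2f273a7ae 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java @@ -30,8 +30,7 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; -import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; -import org.apache.hudi.hadoop.fs.inline.InLineFSUtils; +import org.apache.hudi.hadoop.fs.inline.HadoopInLineFSUtils; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StoragePath; @@ -94,15 +93,15 @@ public void tearDown() throws Exception { public void testMakeDataFileName() { String instantTime = HoodieActiveTimeline.formatDate(new Date()); String fileName = 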
UUID.randomUUID().toString(); - assertEquals(FSUtils.makeBaseFileName(instantTime, TEST_WRITE_TOKEN, fileName, BASE_FILE_EXTENSION), - fileName + "_" + TEST_WRITE_TOKEN + "_" + instantTime + BASE_FILE_EXTENSION); + assertEquals(FSUtils.makeBaseFileName(instantTime, TEST_WRITE_TOKEN, fileName, HoodieCommonTestHarness.BASE_FILE_EXTENSION), + fileName + "_" + TEST_WRITE_TOKEN + "_" + instantTime + HoodieCommonTestHarness.BASE_FILE_EXTENSION); } @Test public void testMaskFileName() { String instantTime = HoodieActiveTimeline.formatDate(new Date()); int taskPartitionId = 2; - assertEquals(FSUtils.maskWithoutFileId(instantTime, taskPartitionId), "*_" + taskPartitionId + "_" + instantTime + BASE_FILE_EXTENSION); + assertEquals(FSUtils.maskWithoutFileId(instantTime, taskPartitionId), "*_" + taskPartitionId + "_" + instantTime + HoodieCommonTestHarness.BASE_FILE_EXTENSION); } /** @@ -130,7 +129,7 @@ public void testProcessFiles() throws Exception { "2016/05/16/2_1-0-1_20190528120000", ".hoodie/.temp/2/2016/05/16/2_1-0-1_20190528120000", ".hoodie/.temp/2/2016/04/15/1_1-0-1_20190528120000") - .map(fileName -> fileName + BASE_FILE_EXTENSION) + .map(fileName -> fileName + HoodieCommonTestHarness.BASE_FILE_EXTENSION) .collect(Collectors.toList()); files.forEach(f -> { @@ -170,7 +169,7 @@ public void testProcessFiles() throws Exception { public void testGetCommitTime() { String instantTime = HoodieActiveTimeline.formatDate(new Date()); String fileName = UUID.randomUUID().toString(); - String fullFileName = FSUtils.makeBaseFileName(instantTime, TEST_WRITE_TOKEN, fileName, BASE_FILE_EXTENSION); + String fullFileName = FSUtils.makeBaseFileName(instantTime, TEST_WRITE_TOKEN, fileName, HoodieCommonTestHarness.BASE_FILE_EXTENSION); assertEquals(instantTime, FSUtils.getCommitTime(fullFileName)); // test log file name fullFileName = FSUtils.makeLogFileName(fileName, HOODIE_LOG.getFileExtension(), instantTime, 1, TEST_WRITE_TOKEN); @@ -181,7 +180,7 @@ public void testGetCommitTime() { public void testGetFileNameWithoutMeta() { String instantTime = HoodieActiveTimeline.formatDate(new Date()); String fileName = UUID.randomUUID().toString(); - String fullFileName = FSUtils.makeBaseFileName(instantTime, TEST_WRITE_TOKEN, fileName, BASE_FILE_EXTENSION); + String fullFileName = FSUtils.makeBaseFileName(instantTime, TEST_WRITE_TOKEN, fileName, HoodieCommonTestHarness.BASE_FILE_EXTENSION); assertEquals(fileName, FSUtils.getFileId(fullFileName)); } @@ -247,7 +246,7 @@ public void tesLogFileName() { String logFile = FSUtils.makeLogFileName(fileName, ".log", "100", 2, "1-0-1"); System.out.println("Log File =" + logFile); StoragePath rlPath = new StoragePath(new StoragePath(partitionPath), logFile); - StoragePath inlineFsPath = InLineFSUtils.getInlineFilePath( + StoragePath inlineFsPath = HadoopInLineFSUtils.getInlineFilePath( new StoragePath(rlPath.toUri()), "file", 0, 100); assertTrue(FSUtils.isLogFile(rlPath)); assertTrue(FSUtils.isLogFile(inlineFsPath)); @@ -372,7 +371,7 @@ public void testFileNameRelatedFunctions() throws Exception { final String LOG_EXTENSION = "." 
+ LOG_STR; // data file name - String dataFileName = FSUtils.makeBaseFileName(instantTime, writeToken, fileId, BASE_FILE_EXTENSION); + String dataFileName = FSUtils.makeBaseFileName(instantTime, writeToken, fileId, HoodieCommonTestHarness.BASE_FILE_EXTENSION); assertEquals(instantTime, FSUtils.getCommitTime(dataFileName)); assertEquals(fileId, FSUtils.getFileId(dataFileName)); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java similarity index 99% rename from hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java index 2ee65d6f045a..2093e658c4e4 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java @@ -21,7 +21,6 @@ import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.fs.HoodieRetryWrapperFileSystem; import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; -import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StoragePath; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java similarity index 98% rename from hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java index 93a321166c0d..c7b5217524e5 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java @@ -23,7 +23,6 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; -import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StoragePath; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/InLineFSUtilsTest.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/InLineFSUtilsTest.java similarity index 93% rename from hudi-common/src/test/java/org/apache/hudi/common/fs/inline/InLineFSUtilsTest.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/InLineFSUtilsTest.java index 04eefcf15dd6..f46a8d23f250 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/InLineFSUtilsTest.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/InLineFSUtilsTest.java @@ -19,9 +19,8 @@ package org.apache.hudi.common.fs.inline; import 
org.apache.hudi.common.testutils.FileSystemTestUtils; -import org.apache.hudi.hadoop.fs.inline.InLineFSUtils; -import org.apache.hudi.hadoop.fs.inline.InLineFileSystem; import org.apache.hudi.storage.StoragePath; +import org.apache.hudi.storage.inline.InLineFSUtils; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; @@ -32,7 +31,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; /** - * Tests {@link InLineFileSystem}. + * Tests {@link InLineFSUtils}. */ public class InLineFSUtilsTest { diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java similarity index 98% rename from hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java index dd9bdc8cc497..76b55429024f 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java @@ -20,7 +20,7 @@ import org.apache.hudi.common.testutils.FileSystemTestUtils; import org.apache.hudi.common.util.collection.Pair; -import org.apache.hudi.hadoop.fs.inline.InLineFSUtils; +import org.apache.hudi.hadoop.fs.inline.HadoopInLineFSUtils; import org.apache.hudi.hadoop.fs.inline.InLineFileSystem; import org.apache.hudi.storage.StoragePath; @@ -350,12 +350,12 @@ public void testInLineFSPathConversions() { if (inputPath.toString().contains(":")) { scheme = inputPath.toString().split(":")[0]; } - final StoragePath actualInLineFSPath = InLineFSUtils.getInlineFilePath( + final StoragePath actualInLineFSPath = HadoopInLineFSUtils.getInlineFilePath( new StoragePath(inputPath.toUri()), scheme, 10, 10); assertEquals(expectedInLineFSPath, actualInLineFSPath); final StoragePath actualOuterFilePath = - InLineFSUtils.getOuterFilePathFromInlinePath(actualInLineFSPath); + HadoopInLineFSUtils.getOuterFilePathFromInlinePath(actualInLineFSPath); assertEquals(expectedTransformedInputPath, actualOuterFilePath); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLiningBase.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLiningBase.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLiningBase.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLiningBase.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHBaseHFileReader.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHBaseHFileReader.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHBaseHFileReader.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHBaseHFileReader.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHFileReader.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHFileReader.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHFileReader.java 
rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHFileReader.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInMemoryFileSystem.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/TestInMemoryFileSystem.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInMemoryFileSystem.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/TestInMemoryFileSystem.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestParquetInLining.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/TestParquetInLining.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestParquetInLining.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/inline/TestParquetInLining.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java similarity index 99% rename from hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java index af8096cc9854..367dc506b218 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java @@ -63,7 +63,6 @@ import org.apache.hudi.common.util.collection.ExternalSpillableMap; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.CorruptedLogFileException; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StoragePath; @@ -73,7 +72,6 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; @@ -459,7 +457,7 @@ public void testHugeLogFileWrite() throws IOException, URISyntaxException, Inter header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); byte[] dataBlockContentBytes = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header).getContentBytes(); HoodieLogBlock.HoodieLogBlockContentLocation logBlockContentLoc = new HoodieLogBlock.HoodieLogBlockContentLocation( - HadoopFSUtils.getStorageConf(new Configuration()), null, 0, dataBlockContentBytes.length, 0); + HoodieTestUtils.getDefaultStorageConfWithDefaults(), null, 0, dataBlockContentBytes.length, 0); HoodieDataBlock reusableDataBlock = new HoodieAvroDataBlock(null, Option.ofNullable(dataBlockContentBytes), false, logBlockContentLoc, Option.ofNullable(getSimpleSchema()), header, new HashMap<>(), HoodieRecord.RECORD_KEY_METADATA_FIELD); long writtenSize = 0; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java rename to 
hudi-hadoop-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieFileGroup.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/model/TestHoodieFileGroup.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieFileGroup.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/model/TestHoodieFileGroup.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodiePartitionMetadata.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/model/TestHoodiePartitionMetadata.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodiePartitionMetadata.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/model/TestHoodiePartitionMetadata.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieRecord.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/model/TestHoodieRecord.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieRecord.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/model/TestHoodieRecord.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieRecordDelegate.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/model/TestHoodieRecordDelegate.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieRecordDelegate.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/model/TestHoodieRecordDelegate.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java similarity index 97% rename from hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java index 750f286f7bb1..70cb9eea6d5f 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java @@ -19,14 +19,13 @@ package org.apache.hudi.common.table; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; +import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StoragePath; -import org.apache.hadoop.conf.Configuration; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -63,7 +62,7 @@ public class TestHoodieTableConfig extends HoodieCommonTestHarness { @BeforeEach public void setUp() throws Exception { initPath(); - storage = HoodieStorageUtils.getStorage(basePath, HadoopFSUtils.getStorageConf(new Configuration())); + storage = HoodieStorageUtils.getStorage(basePath, HoodieTestUtils.getDefaultStorageConfWithDefaults()); metaPath = new StoragePath(basePath, HoodieTableMetaClient.METAFOLDER_NAME); Properties props = new Properties(); props.setProperty(HoodieTableConfig.NAME.key(), "test-table"); diff --git 
a/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableMetaClient.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableMetaClient.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableMetaClient.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableMetaClient.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java similarity index 95% rename from hudi-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java index 13498528f17c..efb88412a210 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java @@ -24,9 +24,9 @@ import org.apache.hudi.common.table.log.block.HoodieDataBlock; import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.testutils.SchemaTestUtil; import org.apache.hudi.common.util.Option; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.internal.schema.HoodieSchemaException; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; @@ -34,7 +34,6 @@ import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.conf.Configuration; import org.apache.parquet.avro.AvroSchemaConverter; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -100,7 +99,7 @@ public void testReadSchemaFromLogFile() throws IOException, URISyntaxException, assertEquals( new AvroSchemaConverter().convert(expectedSchema), TableSchemaResolver.readSchemaFromLogFile(HoodieStorageUtils.getStorage( - logFilePath, HadoopFSUtils.getStorageConf(new Configuration())), logFilePath)); + logFilePath, HoodieTestUtils.getDefaultStorageConfWithDefaults()), logFilePath)); } private String initTestDir(String folderName) throws IOException { @@ -111,7 +110,7 @@ private String initTestDir(String folderName) throws IOException { private StoragePath writeLogFile(StoragePath partitionPath, Schema schema) throws IOException, URISyntaxException, InterruptedException { HoodieStorage storage = HoodieStorageUtils.getStorage( - partitionPath, HadoopFSUtils.getStorageConf(new Configuration())); + partitionPath, HoodieTestUtils.getDefaultStorageConfWithDefaults()); HoodieLogFormat.Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION) .withFileId("test-fileid1").withDeltaCommit("100").withStorage(storage).build(); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/log/TestLogReaderUtils.java 
b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/log/TestLogReaderUtils.java similarity index 97% rename from hudi-common/src/test/java/org/apache/hudi/common/table/log/TestLogReaderUtils.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/log/TestLogReaderUtils.java index f06599fbe746..6e65ef3c656e 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/log/TestLogReaderUtils.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/log/TestLogReaderUtils.java @@ -31,7 +31,7 @@ import java.util.Set; import java.util.stream.Collectors; -import static org.apache.hudi.common.testutils.FileSystemTestUtils.readLastLineFromResourceFile; +import static org.apache.hudi.common.testutils.HoodieTestTable.readLastLineFromResourceFile; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/read/TestCustomMerger.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/read/TestCustomMerger.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/table/read/TestCustomMerger.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/read/TestCustomMerger.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/read/TestEventTimeMerging.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/read/TestEventTimeMerging.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/table/read/TestEventTimeMerging.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/read/TestEventTimeMerging.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java similarity index 99% rename from hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java index 5ec75b31ef87..f744cc216a8f 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java @@ -18,6 +18,7 @@ package org.apache.hudi.common.table.timeline; +import org.apache.hudi.common.fs.NoOpConsistencyGuard; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant.State; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; @@ -27,7 +28,6 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; -import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StoragePath; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieInstant.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieInstant.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieInstant.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieInstant.java diff 
--git a/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestWaitBasedTimeGenerator.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/timeline/TestWaitBasedTimeGenerator.java similarity index 96% rename from hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestWaitBasedTimeGenerator.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/timeline/TestWaitBasedTimeGenerator.java index 681e62bdeef9..9ebd0b36073c 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestWaitBasedTimeGenerator.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/timeline/TestWaitBasedTimeGenerator.java @@ -21,11 +21,10 @@ import org.apache.hudi.client.transaction.lock.InProcessLockProvider; import org.apache.hudi.common.config.HoodieTimeGeneratorConfig; import org.apache.hudi.common.config.LockConfiguration; +import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.exception.HoodieLockException; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.storage.StorageConfiguration; -import org.apache.hadoop.conf.Configuration; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.params.ParameterizedTest; @@ -75,7 +74,7 @@ public boolean tryLock(long time, TimeUnit unit) { // Clock skew time private final long clockSkewTime = 20L; - private final StorageConfiguration storageConf = HadoopFSUtils.getStorageConf(new Configuration()); + private final StorageConfiguration storageConf = HoodieTestUtils.getDefaultStorageConfWithDefaults(); private HoodieTimeGeneratorConfig timeGeneratorConfig; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFSViewWithClustering.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFSViewWithClustering.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFSViewWithClustering.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFSViewWithClustering.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestRocksDBBasedIncrementalFSViewSync.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestRocksDBBasedIncrementalFSViewSync.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/table/view/TestRocksDBBasedIncrementalFSViewSync.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestRocksDBBasedIncrementalFSViewSync.java diff 
--git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestRocksDbBasedFileSystemView.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestRocksDbBasedFileSystemView.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/table/view/TestRocksDbBasedFileSystemView.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestRocksDbBasedFileSystemView.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestSpillableMapBasedFileSystemView.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestSpillableMapBasedFileSystemView.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/table/view/TestSpillableMapBasedFileSystemView.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestSpillableMapBasedFileSystemView.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestSpillableMapBasedIncrementalFSViewSync.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestSpillableMapBasedIncrementalFSViewSync.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/table/view/TestSpillableMapBasedIncrementalFSViewSync.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/view/TestSpillableMapBasedIncrementalFSViewSync.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileSystemTestUtils.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/testutils/FileSystemTestUtils.java similarity index 61% rename from hudi-common/src/test/java/org/apache/hudi/common/testutils/FileSystemTestUtils.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/testutils/FileSystemTestUtils.java index 232c14cc31c4..162740b55a14 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileSystemTestUtils.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/testutils/FileSystemTestUtils.java @@ -18,26 +18,15 @@ package org.apache.hudi.common.testutils; -import org.apache.hudi.common.table.log.TestLogReaderUtils; -import org.apache.hudi.common.util.FileIOUtils; -import org.apache.hudi.hadoop.fs.inline.InLineFSUtils; import org.apache.hudi.hadoop.fs.inline.InLineFileSystem; import org.apache.hudi.hadoop.fs.inline.InMemoryFileSystem; -import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.StoragePath; -import org.apache.hudi.storage.StoragePathInfo; +import org.apache.hudi.storage.inline.InLineFSUtils; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; import java.io.File; import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; import java.util.Random; import java.util.UUID; @@ -79,34 +68,4 @@ public static void deleteFile(File fileToDelete) throws IOException { throw new IOException(message); } } - - public static List listRecursive(FileSystem fs, Path path) throws IOException { - return listFiles(fs, path, true); - } - - public static List listFiles(FileSystem fs, Path path, boolean recursive) throws IOException { - RemoteIterator itr = fs.listFiles(path, recursive); - List statuses = new ArrayList<>(); - while (itr.hasNext()) { - statuses.add(itr.next()); - } - return statuses; - } - - public static List 
listRecursive(HoodieStorage storage, StoragePath path) - throws IOException { - return listFiles(storage, path); - } - - public static List listFiles(HoodieStorage storage, StoragePath path) - throws IOException { - return storage.listFiles(path); - } - - public static String readLastLineFromResourceFile(String resourceName) throws IOException { - try (InputStream inputStream = TestLogReaderUtils.class.getResourceAsStream(resourceName)) { - List lines = FileIOUtils.readAsUTFStringLines(inputStream); - return lines.get(lines.size() - 1); - } - } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieCommonTestHarness.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/testutils/HoodieCommonTestHarness.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieCommonTestHarness.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/testutils/HoodieCommonTestHarness.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java similarity index 97% rename from hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java index 2720aa42dd01..8b0e24b40741 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java @@ -52,12 +52,14 @@ import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.log.HoodieLogFormat; +import org.apache.hudi.common.table.log.TestLogReaderUtils; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; import org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanV2MigrationHandler; import org.apache.hudi.common.util.CompactionUtils; +import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.ValidationUtils; @@ -69,11 +71,14 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Paths; import java.time.Instant; @@ -824,17 +829,47 @@ public List listAllBaseFiles() throws IOException { } public List listAllBaseFiles(String fileExtension) throws IOException { - return FileSystemTestUtils.listRecursive(storage, new StoragePath(basePath)).stream() + return listRecursive(storage, new StoragePath(basePath)).stream() .filter(fileInfo -> fileInfo.getPath().getName().endsWith(fileExtension)) .collect(Collectors.toList()); } + public static List listRecursive(FileSystem fs, Path path) throws IOException { + return listFiles(fs, path, true); + } + + public static List listFiles(FileSystem fs, Path path, boolean recursive) throws IOException { + RemoteIterator itr = fs.listFiles(path, recursive); + List 
statuses = new ArrayList<>(); + while (itr.hasNext()) { + statuses.add(itr.next()); + } + return statuses; + } + + public static List listRecursive(HoodieStorage storage, StoragePath path) + throws IOException { + return listFiles(storage, path); + } + + public static List listFiles(HoodieStorage storage, StoragePath path) + throws IOException { + return storage.listFiles(path); + } + + public static String readLastLineFromResourceFile(String resourceName) throws IOException { + try (InputStream inputStream = TestLogReaderUtils.class.getResourceAsStream(resourceName)) { + List lines = FileIOUtils.readAsUTFStringLines(inputStream); + return lines.get(lines.size() - 1); + } + } + public List listAllLogFiles() throws IOException { return listAllLogFiles(HoodieFileFormat.HOODIE_LOG.getFileExtension()); } public List listAllLogFiles(String fileExtension) throws IOException { - return FileSystemTestUtils.listRecursive(storage, new StoragePath(basePath)).stream() + return listRecursive(storage, new StoragePath(basePath)).stream() .filter( fileInfo -> !fileInfo.getPath().toString() .contains(HoodieTableMetaClient.METAFOLDER_NAME)) @@ -849,7 +884,7 @@ public List listAllBaseAndLogFiles() throws IOException { } public FileStatus[] listAllFilesInPartition(String partitionPath) throws IOException { - return FileSystemTestUtils.listRecursive(fs, + return listRecursive(fs, new Path(Paths.get(basePath, partitionPath).toString())).stream() .filter(entry -> { boolean toReturn = true; @@ -872,7 +907,7 @@ public FileStatus[] listAllFilesInPartition(String partitionPath) throws IOExcep } public FileStatus[] listAllFilesInTempFolder() throws IOException { - return FileSystemTestUtils.listRecursive(fs, new Path(Paths.get(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME).toString())).toArray(new FileStatus[0]); + return listRecursive(fs, new Path(Paths.get(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME).toString())).toArray(new FileStatus[0]); } public void deleteFilesInPartition(String partitionPath, List filesToDelete) throws IOException { diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/minicluster/HdfsTestService.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/testutils/minicluster/HdfsTestService.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/testutils/minicluster/HdfsTestService.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/testutils/minicluster/HdfsTestService.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/reader/HoodieFileGroupReaderTestHarness.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/testutils/reader/HoodieFileGroupReaderTestHarness.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/testutils/reader/HoodieFileGroupReaderTestHarness.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/testutils/reader/HoodieFileGroupReaderTestHarness.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java similarity 
index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestCommitUtils.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestCommitUtils.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/util/TestCommitUtils.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestCommitUtils.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestCompactionUtils.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestCompactionUtils.java similarity index 99% rename from hudi-common/src/test/java/org/apache/hudi/common/util/TestCompactionUtils.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestCompactionUtils.java index 32ced12c480e..e7174ec1ac51 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestCompactionUtils.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestCompactionUtils.java @@ -32,7 +32,6 @@ import org.apache.hudi.common.table.timeline.versioning.compaction.CompactionPlanMigrator; import org.apache.hudi.common.testutils.CompactionTestUtils.DummyHoodieBaseFile; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; -import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.storage.StoragePath; @@ -216,7 +215,7 @@ public void testGetAllPendingCompactionOperationsWithDupFileId() throws IOExcept // schedule similar plan again so that there will be duplicates plan1.getOperations().get(0).setDataFilePath("bla"); scheduleCompaction(metaClient, "005", plan1); - metaClient = HoodieTestUtils.createMetaClient(metaClient.getStorageConf(), basePath); + metaClient = createMetaClient(metaClient.getStorageConf(), basePath); assertThrows(IllegalStateException.class, () -> { CompactionUtils.getAllPendingCompactionOperations(metaClient); }); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java similarity index 96% rename from hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java index 21412696f2ce..f6caa31a62c6 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java @@ -21,6 +21,7 @@ import org.apache.hudi.common.config.DFSPropertiesConfiguration; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.testutils.minicluster.HdfsTestService; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; @@ -42,8 +43,6 @@ import java.io.IOException; import java.io.PrintStream; -import static org.apache.hudi.common.testutils.HoodieTestUtils.shouldUseExternalHdfs; -import static org.apache.hudi.common.testutils.HoodieTestUtils.useExternalHdfs; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static 
org.junit.jupiter.api.Assertions.assertThrows; @@ -65,8 +64,8 @@ public class TestDFSPropertiesConfiguration { @BeforeAll public static void initClass() throws Exception { - if (shouldUseExternalHdfs()) { - dfs = useExternalHdfs(); + if (HoodieTestUtils.shouldUseExternalHdfs()) { + dfs = HoodieTestUtils.useExternalHdfs(); } else { hdfsTestService = new HdfsTestService(); dfsCluster = hdfsTestService.start(true); @@ -169,7 +168,7 @@ public void testLocalFileSystemLoading() throws IOException { String.format( "file:%s", getClass().getClassLoader() - .getResource("props/test.properties") + .getResource("props/testdfs.properties") .getPath() ) )); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestFileIOUtils.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestFileIOUtils.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/util/TestFileIOUtils.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestFileIOUtils.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestMarkerUtils.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestMarkerUtils.java similarity index 94% rename from hudi-common/src/test/java/org/apache/hudi/common/util/TestMarkerUtils.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestMarkerUtils.java index c604d276ba96..085a981b220c 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestMarkerUtils.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestMarkerUtils.java @@ -20,13 +20,12 @@ import org.apache.hudi.common.table.marker.MarkerType; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; +import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StoragePath; -import org.apache.hadoop.conf.Configuration; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -43,8 +42,7 @@ class TestMarkerUtils extends HoodieCommonTestHarness { @BeforeEach public void setup() { initPath(); - storage = HoodieStorageUtils.getStorage( - basePath, HadoopFSUtils.getStorageConf(new Configuration())); + storage = HoodieStorageUtils.getStorage(basePath, HoodieTestUtils.getDefaultStorageConfWithDefaults()); } @Test diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestParquetUtils.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestParquetUtils.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/util/TestParquetUtils.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestParquetUtils.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestTablePathUtils.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestTablePathUtils.java similarity index 96% rename from hudi-common/src/test/java/org/apache/hudi/common/util/TestTablePathUtils.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestTablePathUtils.java index 95b08d9d6203..94943a436eeb 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestTablePathUtils.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestTablePathUtils.java @@ -21,12 +21,11 @@ import 
org.apache.hudi.common.model.HoodiePartitionMetadata; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StoragePath; -import org.apache.hadoop.conf.Configuration; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; @@ -64,8 +63,7 @@ private void setup() throws IOException { private void setup(Option partitionMetafileFormat) throws IOException { URI tablePathURI = Paths.get(tempDir.getAbsolutePath(), "test_table").toUri(); tablePath = new StoragePath(tablePathURI); - storage = HoodieStorageUtils.getStorage( - tablePathURI.toString(), HadoopFSUtils.getStorageConf(new Configuration())); + storage = HoodieStorageUtils.getStorage(tablePathURI.toString(), HoodieTestUtils.getDefaultStorageConfWithDefaults()); // Create bootstrap index folder assertTrue(new File( diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestBitCaskDiskMap.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/collection/TestBitCaskDiskMap.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestBitCaskDiskMap.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/collection/TestBitCaskDiskMap.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestExternalSpillableMap.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/collection/TestExternalSpillableMap.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestExternalSpillableMap.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/collection/TestExternalSpillableMap.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestRocksDbBasedMap.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/collection/TestRocksDbBasedMap.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestRocksDbBasedMap.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/collection/TestRocksDbBasedMap.java diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestRocksDbDiskMap.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/collection/TestRocksDbDiskMap.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestRocksDbDiskMap.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/collection/TestRocksDbDiskMap.java diff --git a/hudi-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileReaderFactory.java 
b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileReaderFactory.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileReaderFactory.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileReaderFactory.java diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieBaseParquetWriter.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieBaseParquetWriter.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieBaseParquetWriter.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieBaseParquetWriter.java diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHBaseHFileReaderWriter.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHBaseHFileReaderWriter.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHBaseHFileReaderWriter.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHBaseHFileReaderWriter.java diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriterBase.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriterBase.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriterBase.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriterBase.java diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java diff --git a/hudi-common/src/test/java/org/apache/hudi/metadata/TestFileSystemBackedTableMetadata.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestFileSystemBackedTableMetadata.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/metadata/TestFileSystemBackedTableMetadata.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestFileSystemBackedTableMetadata.java diff --git a/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java similarity index 100% rename from 
hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java diff --git a/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java diff --git a/hudi-common/src/test/resources/external-config/hudi-defaults.conf b/hudi-hadoop-common/src/test/resources/external-config/hudi-defaults.conf similarity index 100% rename from hudi-common/src/test/resources/external-config/hudi-defaults.conf rename to hudi-hadoop-common/src/test/resources/external-config/hudi-defaults.conf diff --git a/hudi-hadoop-common/src/test/resources/props/testdfs.properties b/hudi-hadoop-common/src/test/resources/props/testdfs.properties new file mode 100644 index 000000000000..0e9f3e7aa27f --- /dev/null +++ b/hudi-hadoop-common/src/test/resources/props/testdfs.properties @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +some.random.prop=123 \ No newline at end of file diff --git a/hudi-hadoop-mr/pom.xml b/hudi-hadoop-mr/pom.xml index 191f49f40d1e..1dacca6bd9fb 100644 --- a/hudi-hadoop-mr/pom.xml +++ b/hudi-hadoop-mr/pom.xml @@ -44,6 +44,12 @@ ${project.version} + + org.apache.hudi + hudi-hadoop-common + ${project.version} + + com.esotericsoftware @@ -108,6 +114,22 @@ test-jar test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + + + org.apache.hudi + hudi-io + ${project.version} + tests + test-jar + test + diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadSnapshotReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadSnapshotReader.java index 38b6e16f6c0d..9885d8ffbc7e 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadSnapshotReader.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadSnapshotReader.java @@ -37,7 +37,6 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.testutils.InputFormatTestUtil; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; @@ -62,7 +61,6 @@ import java.util.stream.Collectors; import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath; -import static org.apache.hudi.hadoop.fs.HadoopFSUtils.getFs; import static org.apache.hudi.hadoop.testutils.InputFormatTestUtil.writeDataBlockToLogFile; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -90,7 +88,7 @@ public void setUp() { baseJobConf.set(HoodieMemoryConfig.MAX_DFS_STREAM_BUFFER_SIZE.key(), String.valueOf(1024 * 1024)); baseJobConf.set(serdeConstants.LIST_COLUMNS, COLUMNS); baseJobConf.set(serdeConstants.LIST_COLUMN_TYPES, COLUMN_TYPES); - storage = HoodieStorageUtils.getStorage(getFs(basePath.toUri().toString(), baseJobConf)); + storage = HoodieStorageUtils.getStorage(basePath.toUri().toString(), baseJobConf); } @AfterEach @@ -114,7 +112,7 @@ public void testSnapshotReaderPartitioned() throws Exception { private void testReaderInternal(boolean partitioned, HoodieLogBlock.HoodieLogBlockType logBlockType) throws Exception { // initial commit Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema()); - HoodieTestUtils.init(HadoopFSUtils.getStorageConf(hadoopConf), basePath.toString(), HoodieTableType.MERGE_ON_READ); + HoodieTestUtils.init(HoodieStorageUtils.getStorageConf(hadoopConf), basePath.toString(), HoodieTableType.MERGE_ON_READ); String baseInstant = "100"; File partitionDir = partitioned ? 
InputFormatTestUtil.prepareParquetTable(basePath, schema, 1, TOTAL_RECORDS, baseInstant, HoodieTableType.MERGE_ON_READ) diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml index 33bfc03d6a23..bf525857864c 100644 --- a/hudi-integ-test/pom.xml +++ b/hudi-integ-test/pom.xml @@ -210,6 +210,20 @@ tests test-jar + + org.apache.hudi + hudi-io + ${project.version} + tests + test-jar + + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + org.apache.hudi hudi-spark_${scala.binary.version} diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/writer/AvroFileDeltaInputWriter.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/writer/AvroFileDeltaInputWriter.java index efc40437b8e5..e9149e8aaa55 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/writer/AvroFileDeltaInputWriter.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/writer/AvroFileDeltaInputWriter.java @@ -18,7 +18,7 @@ package org.apache.hudi.integ.testsuite.writer; -import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.storage.StoragePath; @@ -71,7 +71,7 @@ public AvroFileDeltaInputWriter(Configuration configuration, String basePath, St StoragePath path = new StoragePath(basePath, UUID.randomUUID().toString() + AVRO_EXTENSION); this.file = HoodieWrapperFileSystem.convertToHoodiePath(path, configuration); this.fs = (HoodieWrapperFileSystem) this.file - .getFileSystem(FSUtils.registerFileSystem(path, configuration)); + .getFileSystem(HadoopFSUtils.registerFileSystem(path, configuration)); this.output = this.fs.create(this.file); this.writer = new GenericDatumWriter(schema); this.dataFileWriter = new DataFileWriter<>(writer).create(schema, output); diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/ConsistencyGuard.java b/hudi-io/src/main/java/org/apache/hudi/common/fs/ConsistencyGuard.java similarity index 98% rename from hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/ConsistencyGuard.java rename to hudi-io/src/main/java/org/apache/hudi/common/fs/ConsistencyGuard.java index ac615fb1048f..e475a9195ccf 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/ConsistencyGuard.java +++ b/hudi-io/src/main/java/org/apache/hudi/common/fs/ConsistencyGuard.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.hudi.hadoop.fs; +package org.apache.hudi.common.fs; import org.apache.hudi.storage.StoragePath; diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/HoodieStorage.java b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieStorage.java index b7e987760437..586b5b0a56f8 100644 --- a/hudi-io/src/main/java/org/apache/hudi/storage/HoodieStorage.java +++ b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieStorage.java @@ -123,11 +123,12 @@ public abstract class HoodieStorage implements Closeable { * * @param path the file to open. * @param bufferSize buffer size to use. + * @param wrapStream true if we want to wrap the inputstream based on filesystem specific criteria * @return the InputStream to read from. * @throws IOException IO error. 
*/ @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) - public abstract SeekableDataInputStream openSeekable(StoragePath path, int bufferSize) throws IOException; + public abstract SeekableDataInputStream openSeekable(StoragePath path, int bufferSize, boolean wrapStream) throws IOException; /** * Appends to an existing file (optional operation). @@ -392,12 +393,13 @@ public boolean createNewFile(StoragePath path) throws IOException { * Opens an SeekableDataInputStream at the indicated path with seeks supported. * * @param path the file to open. + * @param wrapStream true if we want to wrap the inputstream based on filesystem specific criteria * @return the InputStream to read from. * @throws IOException IO error. */ @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) - public SeekableDataInputStream openSeekable(StoragePath path) throws IOException { - return openSeekable(path, getDefaultBlockSize(path)); + public SeekableDataInputStream openSeekable(StoragePath path, boolean wrapStream) throws IOException { + return openSeekable(path, getDefaultBlockSize(path), wrapStream); } /** diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/StorageConfiguration.java b/hudi-io/src/main/java/org/apache/hudi/storage/StorageConfiguration.java index ac586fc6f72c..15f0333fd5b5 100644 --- a/hudi-io/src/main/java/org/apache/hudi/storage/StorageConfiguration.java +++ b/hudi-io/src/main/java/org/apache/hudi/storage/StorageConfiguration.java @@ -63,6 +63,13 @@ public abstract class StorageConfiguration implements Serializable { */ public abstract Option getString(String key); + /** + * Gets an inline version of this storage configuration + * + * @return copy of this storage configuration that is inline + */ + public abstract StorageConfiguration getInline(); + /** * @param clazz class of U, which is assignable from T. * @param type to return. diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/StoragePath.java b/hudi-io/src/main/java/org/apache/hudi/storage/StoragePath.java index 24bf77e76ada..2a24978f0844 100644 --- a/hudi-io/src/main/java/org/apache/hudi/storage/StoragePath.java +++ b/hudi-io/src/main/java/org/apache/hudi/storage/StoragePath.java @@ -235,6 +235,13 @@ public StoragePath makeQualified(URI defaultUri) { return new StoragePath(newUri); } + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public String getFileExtension() { + String fileName = getName(); + int dotIndex = fileName.lastIndexOf('.'); + return dotIndex == -1 ? "" : fileName.substring(dotIndex); + } + @Override public String toString() { // This value could be overwritten concurrently and that's okay, since diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/InLineFSUtils.java b/hudi-io/src/main/java/org/apache/hudi/storage/inline/InLineFSUtils.java similarity index 65% rename from hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/InLineFSUtils.java rename to hudi-io/src/main/java/org/apache/hudi/storage/inline/InLineFSUtils.java index 6c6cb7323e46..97b8de500509 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/InLineFSUtils.java +++ b/hudi-io/src/main/java/org/apache/hudi/storage/inline/InLineFSUtils.java @@ -17,28 +17,22 @@ * under the License. 
*/ -package org.apache.hudi.hadoop.fs.inline; +package org.apache.hudi.storage.inline; import org.apache.hudi.storage.StoragePath; -import org.apache.hadoop.fs.Path; - import java.io.File; import static org.apache.hudi.common.util.ValidationUtils.checkArgument; -/** - * Utils to parse InLineFileSystem paths. - * Inline FS format: - * "inlinefs:////?start_offset=start_offset>&length=" - * Eg: "inlinefs:///s3a/?start_offset=20&length=40" - */ public class InLineFSUtils { - private static final String START_OFFSET_STR = "start_offset"; - private static final String LENGTH_STR = "length"; - private static final String SCHEME_SEPARATOR = "" + StoragePath.COLON_CHAR; - private static final String EQUALS_STR = "="; - private static final String LOCAL_FILESYSTEM_SCHEME = "file"; + + public static final String SCHEME = "inlinefs"; + protected static final String START_OFFSET_STR = "start_offset"; + protected static final String LENGTH_STR = "length"; + protected static final String SCHEME_SEPARATOR = "" + StoragePath.COLON_CHAR; + protected static final String EQUALS_STR = "="; + protected static final String LOCAL_FILESYSTEM_SCHEME = "file"; /** * Get the InlineFS Path for a given schema and its Path. @@ -59,42 +53,13 @@ public static StoragePath getInlineFilePath(StoragePath outerPath, long inLineLength) { final String subPath = new File(outerPath.toString().substring(outerPath.toString().indexOf(":") + 1)).getPath(); return new StoragePath( - InLineFileSystem.SCHEME + SCHEME_SEPARATOR + SCHEME + SCHEME_SEPARATOR + StoragePath.SEPARATOR + subPath + StoragePath.SEPARATOR + origScheme + StoragePath.SEPARATOR + "?" + START_OFFSET_STR + EQUALS_STR + inLineStartOffset + "&" + LENGTH_STR + EQUALS_STR + inLineLength ); } - /** - * InlineFS Path format: - * "inlinefs://path/to/outer/file/outer_file_scheme/?start_offset=start_offset>&length=" - *
- * Outer File Path format: - * "outer_file_scheme://path/to/outer/file" - *
- * Example - * Input: "inlinefs://file1/s3a/?start_offset=20&length=40". - * Output: "s3a://file1" - * - * @param inlineFSPath InLineFS Path to get the outer file Path - * @return Outer file Path from the InLineFS Path - */ - public static Path getOuterFilePathFromInlinePath(Path inlineFSPath) { - assertInlineFSPath(inlineFSPath); - - final String outerFileScheme = inlineFSPath.getParent().getName(); - final Path basePath = inlineFSPath.getParent().getParent(); - checkArgument(basePath.toString().contains(SCHEME_SEPARATOR), - "Invalid InLineFS path: " + inlineFSPath); - - final String pathExceptScheme = basePath.toString().substring(basePath.toString().indexOf(SCHEME_SEPARATOR) + 1); - final String fullPath = outerFileScheme + SCHEME_SEPARATOR - + (outerFileScheme.equals(LOCAL_FILESYSTEM_SCHEME) ? StoragePath.SEPARATOR : "") - + pathExceptScheme; - return new Path(fullPath); - } - public static StoragePath getOuterFilePathFromInlinePath(StoragePath inlineFSPath) { assertInlineFSPath(inlineFSPath); @@ -136,13 +101,8 @@ public static long length(StoragePath inlinePath) { return Long.parseLong(slices[slices.length - 1]); } - private static void assertInlineFSPath(Path inlinePath) { - String scheme = inlinePath.toUri().getScheme(); - checkArgument(InLineFileSystem.SCHEME.equals(scheme)); - } - private static void assertInlineFSPath(StoragePath inlinePath) { String scheme = inlinePath.toUri().getScheme(); - checkArgument(InLineFileSystem.SCHEME.equals(scheme)); + checkArgument(SCHEME.equals(scheme)); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/NetworkTestUtils.java b/hudi-io/src/test/java/org/apache/hudi/common/testutils/NetworkTestUtils.java similarity index 100% rename from hudi-common/src/test/java/org/apache/hudi/common/testutils/NetworkTestUtils.java rename to hudi-io/src/test/java/org/apache/hudi/common/testutils/NetworkTestUtils.java diff --git a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java index 0e40b562f669..cdc8d6f67462 100644 --- a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java +++ b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java @@ -163,11 +163,11 @@ public void testSeekable() throws IOException { stream.flush(); } - try (SeekableDataInputStream seekableStream = storage.openSeekable(path)) { + try (SeekableDataInputStream seekableStream = storage.openSeekable(path, true)) { validateSeekableDataInputStream(seekableStream, data); } - try (SeekableDataInputStream seekableStream = storage.openSeekable(path, 2)) { + try (SeekableDataInputStream seekableStream = storage.openSeekable(path, 2, true)) { validateSeekableDataInputStream(seekableStream, data); } } diff --git a/hudi-kafka-connect/pom.xml b/hudi-kafka-connect/pom.xml index d9fce1c9c57b..b3a4019ea5f3 100644 --- a/hudi-kafka-connect/pom.xml +++ b/hudi-kafka-connect/pom.xml @@ -210,6 +210,14 @@ test-jar test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + diff --git a/hudi-spark-datasource/hudi-spark-common/pom.xml b/hudi-spark-datasource/hudi-spark-common/pom.xml index 3a3bd4fa9857..11c656ebb1b7 100644 --- a/hudi-spark-datasource/hudi-spark-common/pom.xml +++ b/hudi-spark-datasource/hudi-spark-common/pom.xml @@ -176,6 +176,16 @@ hudi-common ${project.version} + + org.apache.hudi + hudi-hadoop-common + ${project.version} + + + org.apache.hudi + hudi-hadoop-common + ${project.version} + 
org.apache.hudi hudi-hive-sync @@ -259,6 +264,14 @@ test-jar test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala index 79bc0e7044dc..fc1bcacf3bee 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala @@ -28,7 +28,7 @@ import org.apache.hudi.common.table.read.HoodieFileGroupReader import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient} import org.apache.hudi.common.util.FileIOUtils import org.apache.hudi.common.util.collection.ExternalSpillableMap.DiskMapType -import org.apache.hudi.hadoop.fs.HadoopFSUtils +import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration import org.apache.hudi.storage.StorageConfiguration import org.apache.hudi.{AvroConversionUtils, HoodieFileIndex, HoodiePartitionCDCFileGroupMapping, HoodiePartitionFileSliceMapping, HoodieSparkUtils, HoodieTableSchema, HoodieTableState, SparkAdapterSupport, SparkFileFormatInternalRowReaderContext} @@ -43,7 +43,6 @@ import org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderB import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.isMetaField import org.apache.spark.sql.sources.Filter import org.apache.spark.sql.types.{LongType, Metadata, MetadataBuilder, StringType, StructField, StructType} -import org.apache.spark.util.SerializableConfiguration import java.io.Closeable import java.util.Locale @@ -114,15 +113,15 @@ class HoodieFileGroupReaderBasedParquetFileFormat(tableState: HoodieTableState, val requiredSchemaSplits = requiredSchemaWithMandatory.fields.partition(f => HoodieRecord.HOODIE_META_COLUMNS_WITH_OPERATION.contains(f.name)) val requiredMeta = StructType(requiredSchemaSplits._1) val requiredWithoutMeta = StructType(requiredSchemaSplits._2) - val augmentedHadoopConf = FSUtils.buildInlineConf(hadoopConf) + val augmentedStorageConf = new HadoopStorageConfiguration(hadoopConf).getInline val (baseFileReader, preMergeBaseFileReader, readerMaps, cdcFileReader) = buildFileReaders( - spark, dataSchema, partitionSchema, requiredSchema, filters, options, augmentedHadoopConf, + spark, dataSchema, partitionSchema, requiredSchema, filters, options, augmentedStorageConf.unwrap, requiredSchemaWithMandatory, requiredWithoutMeta, requiredMeta) val requestedAvroSchema = AvroConversionUtils.convertStructTypeToAvroSchema(requiredSchema, sanitizedTableName) val dataAvroSchema = AvroConversionUtils.convertStructTypeToAvroSchema(dataSchema, sanitizedTableName) - val broadcastedStorageConf = spark.sparkContext.broadcast(HadoopFSUtils.getStorageConf(augmentedHadoopConf)) + val broadcastedStorageConf = spark.sparkContext.broadcast(augmentedStorageConf) val broadcastedDataSchema = spark.sparkContext.broadcast(dataAvroSchema) val broadcastedRequestedSchema = spark.sparkContext.broadcast(requestedAvroSchema) val fileIndexProps: TypedProperties =
HoodieFileIndex.getConfigProperties(spark, options) diff --git a/hudi-spark-datasource/hudi-spark/pom.xml b/hudi-spark-datasource/hudi-spark/pom.xml index eb09a3fd5689..c723b03ccc9a 100644 --- a/hudi-spark-datasource/hudi-spark/pom.xml +++ b/hudi-spark-datasource/hudi-spark/pom.xml @@ -201,6 +201,16 @@ hudi-common ${project.version} + + org.apache.hudi + hudi-io + ${project.version} + + + org.apache.hudi + hudi-hadoop-common + ${project.version} + org.apache.hudi hudi-hadoop-mr @@ -466,6 +476,22 @@ test-jar test + + org.apache.hudi + hudi-io + ${project.version} + tests + test-jar + test + + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi hudi-java-client diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/common/table/read/TestHoodieFileGroupReaderOnSpark.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/common/table/read/TestHoodieFileGroupReaderOnSpark.scala index c1d814e81d23..6d988ed00b7f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/common/table/read/TestHoodieFileGroupReaderOnSpark.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/common/table/read/TestHoodieFileGroupReaderOnSpark.scala @@ -19,16 +19,14 @@ package org.apache.hudi.common.table.read +import org.apache.avro.Schema +import org.apache.hadoop.conf.Configuration import org.apache.hudi.common.config.HoodieReaderConfig.FILE_GROUP_READER_ENABLED import org.apache.hudi.common.engine.HoodieReaderContext -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.{HoodieRecord, WriteOperationType} import org.apache.hudi.common.testutils.HoodieTestUtils import org.apache.hudi.storage.StorageConfiguration import org.apache.hudi.{AvroConversionUtils, SparkFileFormatInternalRowReaderContext} - -import org.apache.avro.Schema -import org.apache.hadoop.conf.Configuration import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.datasources.PartitionedFile import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat @@ -40,7 +38,6 @@ import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.{AfterEach, BeforeEach} import java.util - import scala.collection.JavaConverters._ /** @@ -77,7 +74,7 @@ class TestHoodieFileGroupReaderOnSpark extends TestHoodieFileGroupReaderBase[Int } override def getStorageConf: StorageConfiguration[_] = { - FSUtils.buildInlineConf(HoodieTestUtils.getDefaultStorageConf) + HoodieTestUtils.getDefaultStorageConf.getInline } override def getBasePath: String = { diff --git a/hudi-spark-datasource/hudi-spark2/pom.xml b/hudi-spark-datasource/hudi-spark2/pom.xml index d52cf4775552..0223e65ca7a7 100644 --- a/hudi-spark-datasource/hudi-spark2/pom.xml +++ b/hudi-spark-datasource/hudi-spark2/pom.xml @@ -183,6 +183,11 @@ hudi-common ${project.version} + + org.apache.hudi + hudi-hadoop-common + ${project.version} + org.apache.hudi hudi-spark-common_${scala.binary.version} @@ -236,6 +241,14 @@ test-jar test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi hudi-spark-common_${scala.binary.version} diff --git a/hudi-spark-datasource/hudi-spark3-common/pom.xml b/hudi-spark-datasource/hudi-spark3-common/pom.xml index e855e835a919..499cec4577aa 100644 --- a/hudi-spark-datasource/hudi-spark3-common/pom.xml +++ b/hudi-spark-datasource/hudi-spark3-common/pom.xml @@ -225,6 +225,14 @@ test-jar test + + org.apache.hudi + hudi-hadoop-common 
+ ${project.version} + tests + test-jar + test + org.apache.hudi hudi-spark-common_${scala.binary.version} diff --git a/hudi-spark-datasource/hudi-spark3.0.x/pom.xml b/hudi-spark-datasource/hudi-spark3.0.x/pom.xml index e6d39bfd0821..4878274304f1 100644 --- a/hudi-spark-datasource/hudi-spark3.0.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.0.x/pom.xml @@ -256,6 +256,14 @@ test-jar test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi diff --git a/hudi-spark-datasource/hudi-spark3.1.x/pom.xml b/hudi-spark-datasource/hudi-spark3.1.x/pom.xml index f1d08470b637..6c810acdaae8 100644 --- a/hudi-spark-datasource/hudi-spark3.1.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.1.x/pom.xml @@ -256,6 +256,14 @@ test-jar test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi diff --git a/hudi-spark-datasource/hudi-spark3.2.x/pom.xml b/hudi-spark-datasource/hudi-spark3.2.x/pom.xml index fae380c2e894..f48c46d0c314 100644 --- a/hudi-spark-datasource/hudi-spark3.2.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.2.x/pom.xml @@ -301,6 +301,15 @@ test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + + org.apache.hudi hudi-spark-common_${scala.binary.version} diff --git a/hudi-spark-datasource/hudi-spark3.2plus-common/pom.xml b/hudi-spark-datasource/hudi-spark3.2plus-common/pom.xml index d0ba895df1e1..0b3dc38fdae2 100644 --- a/hudi-spark-datasource/hudi-spark3.2plus-common/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.2plus-common/pom.xml @@ -210,6 +210,14 @@ test-jar test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi hudi-spark-common_${scala.binary.version} diff --git a/hudi-spark-datasource/hudi-spark3.3.x/pom.xml b/hudi-spark-datasource/hudi-spark3.3.x/pom.xml index 8b5591d2a756..49481ed7be68 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.3.x/pom.xml @@ -291,6 +291,14 @@ test-jar test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi diff --git a/hudi-spark-datasource/hudi-spark3.4.x/pom.xml b/hudi-spark-datasource/hudi-spark3.4.x/pom.xml index fdd93009e913..8f2b81cc2367 100644 --- a/hudi-spark-datasource/hudi-spark3.4.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.4.x/pom.xml @@ -292,6 +292,15 @@ test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + + org.apache.hudi hudi-spark-common_${scala.binary.version} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/pom.xml b/hudi-spark-datasource/hudi-spark3.5.x/pom.xml index 8ac085fed156..554c69b9cdfd 100644 --- a/hudi-spark-datasource/hudi-spark3.5.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.5.x/pom.xml @@ -292,6 +292,15 @@ test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + + org.apache.hudi hudi-spark-common_${scala.binary.version} diff --git a/hudi-sync/hudi-hive-sync/pom.xml b/hudi-sync/hudi-hive-sync/pom.xml index 0d091bff4545..6d87f121bfb9 100644 --- a/hudi-sync/hudi-hive-sync/pom.xml +++ b/hudi-sync/hudi-hive-sync/pom.xml @@ -49,6 +49,16 @@ hudi-common ${project.version} + + org.apache.hudi + hudi-io + ${project.version} + + + org.apache.hudi + hudi-hadoop-common + ${project.version} + org.apache.hudi hudi-hadoop-mr @@ -120,7 +130,22 @@ test-jar
test - + + org.apache.hudi + hudi-io + ${project.version} + tests + test-jar + test + + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi hudi-tests-common diff --git a/hudi-sync/hudi-sync-common/pom.xml b/hudi-sync/hudi-sync-common/pom.xml index 464b3b141573..c6e1afe593ad 100644 --- a/hudi-sync/hudi-sync-common/pom.xml +++ b/hudi-sync/hudi-sync-common/pom.xml @@ -44,7 +44,11 @@ hudi-common ${project.version} - + + org.apache.hudi + hudi-hadoop-common + ${project.version} + com.esotericsoftware @@ -72,6 +76,14 @@ ${project.version} test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi diff --git a/hudi-timeline-service/pom.xml b/hudi-timeline-service/pom.xml index a2b3c67aa830..f994b67c270b 100644 --- a/hudi-timeline-service/pom.xml +++ b/hudi-timeline-service/pom.xml @@ -78,6 +78,11 @@ hudi-common ${project.version} + + org.apache.hudi + hudi-hadoop-common + ${project.version} + @@ -160,6 +165,14 @@ test-jar test + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml index 8f4dd9a3807f..3a7a9d6a712d 100644 --- a/hudi-utilities/pom.xml +++ b/hudi-utilities/pom.xml @@ -168,6 +168,16 @@ hudi-common ${project.version} + + org.apache.hudi + hudi-io + ${project.version} + + + org.apache.hudi + hudi-hadoop-common + ${project.version} + org.apache.hudi @@ -442,6 +452,22 @@ test-jar test + + org.apache.hudi + hudi-io + ${project.version} + tests + test-jar + test + + + org.apache.hudi + hudi-hadoop-common + ${project.version} + tests + test-jar + test + org.apache.hudi hudi-hive-sync
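Across the modules above, the new hudi-io and hudi-hadoop-common dependencies surface three small API changes from this diff: HoodieStorage.openSeekable now carries a wrapStream flag, StorageConfiguration gains getInline() (the replacement for FSUtils.buildInlineConf), and StoragePath gains getFileExtension(). A hedged sketch tying them together; the HoodieStorage and StorageConfiguration instances are assumed to come from an engine-specific factory such as HoodieStorageUtils, and the import path for SeekableDataInputStream is assumed from the hudi-io module layout:

```java
import org.apache.hudi.io.SeekableDataInputStream;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.StorageConfiguration;
import org.apache.hudi.storage.StoragePath;

import java.io.IOException;

public class StorageApiSketch {
  // storage and conf are assumed to be created by an engine-specific
  // implementation (e.g. from hudi-hadoop-common); this only shows call shapes.
  static void readLog(HoodieStorage storage, StorageConfiguration<?> conf,
                      StoragePath logFile) throws IOException {
    // openSeekable now takes a wrapStream flag: true asks the storage to apply
    // its filesystem-specific stream wrapping before returning the stream.
    try (SeekableDataInputStream in = storage.openSeekable(logFile, true)) {
      in.seek(0L);
    }

    // getInline returns a copy of this configuration prepared for reading
    // inlinefs:// paths, as used by HoodieFileGroupReaderBasedParquetFileFormat.
    StorageConfiguration<?> inlineConf = conf.getInline();

    // getFileExtension returns the suffix including the dot, or "" if none.
    String ext = logFile.getFileExtension(); // e.g. ".log"
  }
}
```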