Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HADOOP-19385. S3A: Add iceberg bulk delete test #7316

Open
wants to merge 1 commit into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
import org.apache.hadoop.io.wrappedio.WrappedIO;
import org.apache.hadoop.io.wrappedio.impl.DynamicWrappedIO;

import static org.apache.hadoop.fs.contract.ContractTestUtils.assertSuccessfulBulkDelete;
import static org.apache.hadoop.fs.contract.ContractTestUtils.createListOfPaths;
import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
import static org.apache.hadoop.io.wrappedio.WrappedIO.bulkDelete_delete;
Expand Down Expand Up @@ -210,6 +212,20 @@ public void testDeletePathsNotExists() throws Exception {
assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths));
}

/**
* Use a more complex filename.
* This validates that any conversions to URI/string
* when passing to an object store is correct.
*/
@Test
public void testDeleteComplexFilename() throws Exception {
Path path = new Path(basePath, "child[=comple]x");
List<Path> paths = new ArrayList<>();
paths.add(path);
// bulk delete call doesn't verify if a path exist or not before deleting.
assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths));
}

@Test
public void testDeletePathsDirectory() throws Exception {
List<Path> paths = new ArrayList<>();
Expand Down Expand Up @@ -333,28 +349,4 @@ public void testChildPaths() throws Exception {
assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths));
}


/**
* Assert on returned entries after bulk delete operation.
* Entries should be empty after successful delete.
*/
public static void assertSuccessfulBulkDelete(List<Map.Entry<Path, String>> entries) {
Assertions.assertThat(entries)
.describedAs("Bulk delete failed, " +
"return entries should be empty after successful delete")
.isEmpty();
}

/**
* Create a list of paths with the given count
* under the given base path.
*/
private List<Path> createListOfPaths(int count, Path basePath) {
List<Path> paths = new ArrayList<>();
for (int i = 0; i < count; i++) {
Path path = new Path(basePath, "file-" + i);
paths.add(path);
}
return paths;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.Set;
Expand Down Expand Up @@ -1825,6 +1826,48 @@ public static long totalReadSize(final List<FileRange> fileRanges) {
.sum();
}

/**
* Assert on returned entries after bulk delete operation.
* Entries should be empty after successful delete.
*/
public static void assertSuccessfulBulkDelete(List<Map.Entry<Path, String>> entries) {
Assertions.assertThat(entries)
.describedAs("Bulk delete failed, " +
"return entries should be empty after successful delete")
.isEmpty();
}

/**
* Get a file status value or, if the path doesn't exist, return null.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps an opportunity to return Optional<FileStatus>?

* @param fs filesystem
* @param path path
* @return status or null
* @throws IOException Any IO Failure other than file not found.
*/
public static final FileStatus getFileStatusOrNull(
final FileSystem fs,
final Path path)
throws IOException {
try {
return fs.getFileStatus(path);
} catch (FileNotFoundException e) {
return null;
}
}

/**
* Create a list of paths with the given count
* under the given base path.
*/
public static List<Path> createListOfPaths(int count, Path basePath) {
List<Path> paths = new ArrayList<>();
for (int i = 0; i < count; i++) {
Path path = new Path(basePath, "file-" + i);
paths.add(path);
}
return paths;
}

/**
* Results of recursive directory creation/scan operations.
*/
Expand Down
12 changes: 11 additions & 1 deletion hadoop-project/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,17 @@
<version>${hadoop.version}</version>
<type>test-jar</type>
</dependency>

<dependency>
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert

<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-format-testing</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-format-testing</artifactId>
<version>${hadoop.version}</version>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
Expand Down
67 changes: 67 additions & 0 deletions hadoop-tools/hadoop-aws/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@
<job.id>00</job.id>
<!-- are root tests enabled. Set to false when running parallel jobs on same bucket -->
<root.tests.enabled>unset</root.tests.enabled>

<!-- iceberg is java 17+ -->
<!-- This requires the changes of the matching PR -->
<iceberg.version>1.8.0-SNAPSHOT</iceberg.version>
</properties>

<profiles>
Expand Down Expand Up @@ -312,6 +316,69 @@
</properties>
</profile>

<!-- Adds a *test only* java 17 build profile-->
<profile>
<id>java-17-or-later</id>
<activation>
<jdk>[17,)</jdk>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
<execution>
<id>add-java17-test-source</id>
<phase>generate-test-sources</phase>
<goals>
<goal>add-test-source</goal>
</goals>
<configuration>
<sources>
<source>${basedir}/src/test/java17</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>

<!-- Apache Iceberg, used for testing/regression testing BulkDelete -->
<!-- iceberg is java 17+ and so can only be referenced in java 17+ test source trees -->
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-core</artifactId>
<version>${iceberg.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.apache.httpcomponents.core5</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>io.airlift</groupId>
<artifactId>aircompressor</artifactId>
</exclusion>
<exclusion>
<groupId>org.checkerframework</groupId>
<artifactId>checker-qual</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>

</profiles>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1521,6 +1521,13 @@ private Constants() {
*/
public static final String FS_S3A_PERFORMANCE_FLAGS =
"fs.s3a.performance.flags";

/**
* All performance flags in the enumeration.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@value

*/
public static final String PERFORMANCE_FLAGS =
"create, delete, mkdir, open";

/**
* Prefix for adding a header to the object when created.
* The actual value must have a "." suffix and then the actual header.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import java.util.List;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;

import org.apache.hadoop.fs.BulkDelete;
Expand All @@ -42,6 +44,9 @@
*/
public class BulkDeleteOperation extends AbstractStoreOperation implements BulkDelete {

private static final Logger LOG = LoggerFactory.getLogger(
BulkDeleteOperation.class);

private final BulkDeleteOperationCallbacks callbacks;

private final Path basePath;
Expand Down Expand Up @@ -78,14 +83,18 @@ public Path basePath() {
public List<Map.Entry<Path, String>> bulkDelete(final Collection<Path> paths)
throws IOException, IllegalArgumentException {
requireNonNull(paths);
checkArgument(paths.size() <= pageSize,
"Number of paths (%d) is larger than the page size (%d)", paths.size(), pageSize);
final int size = paths.size();
LOG.debug("bulkDelete() of {} paths with pagesize {}",
size, pageSize);
checkArgument(size <= pageSize,
"Number of paths (%d) is larger than the page size (%d)", size, pageSize);
final StoreContext context = getStoreContext();
final List<ObjectIdentifier> objects = paths.stream().map(p -> {
checkArgument(p.isAbsolute(), "Path %s is not absolute", p);
checkArgument(validatePathIsUnderParent(p, basePath),
"Path %s is not under the base path %s", p, basePath);
final String k = context.pathToKey(p);
LOG.debug("path \"{}\" mapped to \"{}\"", p, k);
return ObjectIdentifier.builder().key(k).build();
}).collect(toList());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,17 @@ mvn verify -Dparallel-tests -Dprefetch -DtestsThreadCount=8
mvn verify -Dparallel-tests -Dprefetch -Dscale -DtestsThreadCount=8
```

## <a name="java17"></a> Java 17 Tests

This module includes a test source tree which compiles and runs on
Java 17+ _only_. This is to allow external libraries to be used
in testing -libraries which have been built on Java 17 and cannot
be loaded on older versions.

* This source tree is `src/test/java17`.
* It may depend upon any library is built on Java 17 or later.
* It is for testing only.

## <a name="scale"></a> Scale Tests

There are a set of tests designed to measure the scalability and performance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.apache.hadoop.fs.statistics.MeanStatistic;

import static java.util.stream.Collectors.toList;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertSuccessfulBulkDelete;
import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
import org.apache.hadoop.fs.s3a.statistics.CommitterStatistics;
import org.apache.hadoop.io.wrappedio.WrappedIO;

import static org.apache.hadoop.fs.contract.AbstractContractBulkDeleteTest.assertSuccessfulBulkDelete;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertSuccessfulBulkDelete;
import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
import static org.apache.hadoop.fs.s3a.Constants.*;
Expand Down
Loading
Loading