Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Im 542 get s3 region by analyzing the asset href #224

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ public static String readCollectionId(JSONObject collection) {
return collection.getString("id");
}

private static JSONObject readItemAssets(JSONObject collection) {
return collection.getJSONObject("item_assets");
}

/**
* Obtains the geometry from the collection data.
* Currently, only available for dynamic collections.
Expand Down Expand Up @@ -82,14 +78,6 @@ public static JSONObject readAssetsFromCollection(String collectionUrl, JSONObje
return collection.getJSONObject("assets");
}

// item_assets is a shortcut for obtaining information about the assets
// https://github.com/stac-extensions/item-assets
if (collection.has("item_assets")) {
if (!collection.getJSONObject("item_assets").isEmpty()) {
return STACCollectionParser.readItemAssets(collection);
}
}

// TODO Move the query to another place.
String parameters = "?collections=" + collectonId + "&limit=1";
HttpResponse<JsonNode> response = Unirest.get(searchEndpoint.get() + parameters).asJson();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;

import kong.unirest.apache.ApacheClient;
import kong.unirest.json.JSONObject;
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
Expand Down Expand Up @@ -274,8 +273,8 @@ public void getEncodedData(IResource resource, Map<String, String> urnParameters
scope.getMonitor().warn("Multiple EPSGs found on the items " + EPSGsAtItems.toString() + ". The transformation process could affect the data.");
}

if (resource.getParameters().contains("s3BucketRegion")) {
String bucketRegion = resource.getParameters().get("s3BucketRegion", String.class);
if (resource.getParameters().contains("awsRegion")) {
String bucketRegion = resource.getParameters().get("awsRegion", String.class);
S3Client s3Client = buildS3Client(bucketRegion);
collection.setS3Client(s3Client);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@
import org.integratedmodelling.klab.exceptions.KlabIOException;
import org.integratedmodelling.klab.utils.Parameters;
import org.integratedmodelling.klab.utils.Triple;
import org.integratedmodelling.klab.utils.s3.S3RegionResolver;
import org.integratedmodelling.klab.utils.s3.S3URLUtils;

import kong.unirest.json.JSONObject;
import software.amazon.awssdk.regions.Region;

public class STACImporter implements IResourceImporter {

Expand Down Expand Up @@ -82,6 +85,12 @@ private void importCollection(List<Builder> ret, IParameters<String> parameters,
}
parameters.put("asset", assetId);
String resourceUrn = collectionId + "-" + assetId;
String href = assetData.getString("href");
if (S3URLUtils.isS3Endpoint(href)) {
String[] bucketAndObject = href.split("://")[1].split("/", 2);
Region s3Region = S3RegionResolver.resolveBucketRegion(bucketAndObject[0], bucketAndObject[1], monitor);
parameters.put("awsRegion", s3Region.id());
}

Builder builder = buildResource(parameters, project, monitor, resourceUrn);
if (builder != null) {
Expand Down
3 changes: 0 additions & 3 deletions adapters/klab.ogc/src/main/resources/ogc/prototypes/stac.kdl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,4 @@
optional text 'asset'
"The asset that is going to be retrieved from the items."

optional text 's3BucketRegion'
"The Region for S3 elements."

}
18 changes: 18 additions & 0 deletions klab.engine/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -960,6 +960,24 @@
<version>${junit5.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>${mockito.version}</version>
<scope>test</scope>
</dependency>
<!--- AWS dependencies-->
<dependency>
<artifactId>apache-client</artifactId>
<groupId>software.amazon.awssdk</groupId>
<version>${aws.version}</version>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>

</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package org.integratedmodelling.klab.utils.s3;

import java.util.List;
import java.util.stream.Collectors;

import org.integratedmodelling.klab.api.runtime.monitoring.IMonitor;
import org.integratedmodelling.klab.exceptions.KlabResourceAccessException;

import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider;
import software.amazon.awssdk.http.apache.ApacheHttpClient;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest;
import software.amazon.awssdk.services.s3.model.GetBucketLocationResponse;
import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
import software.amazon.awssdk.services.s3.model.S3Exception;

public class S3RegionResolver {

public static Region resolveBucketRegion(String bucketName, String objectKey, IMonitor monitor) {
// Step 1: Attempt to dynamically resolve the bucket's region
try (S3Client s3 = S3Client.builder()
.httpClientBuilder(ApacheHttpClient.builder())
.region(Region.US_EAST_1) // Use the default region
.credentialsProvider(AnonymousCredentialsProvider.create()) // Anonymous credentials
.build()) {

GetBucketLocationRequest request = GetBucketLocationRequest.builder()
.bucket(bucketName)
.build();

GetBucketLocationResponse response = s3.getBucketLocation(request);
String location = response.locationConstraintAsString();

// Handle "null" or "global" regions
if (location == null || location.equalsIgnoreCase("null")) {
return Region.US_EAST_1;
}

Region resolvedRegion = Region.of(location);
monitor.debug("Bucket " + bucketName + " exists in the default region.");
return resolvedRegion;
} catch (S3Exception e) {
;// Nothing to do here. It is expected to fail if the bucket is not in the default region. Try on step 2.
} catch (Exception e) {
monitor.debug("Unexpected exception trying to get to the S3 default region: " + e.getMessage());
}

// Step 2: Iterate through all regions and test lightweight requests
return resolveRegionByTesting(bucketName, objectKey, monitor);
}

private static List<Region> getAwsRegions() {
// List of regions to exclude (e.g., isolated regions or restricted access regions)
List<Region> excludedRegions = List.of(
Region.US_ISO_EAST_1, // Restricted to isolated networks
Region.US_ISO_WEST_1, // Example of another restricted region
Region.CN_NORTH_1, // China regions may require special accounts
Region.CN_NORTHWEST_1
);

// Get the list of all AWS regions, excluding problematic ones
return Region.regions().stream()
.filter(region -> !excludedRegions.contains(region))
.collect(Collectors.toList());
}

private static Region resolveRegionByTesting(String bucketName, String objectKey, IMonitor monitor) {
// Get the list of all AWS regions
List<Region> regions = getAwsRegions();

// Iterate through regions to perform a lightweight test
for (Region region : regions) {
try (S3Client s3 = S3Client.builder()
.httpClientBuilder(ApacheHttpClient.builder())
.region(region)
.credentialsProvider(AnonymousCredentialsProvider.create())
.build()) {

// Perform a lightweight HEAD request to check if the object exists
HeadObjectRequest request = HeadObjectRequest.builder()
.bucket(bucketName)
.key(objectKey)
.build();

s3.headObject(request); // If no exception is thrown, the region is correct
monitor.debug("Bucket " + bucketName + " exists in the region " + region.id() + ".");
return region;
} catch (S3Exception e) {
// Continue testing other regions if the bucket is not found
continue;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This catch is very general, could it be interesting to write a warning to the log file?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure about adding the logging class here, as the loop will fail most times. I could manage the exceptions in a more specialized way.

} catch (Exception e) {
monitor.debug("Unexpected exception trying to get to the S3 region " + region.id() + ": " + e.getMessage());
continue;
}
}

throw new KlabResourceAccessException("Unable to resolve region for bucket: " + bucketName);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package org.integratedmodelling.klab.test.utils;

import org.integratedmodelling.klab.api.runtime.monitoring.IMonitor;
import org.integratedmodelling.klab.exceptions.KlabResourceAccessException;
import org.integratedmodelling.klab.utils.s3.S3RegionResolver;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

import software.amazon.awssdk.regions.Region;

public class S3RegionResolverTest {
@Test
public void resolveBucketRegion_resolutionSuccessful() {
String bucket = "deafrica-input-datasets";
String objectKey = "rainfall_chirps_monthly/chirps-v2.0_2024.03.tif";
IMonitor monitor = Mockito.mock(IMonitor.class);

Region ret = S3RegionResolver.resolveBucketRegion(bucket, objectKey, monitor);

Assertions.assertEquals(Region.AF_SOUTH_1, ret);
}

@Test
// Warning: a relatively costly test (sometimes over 30 seconds)
public void resolveBucketRegion_resolutionUnsuccessful() {
String bucket = "fake-bucket";
String objectKey = "fake-object.tif";
IMonitor monitor = Mockito.mock(IMonitor.class);

Assertions.assertThrows(KlabResourceAccessException.class, () -> {
S3RegionResolver.resolveBucketRegion(bucket, objectKey, monitor);
});
}

}