-
Notifications
You must be signed in to change notification settings - Fork 108
Correct offloading of large batch entries to s3. Closes #154 #155
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,14 +22,18 @@ | |
import static com.amazon.sqs.javamessaging.AmazonSQSExtendedClientUtil.getReservedAttributeNameIfPresent; | ||
import static com.amazon.sqs.javamessaging.AmazonSQSExtendedClientUtil.isLarge; | ||
import static com.amazon.sqs.javamessaging.AmazonSQSExtendedClientUtil.isS3ReceiptHandle; | ||
import static com.amazon.sqs.javamessaging.AmazonSQSExtendedClientUtil.sizeOf; | ||
import static com.amazon.sqs.javamessaging.AmazonSQSExtendedClientUtil.updateMessageAttributePayloadSize; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Comparator; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Optional; | ||
import java.util.UUID; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.IntStream; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
|
@@ -73,6 +77,7 @@ | |
import software.amazon.awssdk.services.sqs.model.SendMessageResponse; | ||
import software.amazon.awssdk.services.sqs.model.SqsException; | ||
import software.amazon.awssdk.services.sqs.model.TooManyEntriesInBatchRequestException; | ||
import software.amazon.awssdk.utils.Pair; | ||
import software.amazon.awssdk.utils.StringUtils; | ||
import software.amazon.payloadoffloading.PayloadStore; | ||
import software.amazon.payloadoffloading.S3BackedPayloadStore; | ||
|
@@ -616,23 +621,38 @@ public SendMessageBatchResponse sendMessageBatch(SendMessageBatchRequest sendMes | |
return super.sendMessageBatch(sendMessageBatchRequest); | ||
} | ||
|
||
List<SendMessageBatchRequestEntry> batchEntries = new ArrayList<>(sendMessageBatchRequest.entries().size()); | ||
List<SendMessageBatchRequestEntry> originalEntries = sendMessageBatchRequest.entries(); | ||
ArrayList<SendMessageBatchRequestEntry> alteredEntries = new ArrayList<>(originalEntries.size()); | ||
alteredEntries.addAll(originalEntries); | ||
|
||
// Batch entry sizes order by size | ||
List<Pair<Integer, Long>> entrySizes = IntStream.range(0, originalEntries.size()) | ||
.boxed() | ||
.map(i -> Pair.of(i, sizeOf(originalEntries.get(i)))) | ||
.sorted((p1, p2) -> Long.compare(p2.right(), p1.right())) | ||
.collect(Collectors.toList()); | ||
|
||
long totalSize = entrySizes.stream().map(Pair::right).mapToLong(Long::longValue).sum(); | ||
|
||
// Move messages to s3 starting from the largest until total size is under the threshold if needed | ||
boolean hasS3Entries = false; | ||
for (SendMessageBatchRequestEntry entry : sendMessageBatchRequest.entries()) { | ||
//Check message attributes for ExtendedClient related constraints | ||
checkMessageAttributes(clientConfiguration.getPayloadSizeThreshold(), entry.messageAttributes()); | ||
|
||
if (clientConfiguration.isAlwaysThroughS3() | ||
|| isLarge(clientConfiguration.getPayloadSizeThreshold(), entry)) { | ||
entry = storeMessageInS3(entry); | ||
hasS3Entries = true; | ||
for (Pair<Integer, Long> pair : entrySizes) { | ||
// Verify that total size of batch request is within limits | ||
if (totalSize <= clientConfiguration.getPayloadSizeThreshold() && !clientConfiguration.isAlwaysThroughS3()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we are checking the total size, does it make sense to move the if condition outside of the for loop? If it is true, we can throw an exception; else, we will keep adding entries to S3. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The overall idea is described in the comment above. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ziyanli-amazon the idea is to offload as few entries as possible, as each offload is a roundtrip against S3. So offloading goes from the biggest entries to the smallest until the batch fits into the size limit. |
||
break; | ||
} | ||
batchEntries.add(entry); | ||
Integer entryIndex = pair.left(); | ||
Long originalEntrySize = pair.right(); | ||
SendMessageBatchRequestEntry originalEntry = originalEntries.get(entryIndex); | ||
checkMessageAttributes(clientConfiguration.getPayloadSizeThreshold(), originalEntry.messageAttributes()); | ||
SendMessageBatchRequestEntry alteredEntry = storeMessageInS3(originalEntry); | ||
totalSize = totalSize - originalEntrySize + sizeOf(alteredEntry); | ||
alteredEntries.set(entryIndex, alteredEntry); | ||
hasS3Entries = true; | ||
} | ||
|
||
if (hasS3Entries) { | ||
sendMessageBatchRequest = sendMessageBatchRequest.toBuilder().entries(batchEntries).build(); | ||
sendMessageBatchRequest = sendMessageBatchRequest.toBuilder().entries(alteredEntries).build(); | ||
} | ||
|
||
return super.sendMessageBatch(sendMessageBatchRequest); | ||
|
@@ -896,6 +916,6 @@ private static <T extends AwsRequest.Builder> T appendUserAgent(final T builder) | |
public void close() { | ||
super.close(); | ||
this.clientConfiguration.getS3Client().close(); | ||
} | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what's the purpose of sorting here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The overall idea here is that we want to offload the fewest batch entries possible. Fewer AWS calls mean less time spent on roundtrips.
So we sort by size, and after that we start moving entries from largest to smallest, stopping at the moment
totalSize
fits the limit. We only offload according to the sort order, and preserve the original order of entries in the resulting batch request, as it is important for the FIFO use case.