Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Java: Fix scriptKill Flaky Tests #3364

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
10f66bd
wip
edlng Mar 14, 2025
2e161cf
(wip) small fix and cleaned up code
edlng Mar 14, 2025
005e6f3
(wip) maybe fixed unkillable flaky test
edlng Mar 14, 2025
800756e
(wip) fix lint and added details
edlng Mar 14, 2025
633e145
removed prints and used easier dummy command
edlng Mar 14, 2025
7cbe05f
removed resetting timeouts and added thread sleeps instead
edlng Mar 14, 2025
2e4e042
Merge branch 'main' into java/fix-script-kill-tests
edlng Mar 14, 2025
2be158b
added extra timeout check before rerunning script
edlng Mar 14, 2025
4e1ab13
repeat tests
edlng Mar 17, 2025
4f1c9b1
temporarily bump timeout for flaky tests
edlng Mar 17, 2025
326d6f9
(wip) added more debug logs
edlng Mar 17, 2025
bc255d0
(wip) added more debug logs
edlng Mar 17, 2025
c4a813b
(wip) even more debugging to test flakiness
edlng Mar 17, 2025
0900737
(wip) bump GA timeout more to ensure flakiness doesn't arise in later…
edlng Mar 17, 2025
6109a84
(wip) remove prints, rerun tests
edlng Mar 18, 2025
204a423
run unkillable
edlng Mar 18, 2025
6931951
retesting script kill with less timeout
edlng Mar 18, 2025
a9d42ae
make test run once
edlng Mar 18, 2025
89e3c0a
Merge branch 'main' into java/fix-script-kill-tests
edlng Mar 18, 2025
8606747
Merge branch 'main' into java/fix-script-kill-tests
edlng Mar 19, 2025
e01de61
Merge branch 'main' into java/fix-script-kill-tests
edlng Mar 19, 2025
c01f396
added more checking
edlng Mar 19, 2025
e13077a
Revert "added more checking"
edlng Mar 19, 2025
c05254c
fix lint
edlng Mar 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 91 additions & 10 deletions java/integTest/src/test/java/glide/standalone/CommandTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@
import java.util.stream.Stream;
import lombok.SneakyThrows;
import org.apache.commons.lang3.ArrayUtils;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Timeout;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
Expand Down Expand Up @@ -1700,11 +1699,11 @@ public void scriptFlush(GlideClient regularClient) {
script.close();
}

@Disabled("flaky test: re-enable once fixed")
@ParameterizedTest
@MethodSource("getClients")
@SneakyThrows
public void scriptKill(GlideClient regularClient) {

// Verify that script_kill raises an error when no script is running
ExecutionException executionException =
assertThrows(ExecutionException.class, () -> regularClient.scriptKill().get());
Expand All @@ -1715,27 +1714,76 @@ public void scriptKill(GlideClient regularClient) {
.toLowerCase()
.contains("no scripts in execution right now"));

CompletableFuture<Object> promise = new CompletableFuture<>();
promise.complete(null);

// create and load a long-running script
Script script = new Script(createLongRunningLuaScript(5, true), true);
Script script = new Script(createLongRunningLuaScript(6, true), true);

try (var testClient =
GlideClient.createClient(commonClientConfig().requestTimeout(10000).build()).get()) {
try {
testClient.invokeScript(script);

Thread.sleep(1000);

// Wait until script runs
int timeout = 4000; // ms
while (timeout >= 0) {
try {
regularClient.ping().get(); // Dummy test command
} catch (ExecutionException err) {
if (err.getCause() instanceof RequestException) {

// Check if the script is executing
if (err.getMessage().toLowerCase().contains("valkey is busy running a script")) {
break;
}

// Try rerunning the script if 2 seconds have passed and if the exception has not
// changed to "busy running a script"
if (timeout <= 2000
&& err.getMessage().toLowerCase().contains("no scripts in execution right now")) {
testClient.invokeScript(script);
Thread.sleep(1000);
}
}
}
timeout -= 500;
}

// Run script kill until it returns OK
boolean scriptKilled = false;
int timeout = 4000; // ms
timeout = 4000; // ms
while (timeout >= 0) {
try {
assertEquals(OK, regularClient.scriptKill().get());
scriptKilled = true;
break;
} catch (ExecutionException exception) {
// If 2s passed and exception still says "no scripts in execution right now",
// rerun script.
if (timeout <= 2000
&& exception.getCause() instanceof RequestException
&& exception
.getMessage()
.toLowerCase()
.contains("no scripts in execution right now")) {
testClient.invokeScript(script);
Thread.sleep(1000);

// Wait until script runs
int scriptTimeout = 2000; // ms
while (scriptTimeout >= 0) {
try {
regularClient.ping().get(); // Dummy test
} catch (ExecutionException err) {
if (err.getCause() instanceof RequestException
&& err.getMessage()
.toLowerCase()
.contains("valkey is busy running a script")) {
break;
}
}
scriptTimeout -= 500;
}
}
} catch (RequestException ignored) {
}
Thread.sleep(500);
Expand All @@ -1760,7 +1808,6 @@ public void scriptKill(GlideClient regularClient) {
.contains("no scripts in execution right now"));
}

@Disabled("flaky test: re-enable once fixed")
@SneakyThrows
@ParameterizedTest
@MethodSource("getClients")
Expand All @@ -1780,8 +1827,33 @@ public void scriptKill_unkillable(GlideClient regularClient) {

Thread.sleep(1000);

boolean foundUnkillable = false;
// To prevent timeout issues, ensure script is actually running before trying to kill it
int timeout = 4000; // ms
while (timeout >= 0) {
try {
regularClient.ping().get(); // Dummy test command
} catch (ExecutionException err) {
if (err.getCause() instanceof RequestException) {

// Check if the script is executing
if (err.getMessage().toLowerCase().contains("valkey is busy running a script")) {
break;
}

// Try rerunning the script if 2 seconds have passed and if the exception has not
// changed to "busy running a script"
if (timeout <= 2000
&& err.getMessage().toLowerCase().contains("no scripts in execution right now")) {
promise = testClient.invokeScript(script, ScriptOptions.builder().key(key).build());
Thread.sleep(1000);
}
}
}
timeout -= 500;
}

boolean foundUnkillable = false;
timeout = 4000;
while (timeout >= 0) {
try {
// valkey kills a script with 5 sec delay
Expand All @@ -1795,6 +1867,15 @@ public void scriptKill_unkillable(GlideClient regularClient) {
foundUnkillable = true;
break;
}

if (execException.getCause() instanceof RequestException
&& execException
.getMessage()
.toLowerCase()
.contains("no scripts in execution right now")) {
promise = testClient.invokeScript(script, ScriptOptions.builder().key(key).build());
Thread.sleep(1000);
}
}
Thread.sleep(500);
timeout -= 500;
Expand Down
Loading