From 3c235bb7b4321df8923eca694f7c027de99dab34 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 8 Jan 2024 07:53:08 -0500 Subject: [PATCH] LockVerifyServer does not need to reuse addresses nor set accept timeout (#12535) --- .../apache/lucene/store/LockStressTest.java | 3 +- .../apache/lucene/store/LockVerifyServer.java | 3 +- .../lucene/store/TestStressLockFactories.java | 122 ++++++++++++++---- 3 files changed, 96 insertions(+), 32 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/store/LockStressTest.java b/lucene/core/src/java/org/apache/lucene/store/LockStressTest.java index 1388f633b0a..65ccefb452d 100644 --- a/lucene/core/src/java/org/apache/lucene/store/LockStressTest.java +++ b/lucene/core/src/java/org/apache/lucene/store/LockStressTest.java @@ -103,8 +103,7 @@ public class LockStressTest { System.out.println( "Connecting to server " + addr + " and registering as client " + myID + "..."); try (Socket socket = new Socket()) { - socket.setReuseAddress(true); - socket.connect(addr, 500); + socket.connect(addr, 3000); // wait at most 3 seconds to successfully connect, else fail final OutputStream out = socket.getOutputStream(); final InputStream in = socket.getInputStream(); diff --git a/lucene/core/src/java/org/apache/lucene/store/LockVerifyServer.java b/lucene/core/src/java/org/apache/lucene/store/LockVerifyServer.java index 4daac89e777..c45b1d1c336 100644 --- a/lucene/core/src/java/org/apache/lucene/store/LockVerifyServer.java +++ b/lucene/core/src/java/org/apache/lucene/store/LockVerifyServer.java @@ -43,8 +43,7 @@ public class LockVerifyServer { static void run(String hostname, int maxClients, Consumer startClients) throws Exception { try (final ServerSocket s = new ServerSocket()) { - s.setReuseAddress(true); - s.setSoTimeout(30000); // initially 30 secs to give clients enough time to startup + s.setSoTimeout(30000); // give clients at most 30 secs to connect and send bytes s.bind(new InetSocketAddress(hostname, 0)); final InetSocketAddress localAddr = (InetSocketAddress) s.getLocalSocketAddress(); System.out.println("Listening on " + localAddr + "..."); diff --git a/lucene/core/src/test/org/apache/lucene/store/TestStressLockFactories.java b/lucene/core/src/test/org/apache/lucene/store/TestStressLockFactories.java index 3a3c5b1cb54..173dc04ffe8 100644 --- a/lucene/core/src/test/org/apache/lucene/store/TestStressLockFactories.java +++ b/lucene/core/src/test/org/apache/lucene/store/TestStressLockFactories.java @@ -18,6 +18,8 @@ package org.apache.lucene.store; import java.io.IOException; import java.lang.ProcessBuilder.Redirect; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; @@ -40,6 +42,22 @@ public class TestStressLockFactories extends LuceneTestCase { } } + private String readIfExists(Path path) throws IOException { + if (Files.exists(path)) { + return new String(Files.readAllBytes(path), StandardCharsets.UTF_8); + } else { + return null; + } + } + + private String readClientStdout(Path dir, int client) throws IOException { + return readIfExists(dir.resolve("out-" + client + ".txt")); + } + + private String readClientStderr(Path dir, int client) throws IOException { + return readIfExists(dir.resolve("err-" + client + ".txt")); + } + private void runImpl(Class impl) throws Exception { final int clients = TEST_NIGHTLY ? 5 : 2; final String host = "127.0.0.1"; @@ -50,41 +68,89 @@ public class TestStressLockFactories extends LuceneTestCase { final List processes = new ArrayList<>(clients); - LockVerifyServer.run( - host, - clients, - addr -> { - // spawn clients as separate Java processes - for (int i = 0; i < clients; i++) { - try { - List args = new ArrayList<>(); - args.add(Paths.get(System.getProperty("java.home"), "bin", "java").toString()); - args.addAll(getJvmForkArguments()); - args.addAll( - List.of( - "-Xmx32M", - LockStressTest.class.getName(), - Integer.toString(i), - addr.getHostString(), - Integer.toString(addr.getPort()), - impl.getName(), - dir.toString(), - Integer.toString(delay), - Integer.toString(rounds))); + try { + LockVerifyServer.run( + host, + clients, + addr -> { + // spawn clients as separate Java processes + for (int i = 0; i < clients; i++) { + try { + List args = new ArrayList<>(); + args.add(Paths.get(System.getProperty("java.home"), "bin", "java").toString()); + args.addAll(getJvmForkArguments()); + args.addAll( + List.of( + "-Xmx32M", + LockStressTest.class.getName(), + Integer.toString(i), + addr.getHostString(), + Integer.toString(addr.getPort()), + impl.getName(), + dir.toString(), + Integer.toString(delay), + Integer.toString(rounds))); - processes.add(applyRedirection(new ProcessBuilder(args), i, dir).start()); - } catch (IOException ioe) { - throw new AssertionError("Failed to start child process.", ioe); + processes.add(applyRedirection(new ProcessBuilder(args), i, dir).start()); + } catch (IOException ioe) { + throw new AssertionError("Failed to start child process.", ioe); + } } - } - }); + }); + } catch (Exception e) { + System.err.println("Server failed"); + int client = 0; + for (Process p : processes) { + System.err.println("stderr for " + p.pid() + ":\n" + readClientStderr(dir, client)); + System.err.println("stdout for " + p.pid() + ":\n" + readClientStdout(dir, client)); + client++; + } + + throw e; + } // wait for all processes to exit... try { + int client = 0; for (Process p : processes) { - if (p.waitFor(15, TimeUnit.SECONDS)) { - assertEquals("Process " + p.pid() + " died abnormally?", 0, p.waitFor()); + int clientTimeoutSeconds = 15; + + boolean doFail = false; + String reason = null; + + if (p.waitFor(clientTimeoutSeconds, TimeUnit.SECONDS)) { + // process finished within our 15 second wait period; get its exit code + int exitCode = p.waitFor(); + if (exitCode != 0) { + doFail = true; + reason = + "Process " + + p.pid() + + " (client " + + client + + ") exited abnormally: exit code " + + exitCode; + } + } else { + doFail = true; + reason = + "Process " + + p.pid() + + " (client " + + client + + ") did not finish within " + + clientTimeoutSeconds + + " second timeout"; } + + if (doFail) { + System.err.println(reason); + System.err.println("stderr for " + p.pid() + ":\n" + readClientStderr(dir, client)); + System.err.println("stdout for " + p.pid() + ":\n" + readClientStdout(dir, client)); + fail(reason); + } + + client++; } } finally { // kill all processes, which are still alive.