From d893041634b91ced00736e6b88d0f76c4f647b0f Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Fri, 18 May 2018 07:46:52 +0200 Subject: [PATCH] [ML] add version information in case of crash of native ML process (#30674) This change adds version information in case a native ML process crashes, the version is important for choosing the right symbol files when analyzing the crash. Adding the version combines all necessary information on one line. relates elastic/ml-cpp#94 --- .../ml/job/process/NativeController.java | 17 +------- .../process/logging/CppLogMessageHandler.java | 39 ++++++++++++++++++- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/NativeController.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/NativeController.java index a45f44c2276..43c3f4825dd 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/NativeController.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/NativeController.java @@ -22,8 +22,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.TimeoutException; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** @@ -84,20 +82,7 @@ public class NativeController { } public Map getNativeCodeInfo() throws TimeoutException { - String copyrightMessage = cppLogHandler.getCppCopyright(CONTROLLER_CONNECT_TIMEOUT); - Matcher matcher = Pattern.compile("Version (.+) \\(Build ([^)]+)\\) Copyright ").matcher(copyrightMessage); - if (matcher.find()) { - Map info = new HashMap<>(2); - info.put("version", matcher.group(1)); - info.put("build_hash", matcher.group(2)); - return info; - } else { - // If this happens it probably means someone has changed the format in lib/ver/CBuildInfo.cc - // in the machine-learning-cpp repo without changing the pattern above to match - String msg = "Unexpected native controller process copyright format: " + copyrightMessage; - LOGGER.error(msg); - throw new ElasticsearchException(msg); - } + return cppLogHandler.getNativeCodeInfo(CONTROLLER_CONNECT_TIMEOUT); } public void startProcess(List command) throws IOException { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java index ddafc36416b..af0f199dd0c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java @@ -8,7 +8,7 @@ package org.elasticsearch.xpack.ml.job.process.logging; import org.apache.logging.log4j.Level; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; -import org.elasticsearch.common.ParsingException; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; @@ -30,10 +30,15 @@ import java.time.Duration; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.Deque; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; import java.util.Objects; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Handle a stream of C++ log messages that arrive via a named pipe in JSON format. @@ -181,6 +186,26 @@ public class CppLogMessageHandler implements Closeable { return cppCopyright; } + /** + * Extracts version information from the copyright string which assumes a certain format. + */ + public Map getNativeCodeInfo(Duration timeout) throws TimeoutException { + String copyrightMessage = getCppCopyright(timeout); + Matcher matcher = Pattern.compile("Version (.+) \\(Build ([^)]+)\\) Copyright ").matcher(copyrightMessage); + if (matcher.find()) { + Map info = new HashMap<>(2); + info.put("version", matcher.group(1)); + info.put("build_hash", matcher.group(2)); + return info; + } else { + // If this happens it probably means someone has changed the format in lib/ver/CBuildInfo.cc + // in the ml-cpp repo without changing the pattern above to match + String msg = "Unexpected native process copyright format: " + copyrightMessage; + LOGGER.error(msg); + throw new ElasticsearchException(msg); + } + } + /** * Expected to be called very infrequently. */ @@ -281,8 +306,18 @@ public class CppLogMessageHandler implements Closeable { } catch (XContentParseException e) { String upstreamMessage = "Fatal error: '" + bytesRef.utf8ToString() + "'"; if (upstreamMessage.contains("bad_alloc")) { - upstreamMessage += ", process ran out of memory."; + upstreamMessage += ", process ran out of memory"; } + + // add version information, so it's conveniently next to the crash log + upstreamMessage += ", version: "; + try { + Map versionInfo = getNativeCodeInfo(Duration.ofMillis(10)); + upstreamMessage += String.format(Locale.ROOT, "%s (build %s)", versionInfo.get("version"), versionInfo.get("build_hash")); + } catch (TimeoutException timeoutException) { + upstreamMessage += "failed to retrieve"; + } + storeError(upstreamMessage); seenFatalError = true; } catch (IOException e) {