From 742a05261905975052558aec4794e9a533643c50 Mon Sep 17 00:00:00 2001
From: David Roberts
Date: Fri, 10 Nov 2017 15:14:28 +0000
Subject: [PATCH] [ML] Account for the possibility of C++ log messages being
 UTF-16 (elastic/x-pack-elasticsearch#2952)

On Windows, log4cxx always writes to stderr in UTF-16, and we get the
logs from C++ to Java by redirecting stderr to our named pipe. Hence the
log handler in Java needs to tolerate the log stream it's reading being
either UTF-16 (for Windows) or UTF-8 (for other platforms).

Fixes elastic/machine-learning-cpp#385

Original commit: elastic/x-pack-elasticsearch@89237d71255558c7b4452d6c7b8ae24bc5d1aa32
---
 .../xpack/ml/job/process/logging/CppLogMessageHandler.java | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/plugin/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java b/plugin/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java
index d0948dee88f..c57096e0e60 100644
--- a/plugin/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java
+++ b/plugin/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java
@@ -205,6 +205,12 @@ public class CppLogMessageHandler implements Closeable {
                 parseMessage(xContent, bytesRef.slice(from, nextMarker - from));
             }
             from = nextMarker + 1;
+            if (from < bytesRef.length() && bytesRef.get(from) == (byte) 0) {
+                // This is to work around the problem of log4cxx on Windows
+                // outputting UTF-16 instead of UTF-8. For full details see
+                // https://github.com/elastic/machine-learning-cpp/issues/385
+                ++from;
+            }
         }
         if (from >= bytesRef.length()) {
             return null;