From 766d29e7cf195443559421a2eb0bae3453b7316d Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Mon, 6 Nov 2017 13:20:30 -0500 Subject: [PATCH] Correctly encode warning headers The warnings headers have a fairly limited set of valid characters (cf. quoted-text in RFC 7230). While we have assertions that we adhere to this set of valid characters ensuring that our warning messages do not violate the specificaion, we were neglecting the possibility that arbitrary user input would trickle into these warning headers. Thus, missing here was tests for these situations and encoding of characters that appear outside the set of valid characters. This commit addresses this by encoding any characters in a deprecation message that are not from the set of valid characters. Relates #27269 --- .../common/logging/DeprecationLogger.java | 94 ++++++++++++++++++- .../logging/DeprecationLoggerTests.java | 77 +++++++++++++-- .../common/logging/EvilLoggerTests.java | 5 +- .../org/elasticsearch/test/ESTestCase.java | 2 +- .../test/rest/yaml/section/DoSection.java | 2 +- 5 files changed, 165 insertions(+), 15 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/common/logging/DeprecationLogger.java b/core/src/main/java/org/elasticsearch/common/logging/DeprecationLogger.java index 3ed1d9d30ac..1c559cf64fb 100644 --- a/core/src/main/java/org/elasticsearch/common/logging/DeprecationLogger.java +++ b/core/src/main/java/org/elasticsearch/common/logging/DeprecationLogger.java @@ -26,11 +26,14 @@ import org.elasticsearch.Version; import org.elasticsearch.common.SuppressLoggerChecks; import org.elasticsearch.common.util.concurrent.ThreadContext; +import java.io.CharArrayWriter; +import java.nio.charset.Charset; import java.time.ZoneId; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; import java.time.format.SignStyle; +import java.util.BitSet; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -228,7 +231,7 @@ public class DeprecationLogger { public static Pattern WARNING_HEADER_PATTERN = Pattern.compile( "299 " + // warn code "Elasticsearch-\\d+\\.\\d+\\.\\d+(?:-(?:alpha|beta|rc)\\d+)?(?:-SNAPSHOT)?-(?:[a-f0-9]{7}|Unknown) " + // warn agent - "\"((?:\t| |!|[\\x23-\\x5b]|[\\x5d-\\x7e]|[\\x80-\\xff]|\\\\|\\\\\")*)\" " + // quoted warning value, captured + "\"((?:\t| |!|[\\x23-\\x5B]|[\\x5D-\\x7E]|[\\x80-\\xFF]|\\\\|\\\\\")*)\" " + // quoted warning value, captured // quoted RFC 1123 date format "\"" + // opening quote "(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun), " + // weekday @@ -304,7 +307,7 @@ public class DeprecationLogger { final String formattedMessage = LoggerMessageFormat.format(message, params); final String warningHeaderValue = formatWarning(formattedMessage); assert WARNING_HEADER_PATTERN.matcher(warningHeaderValue).matches(); - assert extractWarningValueFromWarningHeader(warningHeaderValue).equals(escape(formattedMessage)); + assert extractWarningValueFromWarningHeader(warningHeaderValue).equals(escapeAndEncode(formattedMessage)); while (iterator.hasNext()) { try { final ThreadContext next = iterator.next(); @@ -328,7 +331,17 @@ public class DeprecationLogger { * @return a warning value formatted according to RFC 7234 */ public static String formatWarning(final String s) { - return String.format(Locale.ROOT, WARNING_FORMAT, escape(s), RFC_7231_DATE_TIME.format(ZonedDateTime.now(GMT))); + return String.format(Locale.ROOT, WARNING_FORMAT, escapeAndEncode(s), RFC_7231_DATE_TIME.format(ZonedDateTime.now(GMT))); + } + + /** + * Escape and encode a string as a valid RFC 7230 quoted-string. + * + * @param s the string to escape and encode + * @return the escaped and encoded string + */ + public static String escapeAndEncode(final String s) { + return encode(escapeBackslashesAndQuotes(s)); } /** @@ -337,8 +350,81 @@ public class DeprecationLogger { * @param s the string to escape * @return the escaped string */ - public static String escape(String s) { + static String escapeBackslashesAndQuotes(final String s) { return s.replaceAll("([\"\\\\])", "\\\\$1"); } + private static BitSet doesNotNeedEncoding; + + static { + doesNotNeedEncoding = new BitSet(1 + 0xFF); + doesNotNeedEncoding.set('\t'); + doesNotNeedEncoding.set(' '); + doesNotNeedEncoding.set('!'); + doesNotNeedEncoding.set('\\'); + doesNotNeedEncoding.set('"'); + // we have to skip '%' which is 0x25 so that it is percent-encoded too + for (int i = 0x23; i <= 0x24; i++) { + doesNotNeedEncoding.set(i); + } + for (int i = 0x26; i <= 0x5B; i++) { + doesNotNeedEncoding.set(i); + } + for (int i = 0x5D; i <= 0x7E; i++) { + doesNotNeedEncoding.set(i); + } + for (int i = 0x80; i <= 0xFF; i++) { + doesNotNeedEncoding.set(i); + } + assert !doesNotNeedEncoding.get('%'); + } + + private static final Charset UTF_8 = Charset.forName("UTF-8"); + + /** + * Encode a string containing characters outside of the legal characters for an RFC 7230 quoted-string. + * + * @param s the string to encode + * @return the encoded string + */ + static String encode(final String s) { + final StringBuilder sb = new StringBuilder(s.length()); + boolean encodingNeeded = false; + for (int i = 0; i < s.length();) { + int current = (int) s.charAt(i); + /* + * Either the character does not need encoding or it does; when the character does not need encoding we append the character to + * a buffer and move to the next character and when the character does need encoding, we peel off as many characters as possible + * which we encode using UTF-8 until we encounter another character that does not need encoding. + */ + if (doesNotNeedEncoding.get(current)) { + // append directly and move to the next character + sb.append((char) current); + i++; + } else { + int startIndex = i; + do { + i++; + } while (i < s.length() && !doesNotNeedEncoding.get(s.charAt(i))); + + final byte[] bytes = s.substring(startIndex, i).getBytes(UTF_8); + // noinspection ForLoopReplaceableByForEach + for (int j = 0; j < bytes.length; j++) { + sb.append('%').append(hex(bytes[j] >> 4)).append(hex(bytes[j])); + } + encodingNeeded = true; + } + } + return encodingNeeded ? sb.toString() : s; + } + + private static char hex(int b) { + final char ch = Character.forDigit(b & 0xF, 16); + if (Character.isLetter(ch)) { + return Character.toUpperCase(ch); + } else { + return ch; + } + } + } diff --git a/core/src/test/java/org/elasticsearch/common/logging/DeprecationLoggerTests.java b/core/src/test/java/org/elasticsearch/common/logging/DeprecationLoggerTests.java index 3f2274321a2..fdb530749e1 100644 --- a/core/src/test/java/org/elasticsearch/common/logging/DeprecationLoggerTests.java +++ b/core/src/test/java/org/elasticsearch/common/logging/DeprecationLoggerTests.java @@ -23,11 +23,13 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.hamcrest.RegexMatcher; +import org.hamcrest.core.IsSame; import java.io.IOException; import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.stream.IntStream; @@ -71,6 +73,54 @@ public class DeprecationLoggerTests extends ESTestCase { } } + public void testContainingNewline() throws IOException { + try (ThreadContext threadContext = new ThreadContext(Settings.EMPTY)) { + final Set threadContexts = Collections.singleton(threadContext); + + logger.deprecated(threadContexts, "this message contains a newline\n"); + + final Map> responseHeaders = threadContext.getResponseHeaders(); + + assertThat(responseHeaders.size(), equalTo(1)); + final List responses = responseHeaders.get("Warning"); + assertThat(responses, hasSize(1)); + assertThat(responses.get(0), warningValueMatcher); + assertThat(responses.get(0), containsString("\"this message contains a newline%0A\"")); + } + } + + public void testSurrogatePair() throws IOException { + try (ThreadContext threadContext = new ThreadContext(Settings.EMPTY)) { + final Set threadContexts = Collections.singleton(threadContext); + + logger.deprecated(threadContexts, "this message contains a surrogate pair 😱"); + + final Map> responseHeaders = threadContext.getResponseHeaders(); + + assertThat(responseHeaders.size(), equalTo(1)); + final List responses = responseHeaders.get("Warning"); + assertThat(responses, hasSize(1)); + assertThat(responses.get(0), warningValueMatcher); + + // convert UTF-16 to UTF-8 by hand to show the hard-coded constant below is correct + assertThat("😱", equalTo("\uD83D\uDE31")); + final int code = 0x10000 + ((0xD83D & 0x3FF) << 10) + (0xDE31 & 0x3FF); + @SuppressWarnings("PointlessBitwiseExpression") + final int[] points = new int[] { + (code >> 18) & 0x07 | 0xF0, + (code >> 12) & 0x3F | 0x80, + (code >> 6) & 0x3F | 0x80, + (code >> 0) & 0x3F | 0x80}; + final StringBuilder sb = new StringBuilder(); + // noinspection ForLoopReplaceableByForEach + for (int i = 0; i < points.length; i++) { + sb.append("%").append(Integer.toString(points[i], 16).toUpperCase(Locale.ROOT)); + } + assertThat(sb.toString(), equalTo("%F0%9F%98%B1")); + assertThat(responses.get(0), containsString("\"this message contains a surrogate pair %F0%9F%98%B1\"")); + } + } + public void testAddsCombinedHeaderWithThreadContext() throws IOException { try (ThreadContext threadContext = new ThreadContext(Settings.EMPTY)) { final Set threadContexts = Collections.singleton(threadContext); @@ -172,15 +222,28 @@ public class DeprecationLoggerTests extends ESTestCase { assertThat(DeprecationLogger.extractWarningValueFromWarningHeader(first), equalTo(s)); } - public void testEscape() { - assertThat(DeprecationLogger.escape("\\"), equalTo("\\\\")); - assertThat(DeprecationLogger.escape("\""), equalTo("\\\"")); - assertThat(DeprecationLogger.escape("\\\""), equalTo("\\\\\\\"")); - assertThat(DeprecationLogger.escape("\"foo\\bar\""),equalTo("\\\"foo\\\\bar\\\"")); + public void testEscapeBackslashesAndQuotes() { + assertThat(DeprecationLogger.escapeBackslashesAndQuotes("\\"), equalTo("\\\\")); + assertThat(DeprecationLogger.escapeBackslashesAndQuotes("\""), equalTo("\\\"")); + assertThat(DeprecationLogger.escapeBackslashesAndQuotes("\\\""), equalTo("\\\\\\\"")); + assertThat(DeprecationLogger.escapeBackslashesAndQuotes("\"foo\\bar\""),equalTo("\\\"foo\\\\bar\\\"")); // test that characters other than '\' and '"' are left unchanged - String chars = "\t !" + range(0x23, 0x5b) + range(0x5d, 0x73) + range(0x80, 0xff); + String chars = "\t !" + range(0x23, 0x24) + range(0x26, 0x5b) + range(0x5d, 0x73) + range(0x80, 0xff); final String s = new CodepointSetGenerator(chars.toCharArray()).ofCodePointsLength(random(), 16, 16); - assertThat(DeprecationLogger.escape(s), equalTo(s)); + assertThat(DeprecationLogger.escapeBackslashesAndQuotes(s), equalTo(s)); + } + + public void testEncode() { + assertThat(DeprecationLogger.encode("\n"), equalTo("%0A")); + assertThat(DeprecationLogger.encode("😱"), equalTo("%F0%9F%98%B1")); + assertThat(DeprecationLogger.encode("福島深雪"), equalTo("%E7%A6%8F%E5%B3%B6%E6%B7%B1%E9%9B%AA")); + assertThat(DeprecationLogger.encode("100%\n"), equalTo("100%25%0A")); + // test that valid characters are left unchanged + String chars = "\t !" + range(0x23, 0x24) + range(0x26, 0x5b) + range(0x5d, 0x73) + range(0x80, 0xff) + '\\' + '"'; + final String s = new CodepointSetGenerator(chars.toCharArray()).ofCodePointsLength(random(), 16, 16); + assertThat(DeprecationLogger.encode(s), equalTo(s)); + // when no encoding is needed, the original string is returned (optimization) + assertThat(DeprecationLogger.encode(s), IsSame.sameInstance(s)); } private String range(int lowerInclusive, int upperInclusive) { diff --git a/qa/evil-tests/src/test/java/org/elasticsearch/common/logging/EvilLoggerTests.java b/qa/evil-tests/src/test/java/org/elasticsearch/common/logging/EvilLoggerTests.java index 97692e5ea6b..d4bc754689e 100644 --- a/qa/evil-tests/src/test/java/org/elasticsearch/common/logging/EvilLoggerTests.java +++ b/qa/evil-tests/src/test/java/org/elasticsearch/common/logging/EvilLoggerTests.java @@ -28,7 +28,6 @@ import org.apache.logging.log4j.core.appender.ConsoleAppender; import org.apache.logging.log4j.core.appender.CountingNoOpAppender; import org.apache.logging.log4j.core.config.Configurator; import org.apache.logging.log4j.message.ParameterizedMessage; -import org.apache.lucene.util.Constants; import org.elasticsearch.cli.UserException; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.common.Randomness; @@ -165,7 +164,9 @@ public class EvilLoggerTests extends ESTestCase { final Set actualWarningValues = warnings.stream().map(DeprecationLogger::extractWarningValueFromWarningHeader).collect(Collectors.toSet()); for (int j = 0; j < 128; j++) { - assertThat(actualWarningValues, hasItem(DeprecationLogger.escape("This is a maybe logged deprecation message" + j))); + assertThat( + actualWarningValues, + hasItem(DeprecationLogger.escapeAndEncode("This is a maybe logged deprecation message" + j))); } try { diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index db43b5c9c59..e10411e5a43 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -341,7 +341,7 @@ public abstract class ESTestCase extends LuceneTestCase { final Set actualWarningValues = actualWarnings.stream().map(DeprecationLogger::extractWarningValueFromWarningHeader).collect(Collectors.toSet()); for (String msg : expectedWarnings) { - assertThat(actualWarningValues, hasItem(DeprecationLogger.escape(msg))); + assertThat(actualWarningValues, hasItem(DeprecationLogger.escapeAndEncode(msg))); } assertEquals("Expected " + expectedWarnings.length + " warnings but found " + actualWarnings.size() + "\nExpected: " + Arrays.asList(expectedWarnings) + "\nActual: " + actualWarnings, diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/DoSection.java b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/DoSection.java index d509b6685a2..082040fb1eb 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/DoSection.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/DoSection.java @@ -263,7 +263,7 @@ public class DoSection implements ExecutableSection { final List missing = new ArrayList<>(); // LinkedHashSet so that missing expected warnings come back in a predictable order which is nice for testing final Set expected = - new LinkedHashSet<>(expectedWarningHeaders.stream().map(DeprecationLogger::escape).collect(Collectors.toList())); + new LinkedHashSet<>(expectedWarningHeaders.stream().map(DeprecationLogger::escapeAndEncode).collect(Collectors.toList())); for (final String header : warningHeaders) { final Matcher matcher = WARNING_HEADER_PATTERN.matcher(header); final boolean matches = matcher.matches();