Improve warning value extraction performance in Response (#50208)

This commit improves the performance of warning value extraction in the
low-level REST client, and is similar to the approach taken in
#24114. There are some differences since the low-level REST client might
be connected to Elasticsearch through a proxy that injects its own
warnings.
This commit is contained in:
Darren Foong 2020-01-13 16:11:52 +00:00 committed by Jason Tedor
parent 1a32d7142a
commit d1bde0a718
No known key found for this signature in database
GPG Key ID: 8CF9C19984731E85
2 changed files with 92 additions and 5 deletions

View File

@ -116,6 +116,86 @@ public class Response {
"GMT" + // GMT
"\")?"); // closing quote (optional, since an older version can still send a warn-date)
/**
* Optimized regular expression to test if a string matches the RFC 1123 date
* format (with quotes and leading space). Start/end of line characters and
* atomic groups are used to prevent backtracking.
* <p>
* Both the opening and the closing quote are mandatory in this pattern; the
* whole input must be exactly one leading space followed by a quoted date.
*/
private static final Pattern WARNING_HEADER_DATE_PATTERN = Pattern.compile(
"^ " + // start of line, leading space
// quoted RFC 1123 date format
"\"" + // opening quote
"(?>Mon|Tue|Wed|Thu|Fri|Sat|Sun), " + // day of week, atomic group to prevent backtracking
"\\d{2} " + // 2-digit day
"(?>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) " + // month, atomic group to prevent backtracking
"\\d{4} " + // 4-digit year
"\\d{2}:\\d{2}:\\d{2} " + // (two-digit hour):(two-digit minute):(two-digit second)
"GMT" + // GMT
"\"$"); // closing quote (required by this pattern), end of line
/**
* Length of RFC 1123 format (with quotes and leading space), used in
* extractWarningValueFromWarningHeader(String) to slice the candidate
* date off the end of a warning header. The terms below add up to 32.
*/
private static final int WARNING_HEADER_DATE_LENGTH = 0
+ 1 // leading space
+ 1 // opening quote
+ 3 + 1 + 1 // 3-letter day of week, comma, space
+ 2 + 1 // 2-digit day, space
+ 3 + 1 // 3-letter month, space
+ 4 + 1 // 4-digit year, space
+ 2 + 1 + 2 + 1 + 2 + 1 // hour, colon, minute, colon, second, space
+ 3 // GMT
+ 1; // closing quote
/**
 * Cheap pre-check for warning headers emitted by Elasticsearch. Rather than
 * validating the full RFC 7234 warning grammar, this only looks at the start
 * of the value: warn code {@code 299} followed by a warn agent that begins
 * with {@code Elasticsearch-}.
 *
 * @param s the value of a warning header
 * @return {@code true} if the value starts with {@code "299 Elasticsearch-"}
 */
private static boolean matchWarningHeaderPatternByPrefix(final String s) {
    final boolean hasElasticsearchPrefix = s.startsWith("299 Elasticsearch-");
    return hasElasticsearchPrefix;
}
/**
 * Extracts the quoted warn-text from a warning header value without running
 * the full header regular expression. Refer to
 * org.elasticsearch.common.logging.DeprecationLogger for the format being parsed.
 * <p>
 * The header is expected to look like
 * {@code 299 Elasticsearch-... "warn-text"}, optionally followed by a quoted
 * RFC 1123 warn-date. If the quotes delimiting the warn-text cannot be
 * located (for example because an intermediary rewrote the header), the
 * header is returned unchanged instead of throwing or returning a truncated
 * fragment.
 *
 * @param s the value of a warning header
 * @return the text between the first quote and the closing quote of the
 *         warn-text, or {@code s} itself if no such quoted section exists
 */
private static String extractWarningValueFromWarningHeader(final String s) {
    String warningHeader = s;

    /*
     * The warn-date is optional. If the tail of the header is a quoted RFC 1123 date, strip it so that the
     * closing quote of the warn-text becomes the last character of warningHeader.
     */
    if (s.length() > WARNING_HEADER_DATE_LENGTH) {
        final int dateStart = s.length() - WARNING_HEADER_DATE_LENGTH;
        final Matcher matcher = WARNING_HEADER_DATE_PATTERN.matcher(s.substring(dateStart));
        if (matcher.matches()) {
            warningHeader = s.substring(0, dateStart);
        }
    }

    final int firstQuote = warningHeader.indexOf('\"');
    final int lastQuote = warningHeader.length() - 1;

    // Guard against headers that passed the prefix check but are not shaped like <prefix> "<text>":
    // no quote at all, only a single quote, or a trailing character that is not the closing quote.
    if (firstQuote < 0 || firstQuote >= lastQuote || warningHeader.charAt(lastQuote) != '\"') {
        return s;
    }

    final String warningValue = warningHeader.substring(firstQuote + 1, lastQuote);
    assert assertWarningValue(s, warningValue);
    return warningValue;
}
/**
 * Assertion helper that cross-checks a fast-path extraction against the full
 * warning header regular expression. Refer to
 * org.elasticsearch.common.logging.DeprecationLogger
 *
 * @param s            the complete warning header value
 * @param warningValue the warn-text extracted from {@code s}
 * @return {@code true} if the first capture group of the full pattern equals
 *         {@code warningValue}
 */
private static boolean assertWarningValue(final String s, final String warningValue) {
    final Matcher fullHeaderMatcher = WARNING_HEADER_PATTERN.matcher(s);
    final boolean wellFormed = fullHeaderMatcher.matches();
    // the header is expected to satisfy the full pattern whenever this check runs
    assert wellFormed;
    final String expectedValue = fullHeaderMatcher.group(1);
    return expectedValue.equals(warningValue);
}
/**
* Returns a list of all warning headers returned in the response.
*/
@ -123,9 +203,8 @@ public class Response {
List<String> warnings = new ArrayList<>();
for (Header header : response.getHeaders("Warning")) {
String warning = header.getValue();
final Matcher matcher = WARNING_HEADER_PATTERN.matcher(warning);
if (matcher.matches()) {
warnings.add(matcher.group(1));
if (matchWarningHeaderPatternByPrefix(warning)) {
warnings.add(extractWarningValueFromWarningHeader(warning));
} else {
warnings.add(warning);
}

View File

@ -424,6 +424,7 @@ public class RestClientSingleHostTests extends RestClientTestCase {
public void testDeprecationWarnings() throws Exception {
String chars = randomAsciiAlphanumOfLength(5);
assertDeprecationWarnings(singletonList("poorly formatted " + chars), singletonList("poorly formatted " + chars));
assertDeprecationWarnings(singletonList(formatWarningWithoutDate(chars)), singletonList(chars));
assertDeprecationWarnings(singletonList(formatWarning(chars)), singletonList(chars));
assertDeprecationWarnings(
Arrays.asList(formatWarning(chars), "another one", "and another"),
@ -433,6 +434,9 @@ public class RestClientSingleHostTests extends RestClientTestCase {
Arrays.asList("ignorable one", "and another"));
assertDeprecationWarnings(singletonList("exact"), singletonList("exact"));
assertDeprecationWarnings(Collections.<String>emptyList(), Collections.<String>emptyList());
String proxyWarning = "112 - \"network down\" \"Sat, 25 Aug 2012 23:34:45 GMT\"";
assertDeprecationWarnings(singletonList(proxyWarning), singletonList(proxyWarning));
}
private enum DeprecationWarningOption {
@ -518,9 +522,13 @@ public class RestClientSingleHostTests extends RestClientTestCase {
* Emulates Elasticsearch's DeprecationLogger.formatWarning in simple
* cases. We don't have that available because we're testing against 1.7.
*/
private static String formatWarning(String warningBody) {
private static String formatWarningWithoutDate(String warningBody) {
final String hash = new String(new byte[40], StandardCharsets.UTF_8).replace('\0', 'e');
return "299 Elasticsearch-1.2.2-SNAPSHOT-" + hash + " \"" + warningBody + "\" \"Mon, 01 Jan 2001 00:00:00 GMT\"";
return "299 Elasticsearch-1.2.2-SNAPSHOT-" + hash + " \"" + warningBody + "\"";
}
/**
 * Builds a warning header value that carries a fixed trailing warn-date, by
 * appending the quoted date to the date-less form.
 */
private static String formatWarning(String warningBody) {
    final String withoutDate = formatWarningWithoutDate(warningBody);
    final String quotedDate = " \"Mon, 01 Jan 2001 00:00:00 GMT\"";
    return withoutDate + quotedDate;
}
private HttpUriRequest performRandomRequest(String method) throws Exception {