From bb8a1551c3a4a8e3fa68611585cb872a7c1a0fe0 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Fri, 6 Feb 2015 01:08:32 -0800 Subject: [PATCH] HADOOP-11506. Configuration variable expansion regex expensive for long values. (Gera Shegalov via gera) (cherry picked from commit 644548f201743408904dfe24b9f5b515b2c96713) Conflicts: hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java (cherry picked from commit 3d36d4737c160d7dc8829e9dd6b801ef6726c0c0) --- .../hadoop-common/CHANGES.txt | 3 + .../org/apache/hadoop/conf/Configuration.java | 100 ++++++++++++++++-- .../apache/hadoop/conf/TestConfiguration.java | 66 ++++++++++++ 3 files changed, 159 insertions(+), 10 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index bafd3bf90e5..6e563705a8a 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -13,6 +13,9 @@ Release 2.6.1 - UNRELEASED HADOOP-11238. Update the NameNode's Group Cache in the background when possible (Chris Li via Colin P. McCabe) + HADOOP-11506. Configuration variable expansion regex expensive for long + values. (Gera Shegalov via gera) + BUG FIXES HADOOP-11466: FastByteComparisons: do not use UNSAFE_COMPARER on the SPARC diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 12768656fe2..8967e50f8dc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -831,25 +831,101 @@ public class Configuration implements Iterable>, resources.add(resource); // add to resources reloadConfiguration(); } - - private static final Pattern VAR_PATTERN = - Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}"); private static final int MAX_SUBST = 20; + private static final int SUB_START_IDX = 0; + private static final int SUB_END_IDX = SUB_START_IDX + 1; + + /** + * This is a manual implementation of the following regex + * "\\$\\{[^\\}\\$\u0020]+\\}". It can be 15x more efficient than + * a regex matcher as demonstrated by HADOOP-11506. This is noticeable with + * Hadoop apps building on the assumption Configuration#get is an O(1) + * hash table lookup, especially when the eval is a long string. + * + * @param eval a string that may contain variables requiring expansion. + * @return a 2-element int array res such that + * eval.substring(res[0], res[1]) is "var" for the left-most occurrence of + * ${var} in eval. If no variable is found -1, -1 is returned. + */ + private static int[] findSubVariable(String eval) { + int[] result = {-1, -1}; + + int matchStart; + int leftBrace; + + // scanning for a brace first because it's less frequent than $ + // that can occur in nested class names + // + match_loop: + for (matchStart = 1, leftBrace = eval.indexOf('{', matchStart); + // minimum left brace position (follows '$') + leftBrace > 0 + // right brace of a smallest valid expression "${c}" + && leftBrace + "{c".length() < eval.length(); + leftBrace = eval.indexOf('{', matchStart)) { + int matchedLen = 0; + if (eval.charAt(leftBrace - 1) == '$') { + int subStart = leftBrace + 1; // after '{' + for (int i = subStart; i < eval.length(); i++) { + switch (eval.charAt(i)) { + case '}': + if (matchedLen > 0) { // match + result[SUB_START_IDX] = subStart; + result[SUB_END_IDX] = subStart + matchedLen; + break match_loop; + } + // fall through to skip 1 char + case ' ': + case '$': + matchStart = i + 1; + continue match_loop; + default: + matchedLen++; + } + } + // scanned from "${" to the end of eval, and no reset via ' ', '$': + // no match! + break match_loop; + } else { + // not a start of a variable + // + matchStart = leftBrace + 1; + } + } + return result; + } + + /** + * Attempts to repeatedly expand the value {@code expr} by replacing the + * left-most substring of the form "${var}" in the following precedence order + *
    + *
  1. by the value of the Java system property "var" if defined
  2. + *
  3. by the value of the configuration key "var" if defined
  4. + *
+ * + * If var is unbounded the current state of expansion "prefix${var}suffix" is + * returned. + * + * @param expr the literal value of a config key + * @return null if expr is null, otherwise the value resulting from expanding + * expr using the algorithm above. + * @throws IllegalArgumentException when more than + * {@link Configuration#MAX_SUBST} replacements are required + */ private String substituteVars(String expr) { if (expr == null) { return null; } - Matcher match = VAR_PATTERN.matcher(""); String eval = expr; - for(int s=0; s>, if (val == null) { return eval; // return literal ${var}: var is unbound } + final int dollar = varBounds[SUB_START_IDX] - "${".length(); + final int afterRightBrace = varBounds[SUB_END_IDX] + "}".length(); // substitute - eval = eval.substring(0, match.start())+val+eval.substring(match.end()); + eval = eval.substring(0, dollar) + + val + + eval.substring(afterRightBrace); } throw new IllegalStateException("Variable substitution depth too large: " + MAX_SUBST + " " + expr); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index 3bc8365e890..4d1414990b8 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -49,6 +49,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.NetUtils; import static org.apache.hadoop.util.PlatformName.IBM_JAVA; +import static org.junit.Assert.fail; + import org.codehaus.jackson.map.ObjectMapper; public class TestConfiguration extends TestCase { @@ -1194,6 +1196,70 @@ public class TestConfiguration extends TestCase { } } + public void testInvalidSubstitutation() { + final Configuration configuration = new Configuration(false); + + // 2-var loops + // + final String key = "test.random.key"; + for (String keyExpression : Arrays.asList( + "${" + key + "}", + "foo${" + key + "}", + "foo${" + key + "}bar", + "${" + key + "}bar")) { + configuration.set(key, keyExpression); + checkSubDepthException(configuration, key); + } + + // + // 3-variable loops + // + + final String expVal1 = "${test.var2}"; + String testVar1 = "test.var1"; + configuration.set(testVar1, expVal1); + configuration.set("test.var2", "${test.var3}"); + configuration.set("test.var3", "${test.var1}"); + checkSubDepthException(configuration, testVar1); + + // 3-variable loop with non-empty value prefix/suffix + // + final String expVal2 = "foo2${test.var2}bar2"; + configuration.set(testVar1, expVal2); + configuration.set("test.var2", "foo3${test.var3}bar3"); + configuration.set("test.var3", "foo1${test.var1}bar1"); + checkSubDepthException(configuration, testVar1); + } + + private static void checkSubDepthException(Configuration configuration, + String key) { + try { + configuration.get(key); + fail("IllegalStateException depth too large not thrown"); + } catch (IllegalStateException e) { + assertTrue("Unexpected exception text: " + e, + e.getMessage().contains("substitution depth")); + } + } + + public void testIncompleteSubbing() { + Configuration configuration = new Configuration(false); + String key = "test.random.key"; + for (String keyExpression : Arrays.asList( + "{}", + "${}", + "{" + key, + "${" + key, + "foo${" + key, + "foo${" + key + "bar", + "foo{" + key + "}bar", + "${" + key + "bar")) { + configuration.set(key, keyExpression); + String value = configuration.get(key); + assertTrue("Unexpected value " + value, value.equals(keyExpression)); + } + } + public void testGetClassByNameOrNull() throws Exception { Configuration config = new Configuration(); Class clazz = config.getClassByNameOrNull("java.lang.Object");