From 3d36d4737c160d7dc8829e9dd6b801ef6726c0c0 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Fri, 6 Feb 2015 01:08:32 -0800 Subject: [PATCH] HADOOP-11506. Configuration variable expansion regex expensive for long values. (Gera Shegalov via gera) (cherry picked from commit 644548f201743408904dfe24b9f5b515b2c96713) Conflicts: hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java --- .../hadoop-common/CHANGES.txt | 3 + .../org/apache/hadoop/conf/Configuration.java | 100 ++++++++++++++++-- .../apache/hadoop/conf/TestConfiguration.java | 66 ++++++++++++ 3 files changed, 159 insertions(+), 10 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 4b0faf46ff8..41efde07246 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -192,6 +192,9 @@ Release 2.7.0 - UNRELEASED HADOOP-11188. hadoop-azure: automatically expand page blobs when they become full. (Eric Hanson via cnauroth) + HADOOP-11506. Configuration variable expansion regex expensive for long + values. (Gera Shegalov via gera) + BUG FIXES HADOOP-11488. 
Difference in default connection timeout for S3A FS diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 927d4b67ffa..4271c11808e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -844,25 +844,101 @@ private synchronized void addResourceObject(Resource resource) { resources.add(resource); // add to resources reloadConfiguration(); } - - private static final Pattern VAR_PATTERN = - Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}"); private static final int MAX_SUBST = 20; + private static final int SUB_START_IDX = 0; + private static final int SUB_END_IDX = SUB_START_IDX + 1; + + /** + * This is a manual implementation of the following regex + * "\\$\\{[^\\}\\$\u0020]+\\}". It can be 15x more efficient than + * a regex matcher as demonstrated by HADOOP-11506. This is noticeable with + * Hadoop apps building on the assumption Configuration#get is an O(1) + * hash table lookup, especially when the eval is a long string. + * + * @param eval a string that may contain variables requiring expansion. + * @return a 2-element int array res such that + * eval.substring(res[0], res[1]) is "var" for the left-most occurrence of + * ${var} in eval. If no variable is found -1, -1 is returned. 
+ */ + private static int[] findSubVariable(String eval) { + int[] result = {-1, -1}; + + int matchStart; + int leftBrace; + + // scanning for a brace first because it's less frequent than $ + // that can occur in nested class names + // + match_loop: + for (matchStart = 1, leftBrace = eval.indexOf('{', matchStart); + // minimum left brace position (follows '$') + leftBrace > 0 + // right brace of a smallest valid expression "${c}" + && leftBrace + "{c".length() < eval.length(); + leftBrace = eval.indexOf('{', matchStart)) { + int matchedLen = 0; + if (eval.charAt(leftBrace - 1) == '$') { + int subStart = leftBrace + 1; // after '{' + for (int i = subStart; i < eval.length(); i++) { + switch (eval.charAt(i)) { + case '}': + if (matchedLen > 0) { // match + result[SUB_START_IDX] = subStart; + result[SUB_END_IDX] = subStart + matchedLen; + break match_loop; + } + // fall through to skip 1 char + case ' ': + case '$': + matchStart = i + 1; + continue match_loop; + default: + matchedLen++; + } + } + // scanned from "${" to the end of eval, and no reset via ' ', '$': + // no match! + break match_loop; + } else { + // not a start of a variable + // + matchStart = leftBrace + 1; + } + } + return result; + } + + /** + * Attempts to repeatedly expand the value {@code expr} by replacing the + * left-most substring of the form "${var}" in the following precedence order + *
    <ol>
+ *   <li>by the value of the Java system property "var" if defined</li>
+ *   <li>by the value of the configuration key "var" if defined</li>
+ * </ol>
+ * + * If var is unbounded the current state of expansion "prefix${var}suffix" is + * returned. + * + * @param expr the literal value of a config key + * @return null if expr is null, otherwise the value resulting from expanding + * expr using the algorithm above. + * @throws IllegalArgumentException when more than + * {@link Configuration#MAX_SUBST} replacements are required + */ private String substituteVars(String expr) { if (expr == null) { return null; } - Matcher match = VAR_PATTERN.matcher(""); String eval = expr; - for(int s=0; s clazz = config.getClassByNameOrNull("java.lang.Object");