HADOOP-11506. Configuration variable expansion regex expensive for long values. (Gera Shegalov via gera)
This commit is contained in:
parent
02f154a001
commit
644548f201
|
@ -573,6 +573,9 @@ Release 2.7.0 - UNRELEASED
|
||||||
HADOOP-11188. hadoop-azure: automatically expand page blobs when they become
|
HADOOP-11188. hadoop-azure: automatically expand page blobs when they become
|
||||||
full. (Eric Hanson via cnauroth)
|
full. (Eric Hanson via cnauroth)
|
||||||
|
|
||||||
|
HADOOP-11506. Configuration variable expansion regex expensive for long
|
||||||
|
values. (Gera Shegalov via gera)
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
||||||
HADOOP-11488. Difference in default connection timeout for S3A FS
|
HADOOP-11488. Difference in default connection timeout for S3A FS
|
||||||
|
|
|
@ -846,30 +846,105 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
|
||||||
reloadConfiguration();
|
reloadConfiguration();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final Pattern VAR_PATTERN =
|
|
||||||
Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
|
|
||||||
|
|
||||||
private static final int MAX_SUBST = 20;
|
private static final int MAX_SUBST = 20;
|
||||||
|
|
||||||
|
private static final int SUB_START_IDX = 0;
|
||||||
|
private static final int SUB_END_IDX = SUB_START_IDX + 1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a manual implementation of the following regex
|
||||||
|
* "\\$\\{[^\\}\\$\u0020]+\\}". It can be 15x more efficient than
|
||||||
|
* a regex matcher as demonstrated by HADOOP-11506. This is noticeable with
|
||||||
|
* Hadoop apps building on the assumption Configuration#get is an O(1)
|
||||||
|
* hash table lookup, especially when the eval is a long string.
|
||||||
|
*
|
||||||
|
* @param eval a string that may contain variables requiring expansion.
|
||||||
|
* @return a 2-element int array res such that
|
||||||
|
* eval.substring(res[0], res[1]) is "var" for the left-most occurrence of
|
||||||
|
* ${var} in eval. If no variable is found -1, -1 is returned.
|
||||||
|
*/
|
||||||
|
private static int[] findSubVariable(String eval) {
|
||||||
|
int[] result = {-1, -1};
|
||||||
|
|
||||||
|
int matchStart;
|
||||||
|
int leftBrace;
|
||||||
|
|
||||||
|
// scanning for a brace first because it's less frequent than $
|
||||||
|
// that can occur in nested class names
|
||||||
|
//
|
||||||
|
match_loop:
|
||||||
|
for (matchStart = 1, leftBrace = eval.indexOf('{', matchStart);
|
||||||
|
// minimum left brace position (follows '$')
|
||||||
|
leftBrace > 0
|
||||||
|
// right brace of a smallest valid expression "${c}"
|
||||||
|
&& leftBrace + "{c".length() < eval.length();
|
||||||
|
leftBrace = eval.indexOf('{', matchStart)) {
|
||||||
|
int matchedLen = 0;
|
||||||
|
if (eval.charAt(leftBrace - 1) == '$') {
|
||||||
|
int subStart = leftBrace + 1; // after '{'
|
||||||
|
for (int i = subStart; i < eval.length(); i++) {
|
||||||
|
switch (eval.charAt(i)) {
|
||||||
|
case '}':
|
||||||
|
if (matchedLen > 0) { // match
|
||||||
|
result[SUB_START_IDX] = subStart;
|
||||||
|
result[SUB_END_IDX] = subStart + matchedLen;
|
||||||
|
break match_loop;
|
||||||
|
}
|
||||||
|
// fall through to skip 1 char
|
||||||
|
case ' ':
|
||||||
|
case '$':
|
||||||
|
matchStart = i + 1;
|
||||||
|
continue match_loop;
|
||||||
|
default:
|
||||||
|
matchedLen++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// scanned from "${" to the end of eval, and no reset via ' ', '$':
|
||||||
|
// no match!
|
||||||
|
break match_loop;
|
||||||
|
} else {
|
||||||
|
// not a start of a variable
|
||||||
|
//
|
||||||
|
matchStart = leftBrace + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Attempts to repeatedly expand the value {@code expr} by replacing the
|
||||||
|
* left-most substring of the form "${var}" in the following precedence order
|
||||||
|
* <ol>
|
||||||
|
* <li>by the value of the Java system property "var" if defined</li>
|
||||||
|
* <li>by the value of the configuration key "var" if defined</li>
|
||||||
|
* </ol>
|
||||||
|
*
|
||||||
|
* If var is unbounded the current state of expansion "prefix${var}suffix" is
|
||||||
|
* returned.
|
||||||
|
*
|
||||||
|
* If a cycle is detected: replacing var1 requires replacing var2 ... requires
|
||||||
|
* replacing var1, i.e., the cycle is shorter than
|
||||||
|
* {@link Configuration#MAX_SUBST} then the original expr is returned.
|
||||||
|
*
|
||||||
|
* @param expr the literal value of a config key
|
||||||
|
* @return null if expr is null, otherwise the value resulting from expanding
|
||||||
|
* expr using the algorithm above.
|
||||||
|
* @throws IllegalArgumentException when more than
|
||||||
|
* {@link Configuration#MAX_SUBST} replacements are required
|
||||||
|
*/
|
||||||
private String substituteVars(String expr) {
|
private String substituteVars(String expr) {
|
||||||
if (expr == null) {
|
if (expr == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
Matcher match = VAR_PATTERN.matcher("");
|
|
||||||
String eval = expr;
|
String eval = expr;
|
||||||
Set<String> evalSet = new HashSet<String>();
|
Set<String> evalSet = null;
|
||||||
for(int s=0; s<MAX_SUBST; s++) {
|
for(int s = 0; s < MAX_SUBST; s++) {
|
||||||
if (evalSet.contains(eval)) {
|
final int[] varBounds = findSubVariable(eval);
|
||||||
// Cyclic resolution pattern detected. Return current expression.
|
if (varBounds[SUB_START_IDX] == -1) {
|
||||||
return eval;
|
return eval;
|
||||||
}
|
}
|
||||||
evalSet.add(eval);
|
final String var = eval.substring(varBounds[SUB_START_IDX],
|
||||||
match.reset(eval);
|
varBounds[SUB_END_IDX]);
|
||||||
if (!match.find()) {
|
|
||||||
return eval;
|
|
||||||
}
|
|
||||||
String var = match.group();
|
|
||||||
var = var.substring(2, var.length()-1); // remove ${ .. }
|
|
||||||
String val = null;
|
String val = null;
|
||||||
try {
|
try {
|
||||||
val = System.getProperty(var);
|
val = System.getProperty(var);
|
||||||
|
@ -882,8 +957,23 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
|
||||||
if (val == null) {
|
if (val == null) {
|
||||||
return eval; // return literal ${var}: var is unbound
|
return eval; // return literal ${var}: var is unbound
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// prevent recursive resolution
|
||||||
|
//
|
||||||
|
final int dollar = varBounds[SUB_START_IDX] - "${".length();
|
||||||
|
final int afterRightBrace = varBounds[SUB_END_IDX] + "}".length();
|
||||||
|
final String refVar = eval.substring(dollar, afterRightBrace);
|
||||||
|
if (evalSet == null) {
|
||||||
|
evalSet = new HashSet<String>();
|
||||||
|
}
|
||||||
|
if (!evalSet.add(refVar)) {
|
||||||
|
return expr; // return original expression if there is a loop
|
||||||
|
}
|
||||||
|
|
||||||
// substitute
|
// substitute
|
||||||
eval = eval.substring(0, match.start())+val+eval.substring(match.end());
|
eval = eval.substring(0, dollar)
|
||||||
|
+ val
|
||||||
|
+ eval.substring(afterRightBrace);
|
||||||
}
|
}
|
||||||
throw new IllegalStateException("Variable substitution depth too large: "
|
throw new IllegalStateException("Variable substitution depth too large: "
|
||||||
+ MAX_SUBST + " " + expr);
|
+ MAX_SUBST + " " + expr);
|
||||||
|
|
|
@ -1271,12 +1271,56 @@ public class TestConfiguration extends TestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testInvalidSubstitutation() {
|
public void testInvalidSubstitutation() {
|
||||||
|
final Configuration configuration = new Configuration(false);
|
||||||
|
|
||||||
|
// 2-var loops
|
||||||
|
//
|
||||||
|
final String key = "test.random.key";
|
||||||
|
for (String keyExpression : Arrays.asList(
|
||||||
|
"${" + key + "}",
|
||||||
|
"foo${" + key + "}",
|
||||||
|
"foo${" + key + "}bar",
|
||||||
|
"${" + key + "}bar")) {
|
||||||
|
configuration.set(key, keyExpression);
|
||||||
|
assertEquals("Unexpected value", keyExpression, configuration.get(key));
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// 3-variable loops
|
||||||
|
//
|
||||||
|
|
||||||
|
final String expVal1 = "${test.var2}";
|
||||||
|
String testVar1 = "test.var1";
|
||||||
|
configuration.set(testVar1, expVal1);
|
||||||
|
configuration.set("test.var2", "${test.var3}");
|
||||||
|
configuration.set("test.var3", "${test.var1}");
|
||||||
|
assertEquals("Unexpected value", expVal1, configuration.get(testVar1));
|
||||||
|
|
||||||
|
// 3-variable loop with non-empty value prefix/suffix
|
||||||
|
//
|
||||||
|
final String expVal2 = "foo2${test.var2}bar2";
|
||||||
|
configuration.set(testVar1, expVal2);
|
||||||
|
configuration.set("test.var2", "foo3${test.var3}bar3");
|
||||||
|
configuration.set("test.var3", "foo1${test.var1}bar1");
|
||||||
|
assertEquals("Unexpected value", expVal2, configuration.get(testVar1));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testIncompleteSubbing() {
|
||||||
|
Configuration configuration = new Configuration(false);
|
||||||
String key = "test.random.key";
|
String key = "test.random.key";
|
||||||
String keyExpression = "${" + key + "}";
|
for (String keyExpression : Arrays.asList(
|
||||||
Configuration configuration = new Configuration();
|
"{}",
|
||||||
configuration.set(key, keyExpression);
|
"${}",
|
||||||
String value = configuration.get(key);
|
"{" + key,
|
||||||
assertTrue("Unexpected value " + value, value.equals(keyExpression));
|
"${" + key,
|
||||||
|
"foo${" + key,
|
||||||
|
"foo${" + key + "bar",
|
||||||
|
"foo{" + key + "}bar",
|
||||||
|
"${" + key + "bar")) {
|
||||||
|
configuration.set(key, keyExpression);
|
||||||
|
String value = configuration.get(key);
|
||||||
|
assertTrue("Unexpected value " + value, value.equals(keyExpression));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testBoolean() {
|
public void testBoolean() {
|
||||||
|
|
Loading…
Reference in New Issue