HADOOP-11506. Configuration variable expansion regex expensive for long values. (Gera Shegalov via gera)

(cherry picked from commit 644548f201)

Conflicts:
	hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
	hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
This commit is contained in:
Gera Shegalov 2015-02-06 01:08:32 -08:00
parent 92ff524182
commit 3d36d4737c
3 changed files with 159 additions and 10 deletions

View File

@ -192,6 +192,9 @@ Release 2.7.0 - UNRELEASED
HADOOP-11188. hadoop-azure: automatically expand page blobs when they become HADOOP-11188. hadoop-azure: automatically expand page blobs when they become
full. (Eric Hanson via cnauroth) full. (Eric Hanson via cnauroth)
HADOOP-11506. Configuration variable expansion regex expensive for long
values. (Gera Shegalov via gera)
BUG FIXES BUG FIXES
HADOOP-11488. Difference in default connection timeout for S3A FS HADOOP-11488. Difference in default connection timeout for S3A FS

View File

@ -845,24 +845,100 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
reloadConfiguration(); reloadConfiguration();
} }
private static final Pattern VAR_PATTERN =
Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
private static final int MAX_SUBST = 20; private static final int MAX_SUBST = 20;
private static final int SUB_START_IDX = 0;
private static final int SUB_END_IDX = SUB_START_IDX + 1;
/**
* This is a manual implementation of the following regex
* "\\$\\{[^\\}\\$\u0020]+\\}". It can be 15x more efficient than
* a regex matcher as demonstrated by HADOOP-11506. This is noticeable with
* Hadoop apps building on the assumption Configuration#get is an O(1)
* hash table lookup, especially when the eval is a long string.
*
* @param eval a string that may contain variables requiring expansion.
* @return a 2-element int array res such that
* eval.substring(res[0], res[1]) is "var" for the left-most occurrence of
* ${var} in eval. If no variable is found -1, -1 is returned.
*/
private static int[] findSubVariable(String eval) {
int[] result = {-1, -1};
int matchStart;
int leftBrace;
// scanning for a brace first because it's less frequent than $
// that can occur in nested class names
//
match_loop:
for (matchStart = 1, leftBrace = eval.indexOf('{', matchStart);
// minimum left brace position (follows '$')
leftBrace > 0
// right brace of a smallest valid expression "${c}"
&& leftBrace + "{c".length() < eval.length();
leftBrace = eval.indexOf('{', matchStart)) {
int matchedLen = 0;
if (eval.charAt(leftBrace - 1) == '$') {
int subStart = leftBrace + 1; // after '{'
for (int i = subStart; i < eval.length(); i++) {
switch (eval.charAt(i)) {
case '}':
if (matchedLen > 0) { // match
result[SUB_START_IDX] = subStart;
result[SUB_END_IDX] = subStart + matchedLen;
break match_loop;
}
// fall through to skip 1 char
case ' ':
case '$':
matchStart = i + 1;
continue match_loop;
default:
matchedLen++;
}
}
// scanned from "${" to the end of eval, and no reset via ' ', '$':
// no match!
break match_loop;
} else {
// not a start of a variable
//
matchStart = leftBrace + 1;
}
}
return result;
}
/**
* Attempts to repeatedly expand the value {@code expr} by replacing the
* left-most substring of the form "${var}" in the following precedence order
* <ol>
* <li>by the value of the Java system property "var" if defined</li>
* <li>by the value of the configuration key "var" if defined</li>
* </ol>
*
* If var is unbounded the current state of expansion "prefix${var}suffix" is
* returned.
*
* @param expr the literal value of a config key
* @return null if expr is null, otherwise the value resulting from expanding
* expr using the algorithm above.
* @throws IllegalArgumentException when more than
* {@link Configuration#MAX_SUBST} replacements are required
*/
private String substituteVars(String expr) { private String substituteVars(String expr) {
if (expr == null) { if (expr == null) {
return null; return null;
} }
Matcher match = VAR_PATTERN.matcher("");
String eval = expr; String eval = expr;
for(int s=0; s<MAX_SUBST; s++) { for (int s = 0; s < MAX_SUBST; s++) {
match.reset(eval); final int[] varBounds = findSubVariable(eval);
if (!match.find()) { if (varBounds[SUB_START_IDX] == -1) {
return eval; return eval;
} }
String var = match.group(); final String var = eval.substring(varBounds[SUB_START_IDX],
var = var.substring(2, var.length()-1); // remove ${ .. } varBounds[SUB_END_IDX]);
String val = null; String val = null;
try { try {
val = System.getProperty(var); val = System.getProperty(var);
@ -875,8 +951,12 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
if (val == null) { if (val == null) {
return eval; // return literal ${var}: var is unbound return eval; // return literal ${var}: var is unbound
} }
final int dollar = varBounds[SUB_START_IDX] - "${".length();
final int afterRightBrace = varBounds[SUB_END_IDX] + "}".length();
// substitute // substitute
eval = eval.substring(0, match.start())+val+eval.substring(match.end()); eval = eval.substring(0, dollar)
+ val
+ eval.substring(afterRightBrace);
} }
throw new IllegalStateException("Variable substitution depth too large: " throw new IllegalStateException("Variable substitution depth too large: "
+ MAX_SUBST + " " + expr); + MAX_SUBST + " " + expr);

View File

@ -49,6 +49,8 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
import static org.apache.hadoop.util.PlatformName.IBM_JAVA; import static org.apache.hadoop.util.PlatformName.IBM_JAVA;
import static org.junit.Assert.fail;
import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.map.ObjectMapper;
public class TestConfiguration extends TestCase { public class TestConfiguration extends TestCase {
@ -1194,6 +1196,70 @@ public class TestConfiguration extends TestCase {
} }
} }
public void testInvalidSubstitutation() {
final Configuration configuration = new Configuration(false);
// 2-var loops
//
final String key = "test.random.key";
for (String keyExpression : Arrays.asList(
"${" + key + "}",
"foo${" + key + "}",
"foo${" + key + "}bar",
"${" + key + "}bar")) {
configuration.set(key, keyExpression);
checkSubDepthException(configuration, key);
}
//
// 3-variable loops
//
final String expVal1 = "${test.var2}";
String testVar1 = "test.var1";
configuration.set(testVar1, expVal1);
configuration.set("test.var2", "${test.var3}");
configuration.set("test.var3", "${test.var1}");
checkSubDepthException(configuration, testVar1);
// 3-variable loop with non-empty value prefix/suffix
//
final String expVal2 = "foo2${test.var2}bar2";
configuration.set(testVar1, expVal2);
configuration.set("test.var2", "foo3${test.var3}bar3");
configuration.set("test.var3", "foo1${test.var1}bar1");
checkSubDepthException(configuration, testVar1);
}
private static void checkSubDepthException(Configuration configuration,
String key) {
try {
configuration.get(key);
fail("IllegalStateException depth too large not thrown");
} catch (IllegalStateException e) {
assertTrue("Unexpected exception text: " + e,
e.getMessage().contains("substitution depth"));
}
}
public void testIncompleteSubbing() {
Configuration configuration = new Configuration(false);
String key = "test.random.key";
for (String keyExpression : Arrays.asList(
"{}",
"${}",
"{" + key,
"${" + key,
"foo${" + key,
"foo${" + key + "bar",
"foo{" + key + "}bar",
"${" + key + "bar")) {
configuration.set(key, keyExpression);
String value = configuration.get(key);
assertTrue("Unexpected value " + value, value.equals(keyExpression));
}
}
public void testGetClassByNameOrNull() throws Exception { public void testGetClassByNameOrNull() throws Exception {
Configuration config = new Configuration(); Configuration config = new Configuration();
Class<?> clazz = config.getClassByNameOrNull("java.lang.Object"); Class<?> clazz = config.getClassByNameOrNull("java.lang.Object");