HADOOP-11506. Configuration variable expansion regex expensive for long values. (Gera Shegalov via gera)

(cherry picked from commit 644548f201)

Conflicts:
	hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
	hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java

(cherry picked from commit 3d36d4737c)
This commit is contained in:
Gera Shegalov 2015-02-06 01:08:32 -08:00 committed by Vinod Kumar Vavilapalli
parent a1c3e6a058
commit bb8a1551c3
3 changed files with 159 additions and 10 deletions

View File

@ -13,6 +13,9 @@ Release 2.6.1 - UNRELEASED
HADOOP-11238. Update the NameNode's Group Cache in the background when
possible (Chris Li via Colin P. McCabe)
HADOOP-11506. Configuration variable expansion regex expensive for long
values. (Gera Shegalov via gera)
BUG FIXES
HADOOP-11466: FastByteComparisons: do not use UNSAFE_COMPARER on the SPARC

View File

@ -832,24 +832,100 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
reloadConfiguration();
}
private static final Pattern VAR_PATTERN =
Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
private static final int MAX_SUBST = 20;
private static final int SUB_START_IDX = 0;
private static final int SUB_END_IDX = SUB_START_IDX + 1;
/**
* This is a manual implementation of the following regex
* "\\$\\{[^\\}\\$\u0020]+\\}". It can be 15x more efficient than
* a regex matcher as demonstrated by HADOOP-11506. This is noticeable with
* Hadoop apps building on the assumption Configuration#get is an O(1)
* hash table lookup, especially when the eval is a long string.
*
* @param eval a string that may contain variables requiring expansion.
* @return a 2-element int array res such that
* eval.substring(res[0], res[1]) is "var" for the left-most occurrence of
* ${var} in eval. If no variable is found -1, -1 is returned.
*/
private static int[] findSubVariable(String eval) {
int[] result = {-1, -1};
int matchStart;
int leftBrace;
// scanning for a brace first because it's less frequent than $
// that can occur in nested class names
//
match_loop:
for (matchStart = 1, leftBrace = eval.indexOf('{', matchStart);
// minimum left brace position (follows '$')
leftBrace > 0
// right brace of a smallest valid expression "${c}"
&& leftBrace + "{c".length() < eval.length();
leftBrace = eval.indexOf('{', matchStart)) {
int matchedLen = 0;
if (eval.charAt(leftBrace - 1) == '$') {
int subStart = leftBrace + 1; // after '{'
for (int i = subStart; i < eval.length(); i++) {
switch (eval.charAt(i)) {
case '}':
if (matchedLen > 0) { // match
result[SUB_START_IDX] = subStart;
result[SUB_END_IDX] = subStart + matchedLen;
break match_loop;
}
// fall through to skip 1 char
case ' ':
case '$':
matchStart = i + 1;
continue match_loop;
default:
matchedLen++;
}
}
// scanned from "${" to the end of eval, and no reset via ' ', '$':
// no match!
break match_loop;
} else {
// not a start of a variable
//
matchStart = leftBrace + 1;
}
}
return result;
}
/**
* Attempts to repeatedly expand the value {@code expr} by replacing the
* left-most substring of the form "${var}" in the following precedence order
* <ol>
* <li>by the value of the Java system property "var" if defined</li>
* <li>by the value of the configuration key "var" if defined</li>
* </ol>
*
* If var is unbounded the current state of expansion "prefix${var}suffix" is
* returned.
*
* @param expr the literal value of a config key
* @return null if expr is null, otherwise the value resulting from expanding
* expr using the algorithm above.
* @throws IllegalArgumentException when more than
* {@link Configuration#MAX_SUBST} replacements are required
*/
private String substituteVars(String expr) {
if (expr == null) {
return null;
}
Matcher match = VAR_PATTERN.matcher("");
String eval = expr;
for(int s=0; s<MAX_SUBST; s++) {
match.reset(eval);
if (!match.find()) {
for (int s = 0; s < MAX_SUBST; s++) {
final int[] varBounds = findSubVariable(eval);
if (varBounds[SUB_START_IDX] == -1) {
return eval;
}
String var = match.group();
var = var.substring(2, var.length()-1); // remove ${ .. }
final String var = eval.substring(varBounds[SUB_START_IDX],
varBounds[SUB_END_IDX]);
String val = null;
try {
val = System.getProperty(var);
@ -862,8 +938,12 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
if (val == null) {
return eval; // return literal ${var}: var is unbound
}
final int dollar = varBounds[SUB_START_IDX] - "${".length();
final int afterRightBrace = varBounds[SUB_END_IDX] + "}".length();
// substitute
eval = eval.substring(0, match.start())+val+eval.substring(match.end());
eval = eval.substring(0, dollar)
+ val
+ eval.substring(afterRightBrace);
}
throw new IllegalStateException("Variable substitution depth too large: "
+ MAX_SUBST + " " + expr);

View File

@ -49,6 +49,8 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.NetUtils;
import static org.apache.hadoop.util.PlatformName.IBM_JAVA;
import static org.junit.Assert.fail;
import org.codehaus.jackson.map.ObjectMapper;
public class TestConfiguration extends TestCase {
@ -1194,6 +1196,70 @@ public class TestConfiguration extends TestCase {
}
}
public void testInvalidSubstitutation() {
final Configuration configuration = new Configuration(false);
// 2-var loops
//
final String key = "test.random.key";
for (String keyExpression : Arrays.asList(
"${" + key + "}",
"foo${" + key + "}",
"foo${" + key + "}bar",
"${" + key + "}bar")) {
configuration.set(key, keyExpression);
checkSubDepthException(configuration, key);
}
//
// 3-variable loops
//
final String expVal1 = "${test.var2}";
String testVar1 = "test.var1";
configuration.set(testVar1, expVal1);
configuration.set("test.var2", "${test.var3}");
configuration.set("test.var3", "${test.var1}");
checkSubDepthException(configuration, testVar1);
// 3-variable loop with non-empty value prefix/suffix
//
final String expVal2 = "foo2${test.var2}bar2";
configuration.set(testVar1, expVal2);
configuration.set("test.var2", "foo3${test.var3}bar3");
configuration.set("test.var3", "foo1${test.var1}bar1");
checkSubDepthException(configuration, testVar1);
}
private static void checkSubDepthException(Configuration configuration,
String key) {
try {
configuration.get(key);
fail("IllegalStateException depth too large not thrown");
} catch (IllegalStateException e) {
assertTrue("Unexpected exception text: " + e,
e.getMessage().contains("substitution depth"));
}
}
public void testIncompleteSubbing() {
Configuration configuration = new Configuration(false);
String key = "test.random.key";
for (String keyExpression : Arrays.asList(
"{}",
"${}",
"{" + key,
"${" + key,
"foo${" + key,
"foo${" + key + "bar",
"foo{" + key + "}bar",
"${" + key + "bar")) {
configuration.set(key, keyExpression);
String value = configuration.get(key);
assertTrue("Unexpected value " + value, value.equals(keyExpression));
}
}
public void testGetClassByNameOrNull() throws Exception {
Configuration config = new Configuration();
Class<?> clazz = config.getClassByNameOrNull("java.lang.Object");