mirror of https://github.com/apache/lucene.git
LUCENE-3820: limit the amount of input fed to pattern matching so that the tests get past exponential-time patterns even when such patterns occur. A nice catch from Mike too -- un-ignore testNastyPattern and watch the processing time go wild with each additional input character...
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1294797 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6f40a126e6
commit
8c2e3cef8f
|
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.pattern;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
|
@ -31,6 +32,7 @@ import org.apache.lucene.analysis.MockTokenizer;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.junit.Ignore;
|
||||
|
||||
/**
|
||||
* Tests {@link PatternReplaceCharFilter}
|
||||
|
@ -276,6 +278,26 @@ public class TestPatternReplaceCharFilter extends BaseTokenStreamTestCase {
|
|||
return Pattern.compile( p );
|
||||
}
|
||||
|
||||
/**
|
||||
* A demonstration of how backtracking regular expressions can lead to relatively
|
||||
* easy DoS attacks.
|
||||
*
|
||||
* @see "http://swtch.com/~rsc/regexp/regexp1.html"
|
||||
*/
|
||||
@Ignore
|
||||
public void testNastyPattern() throws Exception {
|
||||
Pattern p = Pattern.compile("(c.+)*xy");
|
||||
String input = "[;<!--aecbbaa-->< febcfdc fbb = \"fbeeebff\" fc = dd >\\';<eefceceaa e= babae\" eacbaff =\"fcfaccacd\" = bcced>>>< bccaafe edb = ecfccdff\" <?</script>< edbd ebbcd=\"faacfcc\" aeca= bedbc ceeaac =adeafde aadccdaf = \"afcc ffda=aafbe �\"1843785582']";
|
||||
for (int i = 0; i < input.length(); i++) {
|
||||
Matcher matcher = p.matcher(input.substring(0, i));
|
||||
long t = System.currentTimeMillis();
|
||||
if (matcher.find()) {
|
||||
System.out.println(matcher.group());
|
||||
}
|
||||
System.out.println(i + " > " + (System.currentTimeMillis() - t) / 1000.0);
|
||||
}
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
int numPatterns = atLeast(100);
|
||||
|
@ -296,9 +318,9 @@ public class TestPatternReplaceCharFilter extends BaseTokenStreamTestCase {
|
|||
return new PatternReplaceCharFilter(p, replacement, CharReader.get(reader));
|
||||
}
|
||||
};
|
||||
long s = System.currentTimeMillis();
|
||||
checkRandomData(random, a, 1000 * RANDOM_MULTIPLIER, true); // only ascii
|
||||
System.out.println((System.currentTimeMillis() - s) / 1000.0 + " > " + p);
|
||||
checkRandomData(random, a, 1000 * RANDOM_MULTIPLIER,
|
||||
/* max input length. don't make it longer -- exponential processing
|
||||
* time for certain patterns. */ 40, true); // only ascii
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue