LUCENE-3911: always use the same unicode block in the realistic case, sometimes use regexpish for lots of punctuation, fix off-by-one in randomRegexpishString

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1304823 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-03-24 15:30:58 +00:00
parent a738c94656
commit 8f53e6eaa7
2 changed files with 14 additions and 2 deletions

View File

@ -611,9 +611,13 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
if (evilness < 10) {
sb.append(_TestUtil.randomSimpleString(random, wordLength));
} else if (evilness < 15) {
sb.append(_TestUtil.randomRealisticUnicodeString(random, wordLength));
assert sb.length() == 0; // we should always get wordLength back!
sb.append(_TestUtil.randomRealisticUnicodeString(random, wordLength, wordLength));
} else if (evilness == 16) {
sb.append(_TestUtil.randomHtmlishString(random, wordLength));
} else if (evilness == 17) {
// gives a lot of punctuation
sb.append(_TestUtil.randomRegexpishString(random, wordLength));
} else {
sb.append(_TestUtil.randomUnicodeString(random, wordLength));
}

View File

@ -264,7 +264,15 @@ public class _TestUtil {
* If you call this enough times, you might get a valid regex!
*/
public static String randomRegexpishString(Random r) {
final int end = r.nextInt(20);
return randomRegexpishString(r, 20);
}
/**
* Returns a String that's "regexpish" (contains lots of operators typically found in regular expressions).
* If you call this enough times, you might get a valid regex!
*/
public static String randomRegexpishString(Random r, int maxLength) {
final int end = nextInt(r, 0, maxLength);
if (end == 0) {
// allow 0 length
return "";