LUCENE-5448: centralize random string generation in _TestUtil.

This closes #35



git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1568974 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Benson Margulies 2014-02-17 13:15:22 +00:00
parent c95e1f4969
commit 4cb6fa0ca8
2 changed files with 72 additions and 72 deletions

View File

@ -598,7 +598,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
}
} else {
// synthetic
text = randomAnalysisString(random, maxWordLength, simple);
text = _TestUtil.randomAnalysisString(random, maxWordLength, simple);
}
try {
@ -876,77 +876,6 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
field.setReaderValue(useCharFilter ? new MockCharFilter(reader, remainder) : reader);
}
}
/**
 * Produces random text intended as analyzer input.
 *
 * <p>Roughly one call in 31 returns a single unstructured random chunk from
 * {@link #randomSubString}. Otherwise the result is assembled from
 * space-separated "words" whose lengths are drawn from a Gaussian around a
 * randomly chosen average, which is more realistic for tokenizer-based tests.
 *
 * @param random source of randomness
 * @param maxLength exclusive upper bound on the length (in chars) of the result;
 *        must be non-negative. A value of {@code 0} yields the empty string.
 * @param simple passed through to {@link #randomSubString} to restrict the kinds
 *        of characters generated
 * @return a random string whose length is strictly less than {@code maxLength}
 *         (or empty when {@code maxLength} is 0)
 */
private static String randomAnalysisString(Random random, int maxLength, boolean simple) {
  assert maxLength >= 0;
  if (maxLength == 0) {
    // Random.nextInt(0) throws IllegalArgumentException, and the empty string is
    // the only value that fits anyway.
    return "";
  }

  // sometimes just a purely random string
  if (random.nextInt(31) == 0) {
    return randomSubString(random, random.nextInt(maxLength), simple);
  }

  // otherwise, try to make it more realistic with 'words' since most tests use MockTokenizer
  // first decide how big the string will really be: 0..maxLength-1
  final int targetLength = random.nextInt(maxLength);
  final int avgWordLength = _TestUtil.nextInt(random, 3, 8);
  final StringBuilder sb = new StringBuilder();
  while (sb.length() < targetLength) {
    if (sb.length() > 0) {
      sb.append(' ');
    }
    // re-draw until the Gaussian sample is non-negative
    int wordLength = -1;
    while (wordLength < 0) {
      wordLength = (int) (random.nextGaussian() * 3 + avgWordLength);
    }
    // never let a word push the text past the chosen target length
    wordLength = Math.min(wordLength, targetLength - sb.length());
    sb.append(randomSubString(random, wordLength, simple));
  }
  return sb.toString();
}
/**
 * Builds one random chunk of text of at most {@code wordLength} chars.
 *
 * <p>A single "evilness" level is drawn per call and selects which generator
 * feeds the chunk when {@code simple} is false; when {@code simple} is true each
 * piece is either a simple or an HTML-ish string, chosen by coin flip. Pieces are
 * appended until the requested length is reached, then the buffer is cut back to
 * {@code wordLength}, dropping a trailing high surrogate left dangling by the cut.
 * About one call in 17 additionally has its case randomly changed.
 *
 * @param random source of randomness
 * @param wordLength requested length in chars; {@code 0} yields the empty string
 * @param simple if true, only simple and HTML-ish generators are used
 * @return the generated chunk
 */
private static String randomSubString(Random random, int wordLength, boolean simple) {
  if (wordLength == 0) {
    return "";
  }

  // drawn once, up front, so every piece of this chunk comes from the same generator family
  final int evilness = _TestUtil.nextInt(random, 0, 20);
  final StringBuilder buf = new StringBuilder();

  while (buf.length() < wordLength) {
    final String piece;
    if (simple) {
      if (random.nextBoolean()) {
        piece = _TestUtil.randomSimpleString(random, wordLength);
      } else {
        piece = _TestUtil.randomHtmlishString(random, wordLength);
      }
    } else if (evilness < 10) {
      piece = _TestUtil.randomSimpleString(random, wordLength);
    } else if (evilness < 15) {
      assert buf.length() == 0; // we should always get wordLength back!
      piece = _TestUtil.randomRealisticUnicodeString(random, wordLength, wordLength);
    } else if (evilness == 16) {
      piece = _TestUtil.randomHtmlishString(random, wordLength);
    } else if (evilness == 17) {
      // gives a lot of punctuation
      piece = _TestUtil.randomRegexpishString(random, wordLength);
    } else {
      piece = _TestUtil.randomUnicodeString(random, wordLength);
    }
    buf.append(piece);
  }

  if (buf.length() > wordLength) {
    buf.setLength(wordLength);
    // the cut may have split a surrogate pair; do not leave a lone high surrogate at the end
    if (Character.isHighSurrogate(buf.charAt(wordLength - 1))) {
      buf.setLength(wordLength - 1);
    }
  }

  final String chunk = buf.toString();
  if (random.nextInt(17) != 0) {
    return chunk;
  }

  // mix up case
  final String recased = _TestUtil.randomlyRecaseCodePoints(random, chunk);
  assert recased.length() == chunk.length();
  return recased;
}
protected String toDot(Analyzer a, String inputText) throws IOException {
final StringWriter sw = new StringWriter();

View File

@ -1068,6 +1068,77 @@ public class _TestUtil {
}
return out.toString();
}
/**
 * Produces random text intended as analyzer input.
 *
 * <p>Roughly one call in 31 returns a single unstructured random chunk from
 * {@link #randomSubString}. Otherwise the result is assembled from
 * space-separated "words" whose lengths are drawn from a Gaussian around a
 * randomly chosen average, which is more realistic for tokenizer-based tests.
 *
 * @param random source of randomness
 * @param maxLength exclusive upper bound on the length (in chars) of the result;
 *        must be non-negative. A value of {@code 0} yields the empty string.
 * @param simple passed through to {@link #randomSubString} to restrict the kinds
 *        of characters generated
 * @return a random string whose length is strictly less than {@code maxLength}
 *         (or empty when {@code maxLength} is 0)
 */
public static String randomAnalysisString(Random random, int maxLength, boolean simple) {
  assert maxLength >= 0;
  if (maxLength == 0) {
    // Random.nextInt(0) throws IllegalArgumentException, and the empty string is
    // the only value that fits anyway.
    return "";
  }

  // sometimes just a purely random string
  if (random.nextInt(31) == 0) {
    return randomSubString(random, random.nextInt(maxLength), simple);
  }

  // otherwise, try to make it more realistic with 'words' since most tests use MockTokenizer
  // first decide how big the string will really be: 0..maxLength-1
  final int targetLength = random.nextInt(maxLength);
  final int avgWordLength = _TestUtil.nextInt(random, 3, 8);
  final StringBuilder sb = new StringBuilder();
  while (sb.length() < targetLength) {
    if (sb.length() > 0) {
      sb.append(' ');
    }
    // re-draw until the Gaussian sample is non-negative
    int wordLength = -1;
    while (wordLength < 0) {
      wordLength = (int) (random.nextGaussian() * 3 + avgWordLength);
    }
    // never let a word push the text past the chosen target length
    wordLength = Math.min(wordLength, targetLength - sb.length());
    sb.append(randomSubString(random, wordLength, simple));
  }
  return sb.toString();
}
/**
 * Builds one random chunk of text of at most {@code wordLength} chars.
 *
 * <p>A single "evilness" level is drawn per call and selects which generator
 * feeds the chunk when {@code simple} is false; when {@code simple} is true each
 * piece is either a simple or an HTML-ish string, chosen by coin flip. Pieces are
 * appended until the requested length is reached, then the buffer is cut back to
 * {@code wordLength}, dropping a trailing high surrogate left dangling by the cut.
 * About one call in 17 additionally has its case randomly changed.
 *
 * @param random source of randomness
 * @param wordLength requested length in chars; {@code 0} yields the empty string
 * @param simple if true, only simple and HTML-ish generators are used
 * @return the generated chunk
 */
public static String randomSubString(Random random, int wordLength, boolean simple) {
  if (wordLength == 0) {
    return "";
  }

  // drawn once, up front, so every piece of this chunk comes from the same generator family
  final int evilness = _TestUtil.nextInt(random, 0, 20);
  final StringBuilder buf = new StringBuilder();

  while (buf.length() < wordLength) {
    final String piece;
    if (simple) {
      if (random.nextBoolean()) {
        piece = _TestUtil.randomSimpleString(random, wordLength);
      } else {
        piece = _TestUtil.randomHtmlishString(random, wordLength);
      }
    } else if (evilness < 10) {
      piece = _TestUtil.randomSimpleString(random, wordLength);
    } else if (evilness < 15) {
      assert buf.length() == 0; // we should always get wordLength back!
      piece = _TestUtil.randomRealisticUnicodeString(random, wordLength, wordLength);
    } else if (evilness == 16) {
      piece = _TestUtil.randomHtmlishString(random, wordLength);
    } else if (evilness == 17) {
      // gives a lot of punctuation
      piece = _TestUtil.randomRegexpishString(random, wordLength);
    } else {
      piece = _TestUtil.randomUnicodeString(random, wordLength);
    }
    buf.append(piece);
  }

  if (buf.length() > wordLength) {
    buf.setLength(wordLength);
    // the cut may have split a surrogate pair; do not leave a lone high surrogate at the end
    if (Character.isHighSurrogate(buf.charAt(wordLength - 1))) {
      buf.setLength(wordLength - 1);
    }
  }

  final String chunk = buf.toString();
  if (random.nextInt(17) != 0) {
    return chunk;
  }

  // mix up case
  final String recased = _TestUtil.randomlyRecaseCodePoints(random, chunk);
  assert recased.length() == chunk.length();
  return recased;
}
/** List of characters that match {@link Character#isWhitespace} */
public static final char[] WHITESPACE_CHARACTERS = new char[] {