LUCENE-9186: remove linefiledocs usage from BaseTokenStreamTestCase

Robert Muir 2020-01-28 11:55:51 -05:00
parent 4350efa932
commit 3bcc97c8eb
1 changed file with 21 additions and 51 deletions


@@ -45,7 +45,6 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.Rethrow;
 import org.apache.lucene.util.TestUtil;
@@ -596,7 +595,6 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
   private static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter, boolean simple, boolean graphOffsetsAreCorrect, RandomIndexWriter iw) throws IOException {
-    final LineFileDocs docs = new LineFileDocs(random);
     Document doc = null;
     Field field = null, currentField = null;
     StringReader bogus = new StringReader("");
@@ -625,61 +623,33 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
       doc.add(currentField);
     }
 
-    try {
-      for (int i = 0; i < iterations; i++) {
-        String text;
-        if (random.nextInt(10) == 7) {
-          // real data from linedocs
-          text = docs.nextDoc().get("body");
-          if (text.length() > maxWordLength) {
-            // Take a random slice from the text...:
-            int startPos = random.nextInt(text.length() - maxWordLength);
-            if (startPos > 0 && Character.isLowSurrogate(text.charAt(startPos))) {
-              // Take care not to split up a surrogate pair:
-              startPos--;
-              assert Character.isHighSurrogate(text.charAt(startPos));
-            }
-            int endPos = startPos + maxWordLength - 1;
-            if (Character.isHighSurrogate(text.charAt(endPos))) {
-              // Take care not to split up a surrogate pair:
-              endPos--;
-            }
-            text = text.substring(startPos, 1+endPos);
-          }
-        } else {
-          // synthetic
-          text = TestUtil.randomAnalysisString(random, maxWordLength, simple);
-        }
-        try {
-          checkAnalysisConsistency(random, a, useCharFilter, text, graphOffsetsAreCorrect, currentField);
-          if (iw != null) {
-            if (random.nextInt(7) == 0) {
-              // pile up a multivalued field
-              IndexableFieldType ft = field.fieldType();
-              currentField = new Field("dummy", bogus, ft);
-              doc.add(currentField);
-            } else {
-              iw.addDocument(doc);
-              if (doc.getFields().size() > 1) {
-                // back to 1 field
-                currentField = field;
-                doc.removeFields("dummy");
-                doc.add(currentField);
-              }
-            }
-          }
-        } catch (Throwable t) {
-          // TODO: really we should pass a random seed to
-          // checkAnalysisConsistency then print it here too:
-          System.err.println("TEST FAIL: useCharFilter=" + useCharFilter + " text='" + escape(text) + "'");
-          Rethrow.rethrow(t);
-        }
-      }
-    } finally {
-      IOUtils.closeWhileHandlingException(docs);
+    for (int i = 0; i < iterations; i++) {
+      String text = TestUtil.randomAnalysisString(random, maxWordLength, simple);
+      try {
+        checkAnalysisConsistency(random, a, useCharFilter, text, graphOffsetsAreCorrect, currentField);
+        if (iw != null) {
+          if (random.nextInt(7) == 0) {
+            // pile up a multivalued field
+            IndexableFieldType ft = field.fieldType();
+            currentField = new Field("dummy", bogus, ft);
+            doc.add(currentField);
+          } else {
+            iw.addDocument(doc);
+            if (doc.getFields().size() > 1) {
+              // back to 1 field
+              currentField = field;
+              doc.removeFields("dummy");
+              doc.add(currentField);
+            }
+          }
+        }
+      } catch (Throwable t) {
+        // TODO: really we should pass a random seed to
+        // checkAnalysisConsistency then print it here too:
+        System.err.println("TEST FAIL: useCharFilter=" + useCharFilter + " text='" + escape(text) + "'");
+        Rethrow.rethrow(t);
+      }
     }
   }
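
For context, the private checkRandomData overload changed above is reached through the public checkRandomData(Random, Analyzer, int) entry points of BaseTokenStreamTestCase; after this commit every iteration analyzes a synthetic string from TestUtil.randomAnalysisString instead of occasionally slicing a LineFileDocs document. A minimal sketch of a typical caller is shown below; the class name TestMyAnalyzer, the MockAnalyzer stand-in, and the iteration count are illustrative assumptions, not part of this commit.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;

// Hypothetical analysis test that drives the code path edited above.
public class TestMyAnalyzer extends BaseTokenStreamTestCase {
  public void testRandomStrings() throws Exception {
    // Analyzer under test; MockAnalyzer is only a placeholder for this sketch.
    Analyzer a = new MockAnalyzer(random());
    // Runs many random iterations through checkAnalysisConsistency,
    // now fed purely by TestUtil.randomAnalysisString.
    checkRandomData(random(), a, 200 * RANDOM_MULTIPLIER);
    a.close();
  }
}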