add more deterministic tests; increase iters for testRandom

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1391865 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-09-29 20:13:02 +00:00
parent 9e3a9ec24d
commit 536af60a26
1 changed files with 105 additions and 59 deletions

View File

@ -17,6 +17,7 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@ -174,9 +175,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
continue;
}
boolean fieldHasPayloads = random().nextBoolean();
fieldInfoArray[fieldUpto] = new FieldInfo(field, true, fieldUpto, false, false, fieldHasPayloads,
fieldInfoArray[fieldUpto] = new FieldInfo(field, true, fieldUpto, false, false, true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
null, DocValues.Type.FIXED_INTS_8, null);
fieldUpto++;
@ -185,9 +184,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
fields.put(field, postings);
Set<String> seenTerms = new HashSet<String>();
// TODO
//final int numTerms = atLeast(10);
final int numTerms = 4;
final int numTerms = atLeast(10);
for(int termUpto=0;termUpto<numTerms;termUpto++) {
String term = _TestUtil.randomSimpleString(random());
if (seenTerms.contains(term)) {
@ -196,14 +193,15 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
seenTerms.add(term);
int numDocs;
if (random().nextInt(10) == 3 && numBigTerms < 2) {
// 10% of the time make a highish freq term:
if (numBigTerms == 0 || (random().nextInt(10) == 3 && numBigTerms < 2)) {
// Make at least 1 big term, then maybe (~10%
// chance) make another:
numDocs = RANDOM_MULTIPLIER * _TestUtil.nextInt(random(), 50000, 70000);
numBigTerms++;
term = "big_" + term;
} else if (random().nextInt(10) == 3 && numMediumTerms < 5) {
// 10% of the time make a medium freq term:
// TODO not high enough to test level 1 skipping:
} else if (numMediumTerms == 0 || (random().nextInt(10) == 3 && numMediumTerms < 5)) {
// Make at least 1 medium term, then maybe (~10%
// chance) make up to 4 more:
numDocs = RANDOM_MULTIPLIER * _TestUtil.nextInt(random(), 3000, 6000);
numMediumTerms++;
term = "medium_" + term;
@ -225,11 +223,9 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
// TODO: more realistic to inversely tie this to numDocs:
int maxDocSpacing = _TestUtil.nextInt(random(), 1, 100);
// 10% of the time create big payloads:
int payloadSize;
if (!fieldHasPayloads) {
payloadSize = 0;
} else if (random().nextInt(10) == 7) {
if (random().nextInt(10) == 7) {
// 10% of the time create big payloads:
payloadSize = random().nextInt(50);
} else {
payloadSize = random().nextInt(10);
@ -353,7 +349,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
// maxAllowed = the "highest" we can index, but we will still
// randomly index at lower IndexOption
private FieldsProducer buildIndex(Directory dir, IndexOptions maxAllowed, boolean allowPayloads) throws IOException {
private FieldsProducer buildIndex(Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException {
Codec codec = getCodec();
SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", 1+maxDocID, false, codec, null, null);
@ -380,7 +376,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
// Randomly picked the IndexOptions to index this
// field with:
IndexOptions indexOptions = IndexOptions.values()[random().nextInt(1+fieldMaxIndexOption)];
IndexOptions indexOptions = IndexOptions.values()[alwaysTestMax ? fieldMaxIndexOption : random().nextInt(1+fieldMaxIndexOption)];
boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;
newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.name,
@ -501,7 +497,8 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
// Maximum options (docs/freqs/positions/offsets) to test:
IndexOptions maxIndexOptions,
EnumSet<Option> options) throws IOException {
EnumSet<Option> options,
boolean alwaysTestMax) throws IOException {
if (VERBOSE) {
System.out.println(" verifyEnum: options=" + options + " maxIndexOptions=" + maxIndexOptions);
@ -533,17 +530,17 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
boolean allowFreqs = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 &&
maxIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
boolean doCheckFreqs = allowFreqs && random().nextInt(3) <= 2;
boolean doCheckFreqs = allowFreqs && (alwaysTestMax || random().nextInt(3) <= 2);
boolean allowPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 &&
maxIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
boolean doCheckPositions = allowPositions && random().nextInt(3) <= 2;
boolean doCheckPositions = allowPositions && (alwaysTestMax || random().nextInt(3) <= 2);
boolean allowOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0 &&
maxIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
boolean doCheckOffsets = allowOffsets && random().nextInt(3) <= 2;
boolean doCheckOffsets = allowOffsets && (alwaysTestMax || random().nextInt(3) <= 2);
boolean doCheckPayloads = options.contains(Option.PAYLOADS) && allowPositions && fieldInfo.hasPayloads() && random().nextInt(3) <= 2;
boolean doCheckPayloads = options.contains(Option.PAYLOADS) && allowPositions && fieldInfo.hasPayloads() && (alwaysTestMax || random().nextInt(3) <= 2);
DocsEnum prevDocsEnum = null;
@ -559,10 +556,10 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
}
int flags = 0;
if (random().nextBoolean()) {
if (alwaysTestMax || random().nextBoolean()) {
flags |= DocsAndPositionsEnum.FLAG_OFFSETS;
}
if (random().nextBoolean()) {
if (alwaysTestMax || random().nextBoolean()) {
flags |= DocsAndPositionsEnum.FLAG_PAYLOADS;
}
@ -590,10 +587,10 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
}
int flags = 0;
if (doCheckOffsets || random().nextInt(3) == 1) {
if (alwaysTestMax || doCheckOffsets || random().nextInt(3) == 1) {
flags |= DocsAndPositionsEnum.FLAG_OFFSETS;
}
if (doCheckPayloads|| random().nextInt(3) == 1) {
if (alwaysTestMax || doCheckPayloads|| random().nextInt(3) == 1) {
flags |= DocsAndPositionsEnum.FLAG_PAYLOADS;
}
@ -622,7 +619,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
// 10% of the time don't consume all docs:
int stopAt;
if (options.contains(Option.PARTIAL_DOC_CONSUME) && expected.size() > 1 && random().nextInt(10) == 7) {
if (!alwaysTestMax && options.contains(Option.PARTIAL_DOC_CONSUME) && expected.size() > 1 && random().nextInt(10) == 7) {
stopAt = random().nextInt(expected.size()-1);
if (VERBOSE) {
System.out.println(" will not consume all docs (" + stopAt + " vs " + expected.size() + ")");
@ -634,7 +631,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
}
}
double skipChance = random().nextDouble();
double skipChance = alwaysTestMax ? 0.5 : random().nextDouble();
int numSkips = expected.size() < 3 ? 1 : _TestUtil.nextInt(random(), 1, Math.min(20, expected.size()/3));
int skipInc = expected.size()/numSkips;
int skipDocInc = (1+maxDocID)/numSkips;
@ -642,9 +639,9 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
// Sometimes do 100% skipping:
boolean doAllSkipping = options.contains(Option.SKIPPING) && random().nextInt(7) == 1;
double freqAskChance = random().nextDouble();
double payloadCheckChance = random().nextDouble();
double offsetCheckChance = random().nextDouble();
double freqAskChance = alwaysTestMax ? 1.0 : random().nextDouble();
double payloadCheckChance = alwaysTestMax ? 1.0 : random().nextDouble();
double offsetCheckChance = alwaysTestMax ? 1.0 : random().nextDouble();
if (VERBOSE) {
if (options.contains(Option.SKIPPING)) {
@ -732,7 +729,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
if (doCheckPositions) {
int freq = docsEnum.freq();
int numPosToConsume;
if (options.contains(Option.PARTIAL_POS_CONSUME) && random().nextInt(5) == 1) {
if (!alwaysTestMax && options.contains(Option.PARTIAL_POS_CONSUME) && random().nextInt(5) == 1) {
numPosToConsume = random().nextInt(freq);
} else {
numPosToConsume = freq;
@ -789,7 +786,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
System.out.println(" skip check offsets");
}
}
} else {
} else if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
if (VERBOSE) {
System.out.println(" now check offsets are -1");
}
@ -801,7 +798,9 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
}
}
private void testTerms(final Fields fieldsSource, final EnumSet<Option> options, final IndexOptions maxIndexOptions) throws Exception {
private void testTerms(final Fields fieldsSource, final EnumSet<Option> options,
final IndexOptions maxIndexOptions,
final boolean alwaysTestMax) throws Exception {
if (options.contains(Option.THREADS)) {
int numThreads = _TestUtil.nextInt(random(), 2, 5);
@ -811,7 +810,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
@Override
public void run() {
try {
testTermsOneThread(fieldsSource, options, maxIndexOptions);
testTermsOneThread(fieldsSource, options, maxIndexOptions, alwaysTestMax);
} catch (Throwable t) {
throw new RuntimeException(t);
}
@ -823,11 +822,11 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
threads[threadUpto].join();
}
} else {
testTermsOneThread(fieldsSource, options, maxIndexOptions);
testTermsOneThread(fieldsSource, options, maxIndexOptions, alwaysTestMax);
}
}
private void testTermsOneThread(Fields fieldsSource, EnumSet<Option> options, IndexOptions maxIndexOptions) throws IOException {
private void testTermsOneThread(Fields fieldsSource, EnumSet<Option> options, IndexOptions maxIndexOptions, boolean alwaysTestMax) throws IOException {
ThreadState threadState = new ThreadState();
@ -885,7 +884,8 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
fieldAndTerm.term,
termsEnum,
maxIndexOptions,
options);
options,
alwaysTestMax);
// Sometimes save term state after pulling the enum:
if (options.contains(Option.TERM_STATE) && !useTermState && !savedTermState && random().nextInt(5) == 1) {
@ -897,7 +897,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
// 10% of the time make sure you can pull another enum
// from the same term:
if (random().nextInt(10) == 7) {
if (alwaysTestMax || random().nextInt(10) == 7) {
// Try same term again
if (VERBOSE) {
System.out.println("TEST: try enum again on same term");
@ -908,7 +908,8 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
fieldAndTerm.term,
termsEnum,
maxIndexOptions,
options);
options,
alwaysTestMax);
}
}
}
@ -933,33 +934,78 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
}
}
public void test() throws Exception {
Directory dir = newFSDirectory(_TestUtil.getTempDir("testPostingsFormat"));
/** Indexes all fields/terms at the specified
* IndexOptions, and fully tests at that IndexOptions. */
private void testFull(IndexOptions options, boolean withPayloads) throws Exception {
File path = _TestUtil.getTempDir("testPostingsFormat.testExact");
Directory dir = newFSDirectory(path);
boolean indexPayloads = random().nextBoolean();
// TODO test thread safety of buildIndex too
FieldsProducer fieldsProducer = buildIndex(dir, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, indexPayloads);
FieldsProducer fieldsProducer = buildIndex(dir, options, withPayloads, true);
testFields(fieldsProducer);
//testTerms(fieldsProducer, EnumSet.noneOf(Option.class), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
//testTerms(fieldsProducer, EnumSet.of(Option.LIVE_DOCS), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
//testTerms(fieldsProducer, EnumSet.of(Option.TERM_STATE, Option.LIVE_DOCS, Option.PARTIAL_DOC_CONSUME, Option.PARTIAL_POS_CONSUME), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
//testTerms(fieldsProducer, EnumSet.of(Option.SKIPPING), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
//testTerms(fieldsProducer, EnumSet.of(Option.THREADS, Option.TERM_STATE, Option.SKIPPING, Option.PARTIAL_DOC_CONSUME, Option.PARTIAL_POS_CONSUME), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
//testTerms(fieldsProducer, EnumSet.of(Option.TERM_STATE, Option.SKIPPING, Option.PARTIAL_DOC_CONSUME, Option.PARTIAL_POS_CONSUME), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
//testTerms(fieldsProducer, EnumSet.of(Option.TERM_STATE, Option.PAYLOADS, Option.PARTIAL_DOC_CONSUME, Option.PARTIAL_POS_CONSUME, Option.SKIPPING), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
IndexOptions[] allOptions = IndexOptions.values();
int maxIndexOption = Arrays.asList(allOptions).indexOf(options);
// NOTE: you can also test "weaker" index options than
// you indexed with:
testTerms(fieldsProducer, EnumSet.allOf(Option.class), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
//testTerms(fieldsProducer, EnumSet.complementOf(EnumSet.of(Option.THREADS)), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
for(int i=0;i<=maxIndexOption;i++) {
testTerms(fieldsProducer, EnumSet.allOf(Option.class), allOptions[i], true);
if (withPayloads) {
// If we indexed w/ payloads, also test enums w/o accessing payloads:
testTerms(fieldsProducer, EnumSet.complementOf(EnumSet.of(Option.PAYLOADS)), allOptions[i], true);
}
}
fieldsProducer.close();
dir.close();
_TestUtil.rmDir(path);
}
public void testDocsOnly() throws Exception {
testFull(IndexOptions.DOCS_ONLY, false);
}
public void testDocsAndFreqs() throws Exception {
testFull(IndexOptions.DOCS_AND_FREQS, false);
}
public void testDocsAndFreqsAndPositions() throws Exception {
testFull(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, false);
}
public void testDocsAndFreqsAndPositionsAndPayloads() throws Exception {
testFull(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, true);
}
public void testDocsAndFreqsAndPositionsAndOffsets() throws Exception {
testFull(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false);
}
public void testDocsAndFreqsAndPositionsAndOffsetsAndPayloads() throws Exception {
testFull(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, true);
}
public void testRandom() throws Exception {
int iters = atLeast(10);
for(int iter=0;iter<iters;iter++) {
File path = _TestUtil.getTempDir("testPostingsFormat");
Directory dir = newFSDirectory(path);
boolean indexPayloads = random().nextBoolean();
// TODO test thread safety of buildIndex too
FieldsProducer fieldsProducer = buildIndex(dir, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, indexPayloads, false);
testFields(fieldsProducer);
// NOTE: you can also test "weaker" index options than
// you indexed with:
testTerms(fieldsProducer, EnumSet.allOf(Option.class), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false);
fieldsProducer.close();
dir.close();
_TestUtil.rmDir(path);
}
}
}
// TODO test that start/endOffset return -1 if field has
// no offsets