LUCENE-6135: renumber fields randomly in tests

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1648188 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-12-28 11:09:08 +00:00
parent e5c766891d
commit da07e538b2
5 changed files with 251 additions and 4 deletions

View File

@ -56,6 +56,7 @@ import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
@ -763,4 +764,51 @@ public abstract class BaseStoredFieldsFormatTestCase extends BaseIndexFileFormat
dir.close();
}
/** mix up field numbers, merge, and check that data is correct */
public void testMismatchedFields() throws Exception {
Directory dirs[] = new Directory[10];
for (int i = 0; i < dirs.length; i++) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(null);
IndexWriter iw = new IndexWriter(dir, iwc);
Document doc = new Document();
for (int j = 0; j < 10; j++) {
// add fields where name=value (e.g. 3=3) so we can detect if stuff gets screwed up.
doc.add(new StringField(Integer.toString(j), Integer.toString(j), Field.Store.YES));
}
for (int j = 0; j < 10; j++) {
iw.addDocument(doc);
}
DirectoryReader reader = DirectoryReader.open(iw, true);
// mix up fields explicitly
if (random().nextBoolean()) {
reader = new MismatchedDirectoryReader(reader, random());
}
dirs[i] = newDirectory();
IndexWriter adder = new IndexWriter(dirs[i], new IndexWriterConfig(null));
adder.addIndexes(reader);
adder.commit();
adder.close();
IOUtils.close(reader, iw, dir);
}
Directory everything = newDirectory();
IndexWriter iw = new IndexWriter(everything, new IndexWriterConfig(null));
iw.addIndexes(dirs);
iw.forceMerge(1);
LeafReader ir = getOnlySegmentReader(DirectoryReader.open(iw, true));
for (int i = 0; i < ir.maxDoc(); i++) {
StoredDocument doc = ir.document(i);
assertEquals(10, doc.getFields().size());
for (int j = 0; j < 10; j++) {
assertEquals(Integer.toString(j), doc.get(Integer.toString(j)));
}
}
IOUtils.close(iw, ir, everything);
IOUtils.close(dirs);
}
}

View File

@ -0,0 +1,49 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Random;
/**
* A {@link DirectoryReader} that wraps all its subreaders with
* {@link MismatchedLeafReader}
*/
public class MismatchedDirectoryReader extends FilterDirectoryReader {
static class MismatchedSubReaderWrapper extends SubReaderWrapper {
final Random random;
MismatchedSubReaderWrapper(Random random) {
this.random = random;
}
@Override
public LeafReader wrap(LeafReader reader) {
return new MismatchedLeafReader(reader, random);
}
}
public MismatchedDirectoryReader(DirectoryReader in, Random random) {
super(in, new MismatchedSubReaderWrapper(random));
}
@Override
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) {
return new AssertingDirectoryReader(in);
}
}

View File

@ -0,0 +1,132 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
/**
* Shuffles field numbers around to try to trip bugs where field numbers
* are assumed to always be consistent across segments.
*/
public class MismatchedLeafReader extends FilterLeafReader {
final FieldInfos shuffled;
/** Creates a new reader which will renumber fields in {@code in} */
public MismatchedLeafReader(LeafReader in, Random random) {
super(in);
shuffled = shuffleInfos(in.getFieldInfos(), random);
}
@Override
public FieldInfos getFieldInfos() {
return shuffled;
}
@Override
public void document(int docID, StoredFieldVisitor visitor) throws IOException {
in.document(docID, new MismatchedVisitor(visitor));
}
static FieldInfos shuffleInfos(FieldInfos infos, Random random) {
// first, shuffle the order
List<FieldInfo> shuffled = new ArrayList<>();
for (FieldInfo info : infos) {
shuffled.add(info);
}
Collections.shuffle(shuffled, random);
// now renumber:
for (int i = 0; i < shuffled.size(); i++) {
FieldInfo oldInfo = shuffled.get(i);
// TODO: should we introduce "gaps" too?
FieldInfo newInfo = new FieldInfo(oldInfo.name, // name
i, // number
oldInfo.hasVectors(), // storeTermVector
oldInfo.omitsNorms(), // omitNorms
oldInfo.hasPayloads(), // storePayloads
oldInfo.getIndexOptions(), // indexOptions
oldInfo.getDocValuesType(), // docValuesType
oldInfo.getDocValuesGen(), // dvGen
oldInfo.attributes()); // attributes
shuffled.set(i, newInfo);
}
return new FieldInfos(shuffled.toArray(new FieldInfo[shuffled.size()]));
}
/**
* StoredFieldsVisitor that remaps actual field numbers
* to our new shuffled ones.
*/
// TODO: its strange this part of our IR api exposes FieldInfo,
// no other "user-accessible" codec apis do this?
class MismatchedVisitor extends StoredFieldVisitor {
final StoredFieldVisitor in;
MismatchedVisitor(StoredFieldVisitor in) {
this.in = in;
}
@Override
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
in.binaryField(renumber(fieldInfo), value);
}
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
in.stringField(renumber(fieldInfo), value);
}
@Override
public void intField(FieldInfo fieldInfo, int value) throws IOException {
in.intField(renumber(fieldInfo), value);
}
@Override
public void longField(FieldInfo fieldInfo, long value) throws IOException {
in.longField(renumber(fieldInfo), value);
}
@Override
public void floatField(FieldInfo fieldInfo, float value) throws IOException {
in.floatField(renumber(fieldInfo), value);
}
@Override
public void doubleField(FieldInfo fieldInfo, double value) throws IOException {
in.doubleField(renumber(fieldInfo), value);
}
@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
return in.needsField(renumber(fieldInfo));
}
FieldInfo renumber(FieldInfo original) {
FieldInfo renumbered = shuffled.fieldInfo(original.name);
if (renumbered == null) {
throw new AssertionError("stored fields sending bogus infos!");
}
return renumbered;
}
}
}

View File

@ -73,7 +73,7 @@ public class MockRandomMergePolicy extends MergePolicy {
// TODO: sometimes make more than 1 merge?
mergeSpec = new MergeSpecification();
final int segsToMerge = TestUtil.nextInt(random, 1, numSegments);
if (doNonBulkMerges) {
if (doNonBulkMerges && random.nextBoolean()) {
mergeSpec.add(new MockRandomOneMerge(segments.subList(0, segsToMerge),random.nextLong()));
} else {
mergeSpec.add(new OneMerge(segments.subList(0, segsToMerge)));
@ -106,7 +106,7 @@ public class MockRandomMergePolicy extends MergePolicy {
while(upto < eligibleSegments.size()) {
int max = Math.min(10, eligibleSegments.size()-upto);
int inc = max <= 2 ? max : TestUtil.nextInt(random, 2, max);
if (doNonBulkMerges) {
if (doNonBulkMerges && random.nextBoolean()) {
mergeSpec.add(new MockRandomOneMerge(eligibleSegments.subList(upto, upto+inc), random.nextLong()));
} else {
mergeSpec.add(new OneMerge(eligibleSegments.subList(upto, upto+inc)));
@ -151,9 +151,18 @@ public class MockRandomMergePolicy extends MergePolicy {
readers = new ArrayList<LeafReader>(super.getMergeReaders());
for (int i = 0; i < readers.size(); i++) {
// wrap it (e.g. prevent bulk merge etc)
if (r.nextInt(4) == 0) {
int thingToDo = r.nextInt(7);
if (thingToDo == 0) {
// simple no-op FilterReader
readers.set(i, new FilterLeafReader(readers.get(i)));
} else if (thingToDo == 1) {
// renumber fields
// NOTE: currently this only "blocks" bulk merges just by
// being a FilterReader. But it might find bugs elsewhere,
// and maybe the situation can be improved in the future.
readers.set(i, new MismatchedLeafReader(readers.get(i), r));
}
// otherwise, reader is unchanged
}
}
return readers;

View File

@ -89,6 +89,8 @@ import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.MismatchedDirectoryReader;
import org.apache.lucene.index.MismatchedLeafReader;
import org.apache.lucene.index.MockRandomMergePolicy;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields;
@ -1587,7 +1589,7 @@ public abstract class LuceneTestCase extends Assert {
// TODO: remove this, and fix those tests to wrap before putting slow around:
final boolean wasOriginallyAtomic = r instanceof LeafReader;
for (int i = 0, c = random.nextInt(6)+1; i < c; i++) {
switch(random.nextInt(5)) {
switch(random.nextInt(6)) {
case 0:
r = SlowCompositeReaderWrapper.wrap(r);
break;
@ -1628,6 +1630,13 @@ public abstract class LuceneTestCase extends Assert {
r = new AssertingDirectoryReader((DirectoryReader)r);
}
break;
case 5:
if (r instanceof LeafReader) {
r = new MismatchedLeafReader((LeafReader)r, random);
} else if (r instanceof DirectoryReader) {
r = new MismatchedDirectoryReader((DirectoryReader)r, random);
}
break;
default:
fail("should not get here");
}