LUCENE-4085: improve TestBackwardsCompatibility to test Lucene 4.x features

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1349510 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-06-12 20:42:19 +00:00
parent bc9a59a692
commit 919f9ce695
5 changed files with 109 additions and 3 deletions

View File

@ -29,13 +29,24 @@ import java.util.Map;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.ByteDocValuesField;
import org.apache.lucene.document.DerefBytesDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.IntDocValuesField;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongDocValuesField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.PackedLongDocValuesField;
import org.apache.lucene.document.ShortDocValuesField;
import org.apache.lucene.document.SortedBytesDocValuesField;
import org.apache.lucene.document.StraightBytesDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSetIterator;
@ -52,6 +63,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -63,6 +75,8 @@ import org.junit.BeforeClass;
// TODO: don't use 4.x codec, its unrealistic since it means
// we won't even be running the actual code, only the impostor
// @SuppressCodecs("Lucene4x")
// Sep codec cannot yet handle the offsets in our 4.x index!
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
public class TestBackwardsCompatibility extends LuceneTestCase {
// Uncomment these cases & run them on an older Lucene
@ -305,6 +319,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
IndexSearcher searcher = new IndexSearcher(reader);
_TestUtil.checkIndex(dir);
// true if this is a 4.0+ index
final boolean is40Index = MultiFields.getMergedFieldInfos(reader).fieldInfo("content5") != null;
assert is40Index; // NOTE: currently we can only do this on trunk!
final Bits liveDocs = MultiFields.getLiveDocs(reader);
@ -312,8 +330,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
if (liveDocs.get(i)) {
Document d = reader.document(i);
List<IndexableField> fields = d.getFields();
if (d.getField("content3") == null) {
final int numFields = 5;
boolean isProxDoc = d.getField("content3") == null;
if (isProxDoc) {
final int numFields = is40Index ? 7 : 5;
assertEquals(numFields, fields.size());
IndexableField f = d.getField("id");
assertEquals(""+i, f.stringValue());
@ -341,6 +360,48 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
}
}
if (is40Index) {
// check docvalues fields
Source dvByte = MultiDocValues.getDocValues(reader, "dvByte").getSource();
Source dvBytesDerefFixed = MultiDocValues.getDocValues(reader, "dvBytesDerefFixed").getSource();
Source dvBytesDerefVar = MultiDocValues.getDocValues(reader, "dvBytesDerefVar").getSource();
Source dvBytesSortedFixed = MultiDocValues.getDocValues(reader, "dvBytesSortedFixed").getSource();
Source dvBytesSortedVar = MultiDocValues.getDocValues(reader, "dvBytesSortedVar").getSource();
Source dvBytesStraightFixed = MultiDocValues.getDocValues(reader, "dvBytesStraightFixed").getSource();
Source dvBytesStraightVar = MultiDocValues.getDocValues(reader, "dvBytesStraightVar").getSource();
Source dvDouble = MultiDocValues.getDocValues(reader, "dvDouble").getSource();
Source dvFloat = MultiDocValues.getDocValues(reader, "dvFloat").getSource();
Source dvInt = MultiDocValues.getDocValues(reader, "dvInt").getSource();
Source dvLong = MultiDocValues.getDocValues(reader, "dvLong").getSource();
Source dvPacked = MultiDocValues.getDocValues(reader, "dvPacked").getSource();
Source dvShort = MultiDocValues.getDocValues(reader, "dvShort").getSource();
for (int i=0;i<35;i++) {
int id = Integer.parseInt(reader.document(i).get("id"));
assertEquals((byte)id, dvByte.getInt(i));
byte bytes[] = new byte[] {
(byte)(id >>> 24), (byte)(id >>> 16),(byte)(id >>> 8),(byte)id
};
BytesRef expectedRef = new BytesRef(bytes);
BytesRef scratch = new BytesRef();
assertEquals(expectedRef, dvBytesDerefFixed.getBytes(i, scratch));
assertEquals(expectedRef, dvBytesDerefVar.getBytes(i, scratch));
assertEquals(expectedRef, dvBytesSortedFixed.getBytes(i, scratch));
assertEquals(expectedRef, dvBytesSortedVar.getBytes(i, scratch));
assertEquals(expectedRef, dvBytesStraightFixed.getBytes(i, scratch));
assertEquals(expectedRef, dvBytesStraightVar.getBytes(i, scratch));
assertEquals((double)id, dvDouble.getFloat(i), 0D);
assertEquals((float)id, dvFloat.getFloat(i), 0F);
assertEquals(id, dvInt.getInt(i));
assertEquals(id, dvLong.getInt(i));
assertEquals(id, dvPacked.getInt(i));
assertEquals(id, dvShort.getInt(i));
}
}
ScoreDoc[] hits = searcher.search(new TermQuery(new Term(new String("content"), "aaa")), null, 1000).scoreDocs;
// First document should be #0
@ -348,6 +409,16 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
assertEquals("didn't get the right document first", "0", d.get("id"));
doTestHits(hits, 34, searcher.getIndexReader());
if (is40Index) {
hits = searcher.search(new TermQuery(new Term(new String("content5"), "aaa")), null, 1000).scoreDocs;
doTestHits(hits, 34, searcher.getIndexReader());
hits = searcher.search(new TermQuery(new Term(new String("content6"), "aaa")), null, 1000).scoreDocs;
doTestHits(hits, 34, searcher.getIndexReader());
}
hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).scoreDocs;
assertEquals(34, hits.length);
@ -432,7 +503,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
public File createIndex(String dirName, boolean doCFS, boolean fullyMerged) throws IOException {
// we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes:
File indexDir = new File(LuceneTestCase.TEMP_DIR, dirName);
File indexDir = new File("/tmp/4x", dirName);
_TestUtil.rmDir(indexDir);
Directory dir = newFSDirectory(indexDir);
LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
@ -569,6 +640,41 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
// add numeric fields, to test if flex preserves encoding
doc.add(new IntField("trieInt", id, Field.Store.NO));
doc.add(new LongField("trieLong", (long) id, Field.Store.NO));
// add docvalues fields
doc.add(new ByteDocValuesField("dvByte", (byte) id));
byte bytes[] = new byte[] {
(byte)(id >>> 24), (byte)(id >>> 16),(byte)(id >>> 8),(byte)id
};
BytesRef ref = new BytesRef(bytes);
doc.add(new DerefBytesDocValuesField("dvBytesDerefFixed", ref, true));
doc.add(new DerefBytesDocValuesField("dvBytesDerefVar", ref, false));
doc.add(new SortedBytesDocValuesField("dvBytesSortedFixed", ref, true));
doc.add(new SortedBytesDocValuesField("dvBytesSortedVar", ref, false));
doc.add(new StraightBytesDocValuesField("dvBytesStraightFixed", ref, true));
doc.add(new StraightBytesDocValuesField("dvBytesStraightVar", ref, false));
doc.add(new DoubleDocValuesField("dvDouble", (double)id));
doc.add(new FloatDocValuesField("dvFloat", (float)id));
doc.add(new IntDocValuesField("dvInt", id));
doc.add(new LongDocValuesField("dvLong", id));
doc.add(new PackedLongDocValuesField("dvPacked", id));
doc.add(new ShortDocValuesField("dvShort", (short)id));
// a field with both offsets and term vectors for a cross-check
FieldType customType3 = new FieldType(TextField.TYPE_STORED);
customType3.setStoreTermVectors(true);
customType3.setStoreTermVectorPositions(true);
customType3.setStoreTermVectorOffsets(true);
customType3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
doc.add(new Field("content5", "here is more content with aaa aaa aaa", customType3));
// a field that omits only positions
FieldType customType4 = new FieldType(TextField.TYPE_STORED);
customType4.setStoreTermVectors(true);
customType4.setStoreTermVectorPositions(false);
customType4.setStoreTermVectorOffsets(true);
customType4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
doc.add(new Field("content6", "here is more content with aaa aaa aaa", customType4));
// TODO:
// index different norms types via similarity (we use a random one currently?!)
// remove any analyzer randomness, explicitly add payloads for certain fields.
writer.addDocument(doc);
}