LUCENE-7000: Split BasePointFormatTestCase from TestPointValues

This commit is contained in:
Robert Muir 2016-01-29 04:27:27 -05:00
parent 0607f007bf
commit ae6a971c35
5 changed files with 1053 additions and 898 deletions

View File

@ -0,0 +1,33 @@
package org.apache.lucene.codecs.simpletext;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BasePointFormatTestCase;
/**
 * Runs the {@link BasePointFormatTestCase} suite against SimpleText's point format.
 */
public class TestSimpleTextPointFormat extends BasePointFormatTestCase {

  /** The codec handed to the base test case; created once per test instance. */
  private final Codec simpleTextCodec = new SimpleTextCodec();

  /** Returns the SimpleText codec whose point format is being tested. */
  @Override
  protected Codec getCodec() {
    return simpleTextCodec;
  }
}

View File

@ -0,0 +1,83 @@
package org.apache.lucene.codecs.lucene60;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointFormat;
import org.apache.lucene.codecs.PointReader;
import org.apache.lucene.codecs.PointWriter;
import org.apache.lucene.index.BasePointFormatTestCase;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.TestUtil;
/**
 * Tests Lucene60PointFormat, either through the unmodified default codec or through
 * a wrapper around it whose point writer is built with randomized parameters.
 */
public class TestLucene60PointFormat extends BasePointFormatTestCase {

  /** Codec under test; chosen once, in the constructor. */
  private final Codec codec;

  /**
   * Picks the codec for this test instance: a coin flip decides between the default
   * codec as-is and a {@link FilterCodec} around it that supplies a point format
   * with random {@code maxPointsInLeafNode} and {@code maxMBSortInHeap}.
   */
  public TestLucene60PointFormat() {
    final Codec baseCodec = TestUtil.getDefaultCodec();
    if (random().nextBoolean() == false) {
      // standard issue
      codec = baseCodec;
    } else {
      // randomize parameters
      final int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
      final double maxMBSortInHeap = 0.1 + (3*random().nextDouble());
      if (VERBOSE) {
        System.out.println("TEST: using Lucene60PointFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
      }

      // sneaky impersonation! The wrapper reports the wrapped codec's name.
      codec = new FilterCodec(baseCodec.getName(), baseCodec) {
        @Override
        public PointFormat pointFormat() {
          return new PointFormat() {
            @Override
            public PointWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
              return new Lucene60PointWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
            }

            @Override
            public PointReader fieldsReader(SegmentReadState readState) throws IOException {
              return new Lucene60PointReader(readState);
            }
          };
        }
      };
    }
  }

  /** Returns the codec selected by the constructor. */
  @Override
  protected Codec getCodec() {
    return codec;
  }

  /** Runs the inherited merge-stability test, but only when the parameters were not randomized. */
  @Override
  public void testMergeStability() throws Exception {
    final boolean randomizedParameters = codec instanceof FilterCodec;
    assumeFalse("TODO: mess with the parameters and test gets angry!", randomizedParameters);
    super.testMergeStability();
  }
}

View File

@ -0,0 +1,882 @@
package org.apache.lucene.index;
import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Abstract class to do basic tests for a points format.
* NOTE: This test focuses on the points impl, nothing else.
* The [stretch] goal is for this test to be
* so thorough in testing a new PointFormat that if this
* test passes, then all Lucene/Solr tests should also pass. I.e.,
* if there is some bug in a given PointFormat that this
* test fails to catch then this test needs to be improved! */
public abstract class BasePointFormatTestCase extends BaseIndexFileFormatTestCase {
/** Adds between zero and two {@link IntPoint} values with random contents under field "f". */
@Override
protected void addRandomFields(Document doc) {
  int remaining = random().nextInt(3);
  while (remaining > 0) {
    doc.add(new IntPoint("f", random().nextInt()));
    remaining--;
  }
}
/**
 * Indexes 20 one-dimensional points (the ints 0..19), force-merges to a single
 * segment, and checks that {@code intersect} hands every document to the visitor
 * together with a packed value that decodes to that document's docID.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so that
 * a failing assertion does not leave them open.
 */
public void testBasic() throws Exception {
  try (Directory dir = getDirectory(20)) {
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setMergePolicy(newLogMergePolicy());
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      byte[] point = new byte[4];
      for (int i = 0; i < 20; i++) {
        Document doc = new Document();
        NumericUtils.intToBytes(i, point, 0);
        doc.add(new BinaryPoint("dim", point));
        w.addDocument(doc);
      }
      w.forceMerge(1);
    }

    try (DirectoryReader r = DirectoryReader.open(dir)) {
      LeafReader sub = getOnlySegmentReader(r);
      PointValues values = sub.getPointValues();

      // Simple test: make sure intersect can visit every doc:
      BitSet seen = new BitSet();
      values.intersect("dim",
                       new IntersectVisitor() {
                         @Override
                         public Relation compare(byte[] minPacked, byte[] maxPacked) {
                           // Always "crosses", so every value must be passed to visit(docID, packedValue)
                           return Relation.CELL_CROSSES_QUERY;
                         }

                         @Override
                         public void visit(int docID) {
                           // Must never be called since compare never reports CELL_INSIDE_QUERY
                           throw new IllegalStateException();
                         }

                         @Override
                         public void visit(int docID, byte[] packedValue) {
                           seen.set(docID);
                           assertEquals(docID, NumericUtils.bytesToInt(packedValue, 0));
                         }
                       });
      assertEquals(20, seen.cardinality());
    }
  }
}
/**
 * Like a basic visit-everything check, but commits once part-way through indexing
 * (after the document with value 10) before force-merging to a single segment, and
 * then checks that {@code intersect} still reports all 20 documents with a packed
 * value that decodes to the visiting docID.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so that
 * a failing assertion does not leave them open.
 */
public void testMerge() throws Exception {
  try (Directory dir = getDirectory(20)) {
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setMergePolicy(newLogMergePolicy());
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      byte[] point = new byte[4];
      for (int i = 0; i < 20; i++) {
        Document doc = new Document();
        NumericUtils.intToBytes(i, point, 0);
        doc.add(new BinaryPoint("dim", point));
        w.addDocument(doc);
        if (i == 10) {
          w.commit();
        }
      }
      w.forceMerge(1);
    }

    try (DirectoryReader r = DirectoryReader.open(dir)) {
      LeafReader sub = getOnlySegmentReader(r);
      PointValues values = sub.getPointValues();

      // Simple test: make sure intersect can visit every doc:
      BitSet seen = new BitSet();
      values.intersect("dim",
                       new IntersectVisitor() {
                         @Override
                         public Relation compare(byte[] minPacked, byte[] maxPacked) {
                           // Always "crosses", so every value must be passed to visit(docID, packedValue)
                           return Relation.CELL_CROSSES_QUERY;
                         }

                         @Override
                         public void visit(int docID) {
                           // Must never be called since compare never reports CELL_INSIDE_QUERY
                           throw new IllegalStateException();
                         }

                         @Override
                         public void visit(int docID, byte[] packedValue) {
                           seen.set(docID);
                           assertEquals(docID, NumericUtils.bytesToInt(packedValue, 0));
                         }
                       });
      assertEquals(20, seen.cardinality());
    }
  }
}
/**
 * Indexes ten documents that carry a point plus one empty document, deletes every
 * document that has a point, optionally force-merges, and then checks that no live
 * document is reported by {@code intersect} (when point values are still present
 * at all).
 *
 * <p>The directory, writer and reader are managed with try-with-resources so that
 * a failing assertion does not leave them open.
 */
public void testAllPointDocsDeletedInSegment() throws Exception {
  try (Directory dir = getDirectory(20)) {
    IndexWriterConfig iwc = newIndexWriterConfig();
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      byte[] point = new byte[4];
      for (int i = 0; i < 10; i++) {
        Document doc = new Document();
        NumericUtils.intToBytes(i, point, 0);
        doc.add(new BinaryPoint("dim", point));
        doc.add(new NumericDocValuesField("id", i));
        doc.add(newStringField("x", "x", Field.Store.NO));
        w.addDocument(doc);
      }
      // The only document that survives the delete below:
      w.addDocument(new Document());
      w.deleteDocuments(new Term("x", "x"));
      if (random().nextBoolean()) {
        w.forceMerge(1);
      }
    }

    try (DirectoryReader r = DirectoryReader.open(dir)) {
      assertEquals(1, r.numDocs());
      PointValues values = MultiPointValues.get(r);
      Bits liveDocs = MultiFields.getLiveDocs(r);
      NumericDocValues idValues = MultiDocValues.getNumericValues(r, "id");

      if (values != null) {
        BitSet seen = new BitSet();
        values.intersect("dim",
                         new IntersectVisitor() {
                           @Override
                           public Relation compare(byte[] minPacked, byte[] maxPacked) {
                             // Always "crosses", so every value must be passed to visit(docID, packedValue)
                             return Relation.CELL_CROSSES_QUERY;
                           }

                           @Override
                           public void visit(int docID) {
                             // Must never be called since compare never reports CELL_INSIDE_QUERY
                             throw new IllegalStateException();
                           }

                           @Override
                           public void visit(int docID, byte[] packedValue) {
                             // Only live docs count; deleted docs may still be visited
                             if (liveDocs.get(docID)) {
                               seen.set(docID);
                             }
                             assertEquals(idValues.get(docID), NumericUtils.bytesToInt(packedValue, 0));
                           }
                         });
        assertEquals(0, seen.cardinality());
      }
    }
  }
}
/**
 * Make sure we close open files, delete temp files, etc., on exception.
 *
 * <p>Builds random points once, then repeatedly indexes and verifies them in a fresh
 * {@link MockDirectoryWrapper} configured to throw random IOExceptions, until one run
 * fails in one of the expected ways. Any other failure is rethrown unchanged.
 */
public void testWithExceptions() throws Exception {
  int numDocs = atLeast(10000);
  int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
  int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);

  byte[][][] docValues = new byte[numDocs][][];

  for (int docID = 0; docID < numDocs; docID++) {
    byte[][] values = new byte[numDims][];
    for (int dim = 0; dim < numDims; dim++) {
      values[dim] = new byte[numBytesPerDim];
      random().nextBytes(values[dim]);
    }
    docValues[docID] = values;
  }

  // Keep retrying until 1) we allow a big enough heap, and 2) we hit a random IOExc from MDW:
  boolean done = false;
  while (done == false) {
    try (MockDirectoryWrapper dir = newMockFSDirectory(createTempDir())) {
      try {
        dir.setRandomIOExceptionRate(0.05);
        dir.setRandomIOExceptionRateOnOpen(0.05);
        // dir is statically a MockDirectoryWrapper, so no instanceof check is needed
        dir.setEnableVirusScanner(false);
        verify(dir, docValues, null, numDims, numBytesPerDim, true);
      } catch (IllegalStateException ise) {
        if (messageContains(ise, "this writer hit an unrecoverable error")
            && messageContains(ise.getCause(), "a random IOException")) {
          done = true;
        } else {
          throw ise;
        }
      } catch (AssertionError ae) {
        if (messageContains(ae, "does not exist; files=")) {
          // OK: likely we threw the random IOExc when IW was asserting the commit files exist
          done = true;
        } else {
          throw ae;
        }
      } catch (IllegalArgumentException iae) {
        // This just means we got a too-small maxMB for the maxPointsInLeafNode; just retry w/ more heap
        if (messageContains(iae, "either increase maxMBSortInHeap or decrease maxPointsInLeafNode") == false) {
          // Not the exception we tolerate: surface it as-is instead of hiding it behind an assertion
          throw iae;
        }
      } catch (IOException ioe) {
        if (messageContains(ioe, "a random IOException") || messageContains(ioe, "background merge hit exception")) {
          // BKDWriter should fully clean up after itself:
          done = true;
        } else {
          throw ioe;
        }
      }
    }
  }
}

/**
 * Null-safe check that {@code t} has a message containing {@code fragment}; returns false
 * when {@code t} or its message is null, so an unexpected exception is rethrown by the
 * caller rather than being masked by a NullPointerException.
 */
private static boolean messageContains(Throwable t, String fragment) {
  if (t == null) {
    return false;
  }
  String message = t.getMessage();
  return message != null && message.contains(fragment);
}
/**
 * Verifies random points where each document owns between one and five values;
 * all values of one document are generated adjacently.
 */
public void testMultiValued() throws Exception {
  final int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
  final int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
  final int numDocs = atLeast(1000);

  final List<byte[][]> points = new ArrayList<>();
  final List<Integer> owners = new ArrayList<>();

  int docID = 0;
  while (docID < numDocs) {
    final int valuesInThisDoc = TestUtil.nextInt(random(), 1, 5);
    for (int ord = 0; ord < valuesInThisDoc; ord++) {
      owners.add(docID);
      final byte[][] point = new byte[numDims][];
      for (int dim = 0; dim < numDims; dim++) {
        final byte[] bytes = new byte[numBytesPerDim];
        random().nextBytes(bytes);
        point[dim] = bytes;
      }
      points.add(point);
    }
    docID++;
  }

  // Flatten both lists into the array form verify() expects:
  final int[] ownerArray = new int[owners.size()];
  int next = 0;
  for (Integer owner : owners) {
    ownerArray[next++] = owner;
  }
  final byte[][][] pointArray = points.toArray(new byte[points.size()][][]);

  verify(pointArray, ownerArray, numDims, numBytesPerDim);
}
/** Verifies an index in which every document carries the very same random point. */
public void testAllEqual() throws Exception {
  final int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
  final int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
  final int numDocs = atLeast(1000);

  final byte[][][] docValues = new byte[numDocs][][];
  if (numDocs > 0) {
    // Generate one point and let every slot share that same array:
    final byte[][] shared = new byte[numDims][];
    for (int dim = 0; dim < numDims; dim++) {
      shared[dim] = new byte[numBytesPerDim];
      random().nextBytes(shared[dim]);
    }
    Arrays.fill(docValues, shared);
  }

  verify(docValues, null, numDims, numBytesPerDim);
}
/**
 * Verifies random points in which one randomly chosen dimension has the same value
 * in every document, while the remaining dimensions are random.
 */
public void testOneDimEqual() throws Exception {
  final int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
  final int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
  final int numDocs = atLeast(1000);
  final int pinnedDim = random().nextInt(numDims);

  final byte[][][] docValues = new byte[numDocs][][];
  for (int docID = 0; docID < numDocs; docID++) {
    final byte[][] point = new byte[numDims][];
    for (int dim = 0; dim < numDims; dim++) {
      final byte[] bytes = new byte[numBytesPerDim];
      random().nextBytes(bytes);
      point[dim] = bytes;
    }
    if (docID > 0) {
      // Every later doc reuses the first doc's value in the pinned dimension:
      point[pinnedDim] = docValues[0][pinnedDim];
    }
    docValues[docID] = point;
  }

  verify(docValues, null, numDims, numBytesPerDim);
}
// Tests on N-dimensional points where each dimension is a BigInteger
//
// Indexes one random N-dimensional BigInteger point per document under "field",
// then runs random rectangle queries through intersect() and compares the docIDs
// that were hit against a brute-force check over the BigInteger values that were indexed.
public void testBigIntNDims() throws Exception {
int numDocs = atLeast(1000);
try (Directory dir = getDirectory(numDocs)) {
int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
// We rely on docIDs not changing:
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
// docs[docID][dim] remembers the value indexed for each doc, used as ground truth below
BigInteger[][] docs = new BigInteger[numDocs][];
for(int docID=0;docID<numDocs;docID++) {
BigInteger[] values = new BigInteger[numDims];
if (VERBOSE) {
System.out.println(" docID=" + docID);
}
byte[][] bytes = new byte[numDims][];
for(int dim=0;dim<numDims;dim++) {
values[dim] = randomBigInt(numBytesPerDim);
bytes[dim] = new byte[numBytesPerDim];
// Encode the BigInteger into the fixed-width byte form that gets indexed
NumericUtils.bigIntToBytes(values[dim], bytes[dim], 0, numBytesPerDim);
if (VERBOSE) {
System.out.println(" " + dim + " -> " + values[dim]);
}
}
docs[docID] = values;
Document doc = new Document();
doc.add(new BinaryPoint("field", bytes));
w.addDocument(doc);
}
DirectoryReader r = w.getReader();
w.close();
// NOTE(review): r is only closed on the success path (r.close() at the end); a failed
// assertion below would leave it open — consider try-with-resources.
PointValues dimValues = MultiPointValues.get(r);
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter);
}
// Random N dims rect query:
BigInteger[] queryMin = new BigInteger[numDims];
BigInteger[] queryMax = new BigInteger[numDims];
for(int dim=0;dim<numDims;dim++) {
queryMin[dim] = randomBigInt(numBytesPerDim);
queryMax[dim] = randomBigInt(numBytesPerDim);
// Swap so that min <= max in every dimension
if (queryMin[dim].compareTo(queryMax[dim]) > 0) {
BigInteger x = queryMin[dim];
queryMin[dim] = queryMax[dim];
queryMax[dim] = x;
}
if (VERBOSE) {
System.out.println(" " + dim + "\n min=" + queryMin[dim] + "\n max=" + queryMax[dim]);
}
}
// docIDs the points reader reported as matching the query rectangle
final BitSet hits = new BitSet();
dimValues.intersect("field", new IntersectVisitor() {
// Called without a value: the doc is accepted unconditionally
@Override
public void visit(int docID) {
hits.set(docID);
//System.out.println("visit docID=" + docID);
}
// Called with a value: accept the doc only if every dimension lies inside [queryMin, queryMax]
@Override
public void visit(int docID, byte[] packedValue) {
//System.out.println("visit check docID=" + docID);
for(int dim=0;dim<numDims;dim++) {
BigInteger x = NumericUtils.bytesToBigInt(packedValue, dim, numBytesPerDim);
if (x.compareTo(queryMin[dim]) < 0 || x.compareTo(queryMax[dim]) > 0) {
//System.out.println(" no");
return;
}
}
//System.out.println(" yes");
hits.set(docID);
}
// Classifies a cell [minPacked, maxPacked] against the query rectangle: outside as soon as
// any dimension is disjoint, crossing if any dimension sticks out, otherwise inside
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
boolean crosses = false;
for(int dim=0;dim<numDims;dim++) {
BigInteger min = NumericUtils.bytesToBigInt(minPacked, dim, numBytesPerDim);
BigInteger max = NumericUtils.bytesToBigInt(maxPacked, dim, numBytesPerDim);
assert max.compareTo(min) >= 0;
if (max.compareTo(queryMin[dim]) < 0 || min.compareTo(queryMax[dim]) > 0) {
return Relation.CELL_OUTSIDE_QUERY;
} else if (min.compareTo(queryMin[dim]) < 0 || max.compareTo(queryMax[dim]) > 0) {
crosses = true;
}
}
if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
});
// Brute force: recompute the expected answer per doc from the remembered BigIntegers
for(int docID=0;docID<numDocs;docID++) {
BigInteger[] docValues = docs[docID];
boolean expected = true;
for(int dim=0;dim<numDims;dim++) {
BigInteger x = docValues[dim];
if (x.compareTo(queryMin[dim]) < 0 || x.compareTo(queryMax[dim]) > 0) {
expected = false;
break;
}
}
boolean actual = hits.get(docID);
assertEquals("docID=" + docID, expected, actual);
}
}
r.close();
}
}
/** Random binary points with a base count of 10 documents. */
public void testRandomBinaryTiny() throws Exception {
  final int count = 10;
  doTestRandomBinary(count);
}

/** Random binary points with a base count of 10000 documents. */
public void testRandomBinaryMedium() throws Exception {
  final int count = 10000;
  doTestRandomBinary(count);
}

/** Random binary points with a base count of 200000 documents; skipped when the default codec is SimpleText. */
@Nightly
public void testRandomBinaryBig() throws Exception {
  final boolean defaultIsSimpleText = Codec.getDefault().getName().equals("SimpleText");
  assumeFalse("too slow with SimpleText", defaultIsSimpleText);
  final int count = 200000;
  doTestRandomBinary(count);
}
/**
 * Generates between {@code count} and {@code 2*count} single-valued documents with fully
 * random bytes in a random number of dimensions, then indexes and verifies them.
 */
private void doTestRandomBinary(int count) throws Exception {
  final int numDocs = TestUtil.nextInt(random(), count, count*2);
  final int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
  final int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);

  final byte[][][] docValues = new byte[numDocs][][];
  for (int docID = 0; docID < numDocs; docID++) {
    final byte[][] point = new byte[numDims][];
    for (int dim = 0; dim < numDims; dim++) {
      final byte[] bytes = new byte[numBytesPerDim];
      // TODO: sometimes test on a "small" volume too, so we test the high density cases, higher chance of boundary, etc. cases:
      random().nextBytes(bytes);
      point[dim] = bytes;
    }
    docValues[docID] = point;
  }

  verify(docValues, null, numDims, numBytesPerDim);
}
/**
 * Indexes and verifies the given values in a directory sized for them, retrying for as
 * long as the attempt is rejected with the "increase maxMBSortInHeap" IllegalArgumentException.
 *
 * <p>{@code docIDs} can be null, for the single valued case, else it maps value to docID,
 * but all values for one doc must be adjacent.
 */
private void verify(byte[][][] docValues, int[] docIDs, int numDims, int numBytesPerDim) throws Exception {
  try (Directory dir = getDirectory(docValues.length)) {
    boolean verified = false;
    while (verified == false) {
      try {
        verify(dir, docValues, docIDs, numDims, numBytesPerDim, false);
        verified = true;
      } catch (IllegalArgumentException iae) {
        // This just means we got a too-small maxMB for the maxPointsInLeafNode; just retry
        assertTrue(iae.getMessage().contains("either increase maxMBSortInHeap or decrease maxPointsInLeafNode"));
      }
    }
  }
}
/**
 * Indexes {@code docValues} as {@code BinaryPoint}s under "field" (with a numeric doc value
 * "id" per document), then checks the min/max packed values and a series of random
 * rectangle queries against a brute-force evaluation over {@code docValues}.
 *
 * Along the way it randomly adds empty documents and documents that get deleted again, and
 * 20% of the time it indexes into a separate directory first and moves the data over with
 * addIndexes.
 *
 * @param dir directory to index into; not closed here unless it was swapped for a temporary one
 * @param docValues one entry per value: docValues[ord][dim] is the bytes for that dimension
 * @param ids maps value ord to document id, or null when each value is its own document
 * @param numDims number of dimensions per point
 * @param numBytesPerDim number of bytes per dimension
 * @param expectExceptions when true, exceptions are suppressed on a ConcurrentMergeScheduler
 */
private void verify(Directory dir, byte[][][] docValues, int[] ids, int numDims, int numBytesPerDim, boolean expectExceptions) throws Exception {
int numValues = docValues.length;
if (VERBOSE) {
System.out.println("TEST: numValues=" + numValues + " numDims=" + numDims + " numBytesPerDim=" + numBytesPerDim);
}
// RandomIndexWriter is too slow:
boolean useRealWriter = docValues.length > 10000;
IndexWriterConfig iwc;
if (useRealWriter) {
iwc = new IndexWriterConfig(new MockAnalyzer(random()));
} else {
iwc = newIndexWriterConfig();
}
if (expectExceptions) {
MergeScheduler ms = iwc.getMergeScheduler();
if (ms instanceof ConcurrentMergeScheduler) {
((ConcurrentMergeScheduler) ms).setSuppressExceptions();
}
}
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
DirectoryReader r = null;
// Compute actual min/max values:
byte[][] expectedMinValues = new byte[numDims][];
byte[][] expectedMaxValues = new byte[numDims][];
for(int ord=0;ord<docValues.length;ord++) {
for(int dim=0;dim<numDims;dim++) {
if (ord == 0) {
// First value seeds both the min and the max (copied, so the input is not aliased)
expectedMinValues[dim] = new byte[numBytesPerDim];
System.arraycopy(docValues[ord][dim], 0, expectedMinValues[dim], 0, numBytesPerDim);
expectedMaxValues[dim] = new byte[numBytesPerDim];
System.arraycopy(docValues[ord][dim], 0, expectedMaxValues[dim], 0, numBytesPerDim);
} else {
// TODO: it's cheating that we use StringHelper.compare for "truth": what if it's buggy?
if (StringHelper.compare(numBytesPerDim, docValues[ord][dim], 0, expectedMinValues[dim], 0) < 0) {
System.arraycopy(docValues[ord][dim], 0, expectedMinValues[dim], 0, numBytesPerDim);
}
if (StringHelper.compare(numBytesPerDim, docValues[ord][dim], 0, expectedMaxValues[dim], 0) > 0) {
System.arraycopy(docValues[ord][dim], 0, expectedMaxValues[dim], 0, numBytesPerDim);
}
}
}
}
// 20% of the time we add into a separate directory, then at some point use
// addIndexes to bring the indexed point values to the main directory:
Directory saveDir;
RandomIndexWriter saveW;
int addIndexesAt;
if (random().nextInt(5) == 1) {
// Park the caller's directory/writer and index into a fresh temporary pair instead
saveDir = dir;
saveW = w;
dir = getDirectory(numValues);
if (useRealWriter) {
iwc = new IndexWriterConfig(new MockAnalyzer(random()));
} else {
iwc = newIndexWriterConfig();
}
if (expectExceptions) {
MergeScheduler ms = iwc.getMergeScheduler();
if (ms instanceof ConcurrentMergeScheduler) {
((ConcurrentMergeScheduler) ms).setSuppressExceptions();
}
}
w = new RandomIndexWriter(random(), dir, iwc);
// The ord at (or after) which the temporary index is folded back into the parked writer
addIndexesAt = TestUtil.nextInt(random(), 1, numValues-1);
} else {
saveW = null;
saveDir = null;
addIndexesAt = 0;
}
try {
Document doc = null;
int lastID = -1;
for(int ord=0;ord<numValues;ord++) {
int id;
if (ids == null) {
id = ord;
} else {
id = ids[ord];
}
// A new id starts a new document: flush the previous one (if any) first
if (id != lastID) {
if (doc != null) {
if (useRealWriter) {
w.w.addDocument(doc);
} else {
w.addDocument(doc);
}
}
doc = new Document();
doc.add(new NumericDocValuesField("id", id));
}
doc.add(new BinaryPoint("field", docValues[ord]));
lastID = id;
if (random().nextInt(30) == 17) {
// randomly index some documents without this field
if (useRealWriter) {
w.w.addDocument(new Document());
} else {
w.addDocument(new Document());
}
if (VERBOSE) {
System.out.println("add empty doc");
}
}
if (random().nextInt(30) == 17) {
// randomly index some documents with this field, but we will delete them:
Document xdoc = new Document();
xdoc.add(new BinaryPoint("field", docValues[ord]));
xdoc.add(new StringField("nukeme", "yes", Field.Store.NO));
if (useRealWriter) {
w.w.addDocument(xdoc);
} else {
w.addDocument(xdoc);
}
if (VERBOSE) {
System.out.println("add doc doc-to-delete");
}
if (random().nextInt(5) == 1) {
if (useRealWriter) {
w.w.deleteDocuments(new Term("nukeme", "yes"));
} else {
w.deleteDocuments(new Term("nukeme", "yes"));
}
}
}
if (VERBOSE) {
System.out.println(" ord=" + ord + " id=" + id);
for(int dim=0;dim<numDims;dim++) {
System.out.println(" dim=" + dim + " value=" + new BytesRef(docValues[ord][dim]));
}
}
if (saveW != null && ord >= addIndexesAt) {
// Fold the temporary index into the parked writer and continue with the original pair;
// switchIndex closes the temporary writer and directory
switchIndex(w, dir, saveW);
w = saveW;
dir = saveDir;
saveW = null;
saveDir = null;
}
}
// Flush the last pending document.
// NOTE(review): this goes through w.addDocument even when useRealWriter is true, unlike the
// adds inside the loop; and doc would be null here if docValues were empty — confirm intent.
w.addDocument(doc);
// Remove every "doc-to-delete" that was not already deleted inside the loop
w.deleteDocuments(new Term("nukeme", "yes"));
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println("\nTEST: now force merge");
}
w.forceMerge(1);
}
r = w.getReader();
w.close();
if (VERBOSE) {
System.out.println("TEST: reader=" + r);
}
PointValues dimValues = MultiPointValues.get(r);
if (VERBOSE) {
System.out.println(" dimValues=" + dimValues);
}
assertNotNull(dimValues);
NumericDocValues idValues = MultiDocValues.getNumericValues(r, "id");
Bits liveDocs = MultiFields.getLiveDocs(r);
// Verify min/max values are correct:
byte[] minValues = dimValues.getMinPackedValue("field");
byte[] maxValues = dimValues.getMaxPackedValue("field");
byte[] scratch = new byte[numBytesPerDim];
for(int dim=0;dim<numDims;dim++) {
// Slice this dimension out of the packed value before comparing
System.arraycopy(minValues, dim*numBytesPerDim, scratch, 0, scratch.length);
//System.out.println("dim=" + dim + " expectedMin=" + new BytesRef(expectedMinValues[dim]) + " min=" + new BytesRef(scratch));
assertTrue(Arrays.equals(expectedMinValues[dim], scratch));
System.arraycopy(maxValues, dim*numBytesPerDim, scratch, 0, scratch.length);
//System.out.println("dim=" + dim + " expectedMax=" + new BytesRef(expectedMaxValues[dim]) + " max=" + new BytesRef(scratch));
assertTrue(Arrays.equals(expectedMaxValues[dim], scratch));
}
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter);
}
// Random N dims rect query:
byte[][] queryMin = new byte[numDims][];
byte[][] queryMax = new byte[numDims][];
for(int dim=0;dim<numDims;dim++) {
queryMin[dim] = new byte[numBytesPerDim];
random().nextBytes(queryMin[dim]);
queryMax[dim] = new byte[numBytesPerDim];
random().nextBytes(queryMax[dim]);
// Swap so that min <= max in every dimension
if (NumericUtils.compare(numBytesPerDim, queryMin[dim], 0, queryMax[dim], 0) > 0) {
byte[] x = queryMin[dim];
queryMin[dim] = queryMax[dim];
queryMax[dim] = x;
}
}
if (VERBOSE) {
for(int dim=0;dim<numDims;dim++) {
System.out.println(" dim=" + dim + "\n queryMin=" + new BytesRef(queryMin[dim]) + "\n queryMax=" + new BytesRef(queryMax[dim]));
}
}
// Hits are recorded by the "id" doc value rather than by docID
final BitSet hits = new BitSet();
dimValues.intersect("field", new PointValues.IntersectVisitor() {
// Called without a value: accept the doc if it is live
@Override
public void visit(int docID) {
if (liveDocs == null || liveDocs.get(docID)) {
hits.set((int) idValues.get(docID));
}
//System.out.println("visit docID=" + docID);
}
// Called with a value: skip deleted docs, then accept only if every dimension is inside the query
@Override
public void visit(int docID, byte[] packedValue) {
if (liveDocs != null && liveDocs.get(docID) == false) {
return;
}
//System.out.println("visit check docID=" + docID + " id=" + idValues.get(docID));
for(int dim=0;dim<numDims;dim++) {
//System.out.println(" dim=" + dim + " value=" + new BytesRef(packedValue, dim*numBytesPerDim, numBytesPerDim));
if (NumericUtils.compare(numBytesPerDim, packedValue, dim, queryMin[dim], 0) < 0 ||
NumericUtils.compare(numBytesPerDim, packedValue, dim, queryMax[dim], 0) > 0) {
//System.out.println(" no");
return;
}
}
//System.out.println(" yes");
hits.set((int) idValues.get(docID));
}
// Classifies a cell against the query rectangle: outside as soon as any dimension is
// disjoint, crossing if any dimension sticks out, otherwise inside
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
boolean crosses = false;
//System.out.println("compare");
for(int dim=0;dim<numDims;dim++) {
if (NumericUtils.compare(numBytesPerDim, maxPacked, dim, queryMin[dim], 0) < 0 ||
NumericUtils.compare(numBytesPerDim, minPacked, dim, queryMax[dim], 0) > 0) {
//System.out.println(" query_outside_cell");
return Relation.CELL_OUTSIDE_QUERY;
} else if (NumericUtils.compare(numBytesPerDim, minPacked, dim, queryMin[dim], 0) < 0 ||
NumericUtils.compare(numBytesPerDim, maxPacked, dim, queryMax[dim], 0) > 0) {
crosses = true;
}
}
if (crosses) {
//System.out.println(" query_crosses_cell");
return Relation.CELL_CROSSES_QUERY;
} else {
//System.out.println(" cell_inside_query");
return Relation.CELL_INSIDE_QUERY;
}
}
});
// Brute force: an id is expected if any of its values lies inside the query rectangle
BitSet expected = new BitSet();
for(int ord=0;ord<numValues;ord++) {
boolean matches = true;
for(int dim=0;dim<numDims;dim++) {
byte[] x = docValues[ord][dim];
if (NumericUtils.compare(numBytesPerDim, x, 0, queryMin[dim], 0) < 0 ||
NumericUtils.compare(numBytesPerDim, x, 0, queryMax[dim], 0) > 0) {
matches = false;
break;
}
}
if (matches) {
int id;
if (ids == null) {
id = ord;
} else {
id = ids[ord];
}
expected.set(id);
}
}
// Compare the two bit sets id by id so every mismatch gets printed before failing
int limit = Math.max(expected.length(), hits.length());
int failCount = 0;
int successCount = 0;
for(int id=0;id<limit;id++) {
if (expected.get(id) != hits.get(id)) {
System.out.println("FAIL: id=" + id);
failCount++;
} else {
successCount++;
}
}
if (failCount != 0) {
// Dump the docID -> id mapping to help debugging
for(int docID=0;docID<r.maxDoc();docID++) {
System.out.println(" docID=" + docID + " id=" + idValues.get(docID));
}
fail(failCount + " docs failed; " + successCount + " docs succeeded");
}
}
} finally {
// saveDir is still non-null only if the switch to the parked writer never happened; in that
// case dir is the temporary directory created above and must be closed here. Otherwise dir
// is the caller's directory and is left open.
IOUtils.closeWhileHandlingException(r, w, saveW, saveDir == null ? null : dir);
}
}
/**
 * Copies everything indexed so far through {@code w} into {@code saveW} using one of three
 * randomly chosen addIndexes variants, then closes {@code w} and {@code dir}.
 *
 * @param w writer holding the index to move
 * @param dir directory {@code w} writes to
 * @param saveW writer that receives the index
 */
private void switchIndex(RandomIndexWriter w, Directory dir, RandomIndexWriter saveW) throws IOException {
  final boolean viaReaders = random().nextBoolean();
  if (viaReaders == false) {
    // Add via directory:
    if (VERBOSE) {
      System.out.println("TEST: now use addIndexes(Directory[]) to switch writers");
    }
    w.close();
    saveW.addIndexes(new Directory[] {dir});
  } else {
    // Add via readers:
    try (DirectoryReader r = w.getReader()) {
      final boolean viaCodecReaders = random().nextBoolean();
      if (viaCodecReaders == false) {
        if (VERBOSE) {
          System.out.println("TEST: now use TestUtil.addIndexesSlowly(DirectoryReader[]) to switch writers");
        }
        TestUtil.addIndexesSlowly(saveW.w, r);
      } else {
        // Add via CodecReaders:
        final List<CodecReader> leafReaders = new ArrayList<>();
        for (LeafReaderContext leaf : r.leaves()) {
          leafReaders.add((CodecReader) leaf.reader());
        }
        if (VERBOSE) {
          System.out.println("TEST: now use addIndexes(CodecReader[]) to switch writers");
        }
        final CodecReader[] asArray = leafReaders.toArray(new CodecReader[leafReaders.size()]);
        saveW.addIndexes(asArray);
      }
    }
  }

  w.close();
  dir.close();
}
/**
 * Returns a random BigInteger whose magnitude has up to {@code numBytes*8-1} bits and
 * whose sign is negated on a coin flip.
 */
private BigInteger randomBigInt(int numBytes) {
  final BigInteger magnitude = new BigInteger(numBytes*8-1, random());
  final boolean negative = random().nextBoolean();
  return negative ? magnitude.negate() : magnitude;
}
/**
 * Disables the virus scanner when {@code dir} is a {@link MockDirectoryWrapper}; any other
 * directory is left untouched. Returns the same {@code dir} instance.
 */
private static Directory noVirusChecker(Directory dir) {
  if ((dir instanceof MockDirectoryWrapper) == false) {
    return dir;
  }
  final MockDirectoryWrapper mock = (MockDirectoryWrapper) dir;
  mock.setEnableVirusScanner(false);
  return dir;
}
/**
 * Creates the directory for a test: a filesystem directory under a temp dir when more than
 * 100000 points are expected, otherwise whatever {@code newDirectory()} provides. The virus
 * scanner is switched off on the result.
 */
private Directory getDirectory(int numPoints) throws IOException {
  final boolean manyPoints = numPoints > 100000;
  final Directory dir = manyPoints ? newFSDirectory(createTempDir("TestBKDTree")) : newDirectory();
  return noVirusChecker(dir);
}
}

View File

@ -0,0 +1,31 @@
package org.apache.lucene.codecs.asserting;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BasePointFormatTestCase;
/**
 * Runs the {@link BasePointFormatTestCase} suite directly against AssertingPointFormat,
 * by way of {@link AssertingCodec}.
 */
public class TestAssertingPointFormat extends BasePointFormatTestCase {

  /** The codec handed to the base test case; created once per test instance. */
  private final Codec assertingCodec = new AssertingCodec();

  /** Returns the asserting codec whose point format is being tested. */
  @Override
  protected Codec getCodec() {
    return assertingCodec;
  }
}