mirror of https://github.com/apache/lucene.git
simplify BKDWriter's comparator tie break for its temp files; add test case verifying tie break by docID
This commit is contained in:
parent
5174c4934c
commit
f859bab35f
|
@ -716,32 +716,20 @@ public class BKDWriter implements Closeable {
|
|||
|
||||
@Override
|
||||
public int compare(BytesRef a, BytesRef b) {
|
||||
|
||||
// First compare the bytes on the dimension we are sorting on:
|
||||
// First compare by the requested dimension we are sorting by:
|
||||
int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + bytesPerDim*dim, b.bytes, b.offset + bytesPerDim*dim);
|
||||
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
|
||||
// Tie-break by docID:
|
||||
int offset;
|
||||
if (singleValuePerDoc) {
|
||||
offset = 0;
|
||||
} else if (longOrds) {
|
||||
offset = Long.BYTES;
|
||||
} else {
|
||||
offset = Integer.BYTES;
|
||||
}
|
||||
reader.reset(a.bytes, a.offset + packedBytesLength + offset, a.length);
|
||||
final int docIDA = reader.readInt();
|
||||
// Tie-break by docID ... no need to tie break on ord, for the case where the same doc has
|
||||
// the same value in a given dimension indexed more than once: it can't matter at search
|
||||
// time since we don't write ords into the index:
|
||||
|
||||
reader.reset(b.bytes, b.offset + packedBytesLength + offset, b.length);
|
||||
final int docIDB = reader.readInt();
|
||||
|
||||
// No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it
|
||||
// can't matter at search time since we don't write ords into the index:
|
||||
return Integer.compare(docIDA, docIDB);
|
||||
return StringHelper.compare(Integer.BYTES,
|
||||
a.bytes, a.offset + packedBytesLength,
|
||||
b.bytes, b.offset + packedBytesLength);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -93,15 +93,15 @@ final class OfflinePointReader extends PointReader {
|
|||
assert countLeft == -1;
|
||||
return false;
|
||||
}
|
||||
docID = in.readInt();
|
||||
if (singleValuePerDoc == false) {
|
||||
if (longOrds) {
|
||||
ord = in.readLong();
|
||||
} else {
|
||||
ord = in.readInt();
|
||||
}
|
||||
docID = in.readInt();
|
||||
} else {
|
||||
ord = docID = in.readInt();
|
||||
ord = docID;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -68,6 +68,7 @@ final class OfflinePointWriter implements PointWriter {
|
|||
public void append(byte[] packedValue, long ord, int docID) throws IOException {
|
||||
assert packedValue.length == packedBytesLength;
|
||||
out.writeBytes(packedValue, 0, packedValue.length);
|
||||
out.writeInt(docID);
|
||||
if (singleValuePerDoc == false) {
|
||||
if (longOrds) {
|
||||
out.writeLong(ord);
|
||||
|
@ -76,7 +77,6 @@ final class OfflinePointWriter implements PointWriter {
|
|||
out.writeInt((int) ord);
|
||||
}
|
||||
}
|
||||
out.writeInt(docID);
|
||||
count++;
|
||||
assert expectedCount == 0 || count <= expectedCount;
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@ public class TestBKD extends LuceneTestCase {
|
|||
|
||||
public void testBasicInts1D() throws Exception {
|
||||
try (Directory dir = getDirectory(100)) {
|
||||
BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, 100, true);
|
||||
BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, 100, true);
|
||||
byte[] scratch = new byte[4];
|
||||
for(int docID=0;docID<100;docID++) {
|
||||
NumericUtils.intToSortableBytes(docID, scratch, 0);
|
||||
|
@ -889,4 +889,42 @@ public class TestBKD extends LuceneTestCase {
|
|||
}
|
||||
fail("did not see a supporessed CorruptIndexException");
|
||||
}
|
||||
|
||||
public void testTieBreakOrder() throws Exception {
|
||||
try (Directory dir = newDirectory()) {
|
||||
int numDocs = 10000;
|
||||
BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", 1, 4, 2, 0.01f, numDocs, true);
|
||||
for(int i=0;i<numDocs;i++) {
|
||||
w.add(new byte[Integer.BYTES], i);
|
||||
}
|
||||
|
||||
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
|
||||
long fp = w.finish(out);
|
||||
out.close();
|
||||
|
||||
IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
|
||||
in.seek(fp);
|
||||
BKDReader r = new BKDReader(in);
|
||||
r.intersect(new IntersectVisitor() {
|
||||
int lastDocID = -1;
|
||||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
assertTrue("lastDocID=" + lastDocID + " docID=" + docID, docID > lastDocID);
|
||||
lastDocID = docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
visit(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Relation compare(byte[] minPacked, byte[] maxPacked) {
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
});
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue