mirror of https://github.com/apache/lucene.git

commit f859bab35f
parent 5174c4934c

simplify BKDWriter's comparator tie break for its temp files; add test case verifying tie break by docID
BKDWriter.java

@@ -716,32 +716,20 @@ public class BKDWriter implements Closeable {

       @Override
       public int compare(BytesRef a, BytesRef b) {
-        // First compare the bytes on the dimension we are sorting on:
+        // First compare by the requested dimension we are sorting by:
         int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + bytesPerDim*dim, b.bytes, b.offset + bytesPerDim*dim);

         if (cmp != 0) {
           return cmp;
         }

-        // Tie-break by docID:
-        int offset;
-        if (singleValuePerDoc) {
-          offset = 0;
-        } else if (longOrds) {
-          offset = Long.BYTES;
-        } else {
-          offset = Integer.BYTES;
-        }
-        reader.reset(a.bytes, a.offset + packedBytesLength + offset, a.length);
-        final int docIDA = reader.readInt();
-
-        reader.reset(b.bytes, b.offset + packedBytesLength + offset, b.length);
-        final int docIDB = reader.readInt();
-
-        // No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it
-        // can't matter at search time since we don't write ords into the index:
-        return Integer.compare(docIDA, docIDB);
+        // Tie-break by docID ... no need to tie break on ord, for the case where the same doc has
+        // the same value in a given dimension indexed more than once: it can't matter at search
+        // time since we don't write ords into the index:
+        return StringHelper.compare(Integer.BYTES,
+                                    a.bytes, a.offset + packedBytesLength,
+                                    b.bytes, b.offset + packedBytesLength);
       }
     };

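Why the new tie-break can compare raw bytes: docIDs are non-negative and are stored as fixed-width ints in the temp-file records, so an unsigned byte-by-byte comparison of the four stored docID bytes produces the same ordering as the old decode-then-Integer.compare(docIDA, docIDB) path. Below is a standalone sketch of that equivalence, not Lucene code; the class and helper names are invented for illustration, and it assumes the big-endian int encoding that Lucene's DataOutput.writeInt used at the time.

import java.nio.ByteBuffer;

// Standalone sketch (not Lucene code): for non-negative ints stored big-endian,
// unsigned lexicographic comparison of the raw bytes orders them exactly like
// Integer.compare, which is why the comparator can tie-break on the stored
// docID bytes without decoding them first.
public class DocIdByteOrderDemo {

  // Encode an int as 4 big-endian bytes (ByteBuffer's default order).
  static byte[] bigEndian(int v) {
    return ByteBuffer.allocate(Integer.BYTES).putInt(v).array();
  }

  // Unsigned byte-wise comparison over 4 bytes, the same idea as
  // StringHelper.compare(Integer.BYTES, ...) in the patch above.
  static int compareBytes(byte[] a, byte[] b) {
    for (int i = 0; i < Integer.BYTES; i++) {
      int cmp = (a[i] & 0xff) - (b[i] & 0xff);
      if (cmp != 0) {
        return cmp;
      }
    }
    return 0;
  }

  public static void main(String[] args) {
    int[] docIDs = {0, 1, 7, 255, 256, 70000, Integer.MAX_VALUE};
    for (int x : docIDs) {
      for (int y : docIDs) {
        int byBytes = Integer.signum(compareBytes(bigEndian(x), bigEndian(y)));
        int byValue = Integer.signum(Integer.compare(x, y));
        if (byBytes != byValue) {
          throw new AssertionError("mismatch for " + x + " vs " + y);
        }
      }
    }
    System.out.println("byte-wise order matches numeric order for non-negative docIDs");
  }
}
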
OfflinePointReader.java

@@ -93,15 +93,15 @@ final class OfflinePointReader extends PointReader {
       assert countLeft == -1;
       return false;
     }
+    docID = in.readInt();
     if (singleValuePerDoc == false) {
       if (longOrds) {
         ord = in.readLong();
       } else {
         ord = in.readInt();
       }
-      docID = in.readInt();
     } else {
-      ord = docID = in.readInt();
+      ord = docID;
     }
     return true;
   }

OfflinePointWriter.java

@@ -68,6 +68,7 @@ final class OfflinePointWriter implements PointWriter {
   public void append(byte[] packedValue, long ord, int docID) throws IOException {
     assert packedValue.length == packedBytesLength;
     out.writeBytes(packedValue, 0, packedValue.length);
+    out.writeInt(docID);
     if (singleValuePerDoc == false) {
       if (longOrds) {
         out.writeLong(ord);
@@ -76,7 +77,6 @@ final class OfflinePointWriter implements PointWriter {
         out.writeInt((int) ord);
       }
     }
-    out.writeInt(docID);
     count++;
     assert expectedCount == 0 || count <= expectedCount;
   }

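Together with the OfflinePointReader change above, these writer hunks move the docID so it is written immediately after the packed value, giving it a fixed offset (packedBytesLength) in every temp-file record regardless of singleValuePerDoc or longOrds; that fixed offset is what lets the simplified comparator in BKDWriter find the docID bytes without any branching. Below is a rough standalone sketch of the resulting record layout, using plain java.io streams instead of Lucene's IndexOutput/IndexInput; the class and method names are invented for illustration.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Sketch of the temp-file record layout after this change (illustration only):
// [packedValue bytes][docID int][ord, present only when singleValuePerDoc == false].
public class RecordLayoutSketch {

  static void writeRecord(DataOutputStream out, byte[] packedValue, long ord, int docID,
                          boolean singleValuePerDoc, boolean longOrds) throws IOException {
    out.write(packedValue);
    out.writeInt(docID);              // docID now comes right after the packed value
    if (singleValuePerDoc == false) { // the ord (if any) moves to the end of the record
      if (longOrds) {
        out.writeLong(ord);
      } else {
        out.writeInt((int) ord);
      }
    }
  }

  // The sort comparator only needs the docID, which now sits at a fixed offset:
  static int readDocID(DataInputStream in, int packedBytesLength) throws IOException {
    in.skipBytes(packedBytesLength);  // skip the packed value
    return in.readInt();              // no need to know anything about ords
  }

  public static void main(String[] args) throws IOException {
    byte[] packedValue = new byte[4]; // e.g. one dimension, 4 bytes per dimension
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    writeRecord(new DataOutputStream(bytes), packedValue, 0L, 42, true, false);
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    System.out.println("docID = " + readDocID(in, packedValue.length)); // prints docID = 42
  }
}
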
TestBKD.java

@@ -45,7 +45,7 @@ public class TestBKD extends LuceneTestCase {

   public void testBasicInts1D() throws Exception {
     try (Directory dir = getDirectory(100)) {
       BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, 100, true);
       byte[] scratch = new byte[4];
       for(int docID=0;docID<100;docID++) {
         NumericUtils.intToSortableBytes(docID, scratch, 0);

@@ -889,4 +889,42 @@ public class TestBKD extends LuceneTestCase {
       }
       fail("did not see a supporessed CorruptIndexException");
     }
+
+  public void testTieBreakOrder() throws Exception {
+    try (Directory dir = newDirectory()) {
+      int numDocs = 10000;
+      BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", 1, 4, 2, 0.01f, numDocs, true);
+      for(int i=0;i<numDocs;i++) {
+        w.add(new byte[Integer.BYTES], i);
+      }
+
+      IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
+      long fp = w.finish(out);
+      out.close();
+
+      IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
+      in.seek(fp);
+      BKDReader r = new BKDReader(in);
+      r.intersect(new IntersectVisitor() {
+          int lastDocID = -1;
+
+          @Override
+          public void visit(int docID) {
+            assertTrue("lastDocID=" + lastDocID + " docID=" + docID, docID > lastDocID);
+            lastDocID = docID;
+          }
+
+          @Override
+          public void visit(int docID, byte[] packedValue) {
+            visit(docID);
+          }
+
+          @Override
+          public Relation compare(byte[] minPacked, byte[] maxPacked) {
+            return Relation.CELL_CROSSES_QUERY;
+          }
+        });
+      in.close();
+    }
+  }
 }
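The new test writes 10,000 points that all carry the same 4-byte value (an all-zeros byte[Integer.BYTES]), with maxPointsInLeafNode=2 and a tiny maxMBSortInHeap of 0.01f, which presumably forces the offline (temp file) sort path; since every value ties, the only ordering signal left is the docID, and the visitor asserts docIDs come back in strictly increasing order. Below is a standalone toy model of that property over temp-file-style records; it is a sketch of the idea, not Lucene's sorter, and the class name is invented.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;

// Toy model of testTieBreakOrder: when every record has the same value bytes,
// a sort that tie-breaks on the trailing docID bytes yields ascending docIDs.
public class TieBreakOrderSketch {
  public static void main(String[] args) {
    int numDocs = 10000;
    int bytesPerDim = Integer.BYTES;
    List<byte[]> records = new ArrayList<>();
    for (int docID = 0; docID < numDocs; docID++) {
      // Record layout: [value bytes (all zeros)][docID, big-endian]
      byte[] rec = new byte[bytesPerDim + Integer.BYTES];
      rec[bytesPerDim] = (byte) (docID >>> 24);
      rec[bytesPerDim + 1] = (byte) (docID >>> 16);
      rec[bytesPerDim + 2] = (byte) (docID >>> 8);
      rec[bytesPerDim + 3] = (byte) docID;
      records.add(rec);
    }
    Collections.shuffle(records, new Random(17));

    // Compare the value bytes first, then tie-break on the docID bytes, all unsigned:
    records.sort((a, b) -> {
      for (int i = 0; i < a.length; i++) {
        int cmp = (a[i] & 0xff) - (b[i] & 0xff);
        if (cmp != 0) {
          return cmp;
        }
      }
      return 0;
    });

    int lastDocID = -1;
    for (byte[] rec : records) {
      int docID = ((rec[bytesPerDim] & 0xff) << 24) | ((rec[bytesPerDim + 1] & 0xff) << 16)
          | ((rec[bytesPerDim + 2] & 0xff) << 8) | (rec[bytesPerDim + 3] & 0xff);
      if (docID <= lastDocID) {
        throw new AssertionError("lastDocID=" + lastDocID + " docID=" + docID);
      }
      lastDocID = docID;
    }
    System.out.println("docIDs visited in ascending order");
  }
}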