simplify BKDWriter's comparator tie break for its temp files; add test case verifying tie break by docID

This commit is contained in:
Mike McCandless 2016-03-24 10:02:48 -04:00
parent 5174c4934c
commit f859bab35f
4 changed files with 49 additions and 23 deletions

View File

@ -716,32 +716,20 @@ public class BKDWriter implements Closeable {
@Override
public int compare(BytesRef a, BytesRef b) {
// First compare the bytes on the dimension we are sorting on:
// First compare by the requested dimension we are sorting by:
int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + bytesPerDim*dim, b.bytes, b.offset + bytesPerDim*dim);
if (cmp != 0) {
return cmp;
}
// Tie-break by docID:
int offset;
if (singleValuePerDoc) {
offset = 0;
} else if (longOrds) {
offset = Long.BYTES;
} else {
offset = Integer.BYTES;
}
reader.reset(a.bytes, a.offset + packedBytesLength + offset, a.length);
final int docIDA = reader.readInt();
// Tie-break by docID ... no need to tie break on ord, for the case where the same doc has
// the same value in a given dimension indexed more than once: it can't matter at search
// time since we don't write ords into the index:
reader.reset(b.bytes, b.offset + packedBytesLength + offset, b.length);
final int docIDB = reader.readInt();
// No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it
// can't matter at search time since we don't write ords into the index:
return Integer.compare(docIDA, docIDB);
return StringHelper.compare(Integer.BYTES,
a.bytes, a.offset + packedBytesLength,
b.bytes, b.offset + packedBytesLength);
}
};

View File

@ -93,15 +93,15 @@ final class OfflinePointReader extends PointReader {
assert countLeft == -1;
return false;
}
docID = in.readInt();
if (singleValuePerDoc == false) {
if (longOrds) {
ord = in.readLong();
} else {
ord = in.readInt();
}
docID = in.readInt();
} else {
ord = docID = in.readInt();
ord = docID;
}
return true;
}

View File

@ -68,6 +68,7 @@ final class OfflinePointWriter implements PointWriter {
public void append(byte[] packedValue, long ord, int docID) throws IOException {
assert packedValue.length == packedBytesLength;
out.writeBytes(packedValue, 0, packedValue.length);
out.writeInt(docID);
if (singleValuePerDoc == false) {
if (longOrds) {
out.writeLong(ord);
@ -76,7 +77,6 @@ final class OfflinePointWriter implements PointWriter {
out.writeInt((int) ord);
}
}
out.writeInt(docID);
count++;
assert expectedCount == 0 || count <= expectedCount;
}

View File

@ -45,7 +45,7 @@ public class TestBKD extends LuceneTestCase {
public void testBasicInts1D() throws Exception {
try (Directory dir = getDirectory(100)) {
BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, 100, true);
BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, 100, true);
byte[] scratch = new byte[4];
for(int docID=0;docID<100;docID++) {
NumericUtils.intToSortableBytes(docID, scratch, 0);
@ -889,4 +889,42 @@ public class TestBKD extends LuceneTestCase {
}
fail("did not see a supporessed CorruptIndexException");
}
public void testTieBreakOrder() throws Exception {
try (Directory dir = newDirectory()) {
int numDocs = 10000;
BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", 1, 4, 2, 0.01f, numDocs, true);
for(int i=0;i<numDocs;i++) {
w.add(new byte[Integer.BYTES], i);
}
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
long fp = w.finish(out);
out.close();
IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
in.seek(fp);
BKDReader r = new BKDReader(in);
r.intersect(new IntersectVisitor() {
int lastDocID = -1;
@Override
public void visit(int docID) {
assertTrue("lastDocID=" + lastDocID + " docID=" + docID, docID > lastDocID);
lastDocID = docID;
}
@Override
public void visit(int docID, byte[] packedValue) {
visit(docID);
}
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
return Relation.CELL_CROSSES_QUERY;
}
});
in.close();
}
}
}