simplify BKDWriter's comparator tie break for its temp files; add test case verifying tie break by docID

This commit is contained in:
Mike McCandless 2016-03-24 10:02:48 -04:00
parent d9de158676
commit 2ca08aa6ad
4 changed files with 49 additions and 23 deletions

View File

@ -716,32 +716,20 @@ public class BKDWriter implements Closeable {
@Override @Override
public int compare(BytesRef a, BytesRef b) { public int compare(BytesRef a, BytesRef b) {
// First compare by the requested dimension we are sorting by:
// First compare the bytes on the dimension we are sorting on:
int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + bytesPerDim*dim, b.bytes, b.offset + bytesPerDim*dim); int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + bytesPerDim*dim, b.bytes, b.offset + bytesPerDim*dim);
if (cmp != 0) { if (cmp != 0) {
return cmp; return cmp;
} }
// Tie-break by docID: // Tie-break by docID ... no need to tie break on ord, for the case where the same doc has
int offset; // the same value in a given dimension indexed more than once: it can't matter at search
if (singleValuePerDoc) { // time since we don't write ords into the index:
offset = 0;
} else if (longOrds) {
offset = Long.BYTES;
} else {
offset = Integer.BYTES;
}
reader.reset(a.bytes, a.offset + packedBytesLength + offset, a.length);
final int docIDA = reader.readInt();
reader.reset(b.bytes, b.offset + packedBytesLength + offset, b.length); return StringHelper.compare(Integer.BYTES,
final int docIDB = reader.readInt(); a.bytes, a.offset + packedBytesLength,
b.bytes, b.offset + packedBytesLength);
// No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it
// can't matter at search time since we don't write ords into the index:
return Integer.compare(docIDA, docIDB);
} }
}; };

View File

@ -93,15 +93,15 @@ final class OfflinePointReader extends PointReader {
assert countLeft == -1; assert countLeft == -1;
return false; return false;
} }
docID = in.readInt();
if (singleValuePerDoc == false) { if (singleValuePerDoc == false) {
if (longOrds) { if (longOrds) {
ord = in.readLong(); ord = in.readLong();
} else { } else {
ord = in.readInt(); ord = in.readInt();
} }
docID = in.readInt();
} else { } else {
ord = docID = in.readInt(); ord = docID;
} }
return true; return true;
} }

View File

@ -68,6 +68,7 @@ final class OfflinePointWriter implements PointWriter {
public void append(byte[] packedValue, long ord, int docID) throws IOException { public void append(byte[] packedValue, long ord, int docID) throws IOException {
assert packedValue.length == packedBytesLength; assert packedValue.length == packedBytesLength;
out.writeBytes(packedValue, 0, packedValue.length); out.writeBytes(packedValue, 0, packedValue.length);
out.writeInt(docID);
if (singleValuePerDoc == false) { if (singleValuePerDoc == false) {
if (longOrds) { if (longOrds) {
out.writeLong(ord); out.writeLong(ord);
@ -76,7 +77,6 @@ final class OfflinePointWriter implements PointWriter {
out.writeInt((int) ord); out.writeInt((int) ord);
} }
} }
out.writeInt(docID);
count++; count++;
assert expectedCount == 0 || count <= expectedCount; assert expectedCount == 0 || count <= expectedCount;
} }

View File

@ -889,4 +889,42 @@ public class TestBKD extends LuceneTestCase {
} }
fail("did not see a supporessed CorruptIndexException"); fail("did not see a supporessed CorruptIndexException");
} }
/**
 * Indexes {@code numDocs} points that all carry the same all-zero
 * {@code Integer.BYTES}-wide value, one per docID {@code 0..numDocs-1}, then
 * intersects the resulting tree and asserts that docIDs are visited in
 * strictly increasing order, i.e. that equal values are tie-broken by docID.
 */
public void testTieBreakOrder() throws Exception {
  try (Directory dir = newDirectory()) {
    int numDocs = 10000;
    // NOTE(review): the 0.01f argument is presumably a very small sort-heap budget
    // meant to exercise the writer's temp-file sort path -- confirm against the
    // BKDWriter constructor.
    BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", 1, 4, 2, 0.01f, numDocs, true);
    for (int i = 0; i < numDocs; i++) {
      // Every point has the identical (zeroed) value; only the docID differs.
      w.add(new byte[Integer.BYTES], i);
    }

    // Close the output via try-with-resources so it is released even if finish() throws.
    final long fp;
    try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
      fp = w.finish(out);
    }

    // Likewise for the input: an assertion failure inside intersect() must not
    // leave the file handle open.
    try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
      in.seek(fp);
      BKDReader r = new BKDReader(in);
      r.intersect(new IntersectVisitor() {
          // Most recently visited docID; -1 before the first visit.
          int lastDocID = -1;

          @Override
          public void visit(int docID) {
            assertTrue("lastDocID=" + lastDocID + " docID=" + docID, docID > lastDocID);
            lastDocID = docID;
          }

          @Override
          public void visit(int docID, byte[] packedValue) {
            visit(docID);
          }

          @Override
          public Relation compare(byte[] minPacked, byte[] maxPacked) {
            // Always report "crosses" so the reader calls a visit method for
            // every document instead of short-circuiting on cell bounds.
            return Relation.CELL_CROSSES_QUERY;
          }
        });
    }
  }
}
} }