mirror of https://github.com/apache/lucene.git
Optimize BKDWriter's offline sort comparator: compare the sort dimension's bytes in place and decode docID/ord only to break ties
This commit is contained in:
parent
0f4f53a8f5
commit
251cdbcee3
|
@ -377,6 +377,7 @@ public abstract class StringHelper {
|
||||||
* big-endian unsigned values. Returns positive int if a > b,
|
* big-endian unsigned values. Returns positive int if a > b,
|
||||||
* negative int if a < b and 0 if a == b */
|
* negative int if a < b and 0 if a == b */
|
||||||
public static int compare(int count, byte[] a, int aOffset, byte[] b, int bOffset) {
|
public static int compare(int count, byte[] a, int aOffset, byte[] b, int bOffset) {
|
||||||
|
// TODO: dedup this w/ BytesRef.compareTo?
|
||||||
for(int i=0;i<count;i++) {
|
for(int i=0;i<count;i++) {
|
||||||
int cmp = (a[aOffset+i]&0xff) - (b[bOffset+i]&0xff);
|
int cmp = (a[aOffset+i]&0xff) - (b[bOffset+i]&0xff);
|
||||||
if (cmp != 0) {
|
if (cmp != 0) {
|
||||||
|
|
|
@ -665,34 +665,35 @@ public class BKDWriter implements Closeable {
|
||||||
// Offline sort:
|
// Offline sort:
|
||||||
assert tempInput != null;
|
assert tempInput != null;
|
||||||
|
|
||||||
final ByteArrayDataInput reader = new ByteArrayDataInput();
|
|
||||||
Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
|
Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
|
||||||
private final ByteArrayDataInput readerB = new ByteArrayDataInput();
|
|
||||||
|
final ByteArrayDataInput reader = new ByteArrayDataInput();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compare(BytesRef a, BytesRef b) {
|
public int compare(BytesRef a, BytesRef b) {
|
||||||
reader.reset(a.bytes, a.offset, a.length);
|
|
||||||
reader.readBytes(scratch1, 0, scratch1.length);
|
|
||||||
final int docIDA = reader.readVInt();
|
|
||||||
final long ordA = reader.readVLong();
|
|
||||||
|
|
||||||
reader.reset(b.bytes, b.offset, b.length);
|
// First compare the bytes on the dimension we are sorting on:
|
||||||
reader.readBytes(scratch2, 0, scratch2.length);
|
int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + bytesPerDim*dim, b.bytes, b.offset + bytesPerDim*dim);
|
||||||
final int docIDB = reader.readVInt();
|
|
||||||
final long ordB = reader.readVLong();
|
|
||||||
|
|
||||||
int cmp = StringHelper.compare(bytesPerDim, scratch1, bytesPerDim*dim, scratch2, bytesPerDim*dim);
|
|
||||||
|
|
||||||
if (cmp != 0) {
|
if (cmp != 0) {
|
||||||
return cmp;
|
return cmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tie-break
|
// Tie-break by docID and then ord:
|
||||||
|
reader.reset(a.bytes, a.offset + packedBytesLength, a.length);
|
||||||
|
final int docIDA = reader.readVInt();
|
||||||
|
final long ordA = reader.readVLong();
|
||||||
|
|
||||||
|
reader.reset(b.bytes, b.offset + packedBytesLength, b.length);
|
||||||
|
final int docIDB = reader.readVInt();
|
||||||
|
final long ordB = reader.readVLong();
|
||||||
|
|
||||||
cmp = Integer.compare(docIDA, docIDB);
|
cmp = Integer.compare(docIDA, docIDB);
|
||||||
if (cmp != 0) {
|
if (cmp != 0) {
|
||||||
return cmp;
|
return cmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: is this really necessary? If OfflineSorter is stable, we can safely return 0 here, and avoid writing ords?
|
||||||
return Long.compare(ordA, ordB);
|
return Long.compare(ordA, ordB);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue