LUCENE-7121: don't write ord for single-valued points, saving 4 bytes per point

This commit is contained in:
Mike McCandless 2016-03-20 09:16:43 -04:00
parent 3a4e1d1142
commit d392940092
12 changed files with 158 additions and 97 deletions

View File

@ -69,6 +69,8 @@ class SimpleTextPointsWriter extends PointsWriter {
@Override @Override
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException { public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
// We use the normal BKDWriter, but subclass to customize how it writes the index and blocks to disk: // We use the normal BKDWriter, but subclass to customize how it writes the index and blocks to disk:
try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(), try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(),
writeState.directory, writeState.directory,
@ -77,7 +79,8 @@ class SimpleTextPointsWriter extends PointsWriter {
fieldInfo.getPointNumBytes(), fieldInfo.getPointNumBytes(),
BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
values.size(fieldInfo.name)) { values.size(fieldInfo.name),
singleValuePerDoc) {
@Override @Override
protected void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException { protected void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {

View File

@ -42,16 +42,19 @@ public abstract class PointsWriter implements Closeable {
* a faster but more complex implementation. */ * a faster but more complex implementation. */
protected void mergeOneField(MergeState mergeState, FieldInfo fieldInfo) throws IOException { protected void mergeOneField(MergeState mergeState, FieldInfo fieldInfo) throws IOException {
long maxPointCount = 0; long maxPointCount = 0;
int docCount = 0;
for (int i=0;i<mergeState.pointsReaders.length;i++) { for (int i=0;i<mergeState.pointsReaders.length;i++) {
PointsReader pointsReader = mergeState.pointsReaders[i]; PointsReader pointsReader = mergeState.pointsReaders[i];
if (pointsReader != null) { if (pointsReader != null) {
FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name); FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
if (readerFieldInfo != null) { if (readerFieldInfo != null) {
maxPointCount += pointsReader.size(fieldInfo.name); maxPointCount += pointsReader.size(fieldInfo.name);
docCount += pointsReader.getDocCount(fieldInfo.name);
} }
} }
} }
final long finalMaxPointCount = maxPointCount; final long finalMaxPointCount = maxPointCount;
final int finalDocCount = docCount;
writeField(fieldInfo, writeField(fieldInfo,
new PointsReader() { new PointsReader() {
@Override @Override
@ -141,7 +144,7 @@ public abstract class PointsWriter implements Closeable {
@Override @Override
public int getDocCount(String fieldName) { public int getDocCount(String fieldName) {
throw new UnsupportedOperationException(); return finalDocCount;
} }
}); });
} }

View File

@ -82,6 +82,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
@Override @Override
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException { public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(), try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(),
writeState.directory, writeState.directory,
writeState.segmentInfo.name, writeState.segmentInfo.name,
@ -89,7 +91,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
fieldInfo.getPointNumBytes(), fieldInfo.getPointNumBytes(),
maxPointsInLeafNode, maxPointsInLeafNode,
maxMBSortInHeap, maxMBSortInHeap,
values.size(fieldInfo.name))) { values.size(fieldInfo.name),
singleValuePerDoc)) {
values.intersect(fieldInfo.name, new IntersectVisitor() { values.intersect(fieldInfo.name, new IntersectVisitor() {
@Override @Override
@ -133,6 +136,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
if (fieldInfo.getPointDimensionCount() != 0) { if (fieldInfo.getPointDimensionCount() != 0) {
if (fieldInfo.getPointDimensionCount() == 1) { if (fieldInfo.getPointDimensionCount() == 1) {
boolean singleValuePerDoc = true;
// Worst case total maximum size (if none of the points are deleted): // Worst case total maximum size (if none of the points are deleted):
long totMaxSize = 0; long totMaxSize = 0;
for(int i=0;i<mergeState.pointsReaders.length;i++) { for(int i=0;i<mergeState.pointsReaders.length;i++) {
@ -142,6 +147,7 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name); FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
if (readerFieldInfo != null) { if (readerFieldInfo != null) {
totMaxSize += reader.size(fieldInfo.name); totMaxSize += reader.size(fieldInfo.name);
singleValuePerDoc &= reader.size(fieldInfo.name) == reader.getDocCount(fieldInfo.name);
} }
} }
} }
@ -157,7 +163,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
fieldInfo.getPointNumBytes(), fieldInfo.getPointNumBytes(),
maxPointsInLeafNode, maxPointsInLeafNode,
maxMBSortInHeap, maxMBSortInHeap,
totMaxSize)) { totMaxSize,
singleValuePerDoc)) {
List<BKDReader> bkdReaders = new ArrayList<>(); List<BKDReader> bkdReaders = new ArrayList<>();
List<MergeState.DocMap> docMaps = new ArrayList<>(); List<MergeState.DocMap> docMaps = new ArrayList<>();
List<Integer> docIDBases = new ArrayList<>(); List<Integer> docIDBases = new ArrayList<>();

View File

@ -32,6 +32,8 @@ class PointValuesWriter {
private final Counter iwBytesUsed; private final Counter iwBytesUsed;
private int[] docIDs; private int[] docIDs;
private int numPoints; private int numPoints;
private int numDocs;
private int lastDocID = -1;
private final byte[] packedValue; private final byte[] packedValue;
public PointValuesWriter(DocumentsWriterPerThread docWriter, FieldInfo fieldInfo) { public PointValuesWriter(DocumentsWriterPerThread docWriter, FieldInfo fieldInfo) {
@ -57,6 +59,10 @@ class PointValuesWriter {
} }
bytes.append(value); bytes.append(value);
docIDs[numPoints] = docID; docIDs[numPoints] = docID;
if (docID != lastDocID) {
numDocs++;
lastDocID = docID;
}
numPoints++; numPoints++;
} }
@ -116,7 +122,7 @@ class PointValuesWriter {
@Override @Override
public int getDocCount(String fieldName) { public int getDocCount(String fieldName) {
throw new UnsupportedOperationException(); return numDocs;
} }
}); });
} }

View File

@ -134,11 +134,16 @@ public class BKDWriter implements Closeable {
/** An upper bound on how many points the caller will add (includes deletions) */ /** An upper bound on how many points the caller will add (includes deletions) */
private final long totalPointCount; private final long totalPointCount;
public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, long totalPointCount) throws IOException { /** True if every document has at most one value. We specialize this case by not bothering to store the ord since it's redundant with docID. */
this(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP, totalPointCount); private final boolean singleValuePerDoc;
private final int maxDoc;
public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, long totalPointCount, boolean singleValuePerDoc) throws IOException {
this(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP, totalPointCount, singleValuePerDoc);
} }
public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount) throws IOException { public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount, boolean singleValuePerDoc) throws IOException {
verifyParams(numDims, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount); verifyParams(numDims, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount);
// We use tracking dir to deal with removing files on exception, so each place that // We use tracking dir to deal with removing files on exception, so each place that
// creates temp files doesn't need crazy try/finally/success logic: // creates temp files doesn't need crazy try/finally/success logic:
@ -148,6 +153,7 @@ public class BKDWriter implements Closeable {
this.numDims = numDims; this.numDims = numDims;
this.bytesPerDim = bytesPerDim; this.bytesPerDim = bytesPerDim;
this.totalPointCount = totalPointCount; this.totalPointCount = totalPointCount;
this.maxDoc = maxDoc;
docsSeen = new FixedBitSet(maxDoc); docsSeen = new FixedBitSet(maxDoc);
packedBytesLength = numDims * bytesPerDim; packedBytesLength = numDims * bytesPerDim;
@ -162,9 +168,14 @@ public class BKDWriter implements Closeable {
// If we may have more than 1+Integer.MAX_VALUE values, then we must encode ords with long (8 bytes), else we can use int (4 bytes). // If we may have more than 1+Integer.MAX_VALUE values, then we must encode ords with long (8 bytes), else we can use int (4 bytes).
longOrds = totalPointCount > Integer.MAX_VALUE; longOrds = totalPointCount > Integer.MAX_VALUE;
this.singleValuePerDoc = singleValuePerDoc;
// dimensional values (numDims * bytesPerDim) + ord (int or long) + docID (int) // dimensional values (numDims * bytesPerDim) + ord (int or long) + docID (int)
if (longOrds) { if (singleValuePerDoc) {
// Lucene only supports up to ~2.1 billion docs (Integer.MAX_VALUE), so we better not need longOrds in this case:
assert longOrds == false;
bytesPerDoc = packedBytesLength + Integer.BYTES;
} else if (longOrds) {
bytesPerDoc = packedBytesLength + Long.BYTES + Integer.BYTES; bytesPerDoc = packedBytesLength + Long.BYTES + Integer.BYTES;
} else { } else {
bytesPerDoc = packedBytesLength + Integer.BYTES + Integer.BYTES; bytesPerDoc = packedBytesLength + Integer.BYTES + Integer.BYTES;
@ -187,7 +198,7 @@ public class BKDWriter implements Closeable {
} }
// We write first maxPointsSortInHeap in heap, then cutover to offline for additional points: // We write first maxPointsSortInHeap in heap, then cutover to offline for additional points:
heapPointWriter = new HeapPointWriter(16, maxPointsSortInHeap, packedBytesLength, longOrds); heapPointWriter = new HeapPointWriter(16, maxPointsSortInHeap, packedBytesLength, longOrds, singleValuePerDoc);
this.maxMBSortInHeap = maxMBSortInHeap; this.maxMBSortInHeap = maxMBSortInHeap;
} }
@ -216,7 +227,7 @@ public class BKDWriter implements Closeable {
private void spillToOffline() throws IOException { private void spillToOffline() throws IOException {
// For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree: // For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree:
offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, "spill"); offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, "spill", singleValuePerDoc);
tempInput = offlinePointWriter.out; tempInput = offlinePointWriter.out;
PointReader reader = heapPointWriter.getReader(0, pointCount); PointReader reader = heapPointWriter.getReader(0, pointCount);
for(int i=0;i<pointCount;i++) { for(int i=0;i<pointCount;i++) {
@ -622,6 +633,7 @@ public class BKDWriter implements Closeable {
writer.docIDs[i] = writer.docIDs[j]; writer.docIDs[i] = writer.docIDs[j];
writer.docIDs[j] = docID; writer.docIDs[j] = docID;
if (singleValuePerDoc == false) {
if (longOrds) { if (longOrds) {
long ord = writer.ordsLong[i]; long ord = writer.ordsLong[i];
writer.ordsLong[i] = writer.ordsLong[j]; writer.ordsLong[i] = writer.ordsLong[j];
@ -631,6 +643,7 @@ public class BKDWriter implements Closeable {
writer.ords[i] = writer.ords[j]; writer.ords[i] = writer.ords[j];
writer.ords[j] = ord; writer.ords[j] = ord;
} }
}
byte[] blockI = writer.blocks.get(i / writer.valuesPerBlock); byte[] blockI = writer.blocks.get(i / writer.valuesPerBlock);
int indexI = (i % writer.valuesPerBlock) * packedBytesLength; int indexI = (i % writer.valuesPerBlock) * packedBytesLength;
@ -681,7 +694,7 @@ public class BKDWriter implements Closeable {
sorted = heapPointWriter; sorted = heapPointWriter;
} else { } else {
// Subsequent dims need a private copy // Subsequent dims need a private copy
sorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength, longOrds); sorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength, longOrds, singleValuePerDoc);
sorted.copyFrom(heapPointWriter); sorted.copyFrom(heapPointWriter);
} }
@ -713,7 +726,9 @@ public class BKDWriter implements Closeable {
// Tie-break by docID: // Tie-break by docID:
int offset; int offset;
if (longOrds) { if (singleValuePerDoc) {
offset = 0;
} else if (longOrds) {
offset = Long.BYTES; offset = Long.BYTES;
} else { } else {
offset = Integer.BYTES; offset = Integer.BYTES;
@ -769,7 +784,7 @@ public class BKDWriter implements Closeable {
assert lastWriter[0] != null; assert lastWriter[0] != null;
return new OfflinePointWriter(tempDir, lastWriter[0], packedBytesLength, pointCount, longOrds); return new OfflinePointWriter(tempDir, lastWriter[0], packedBytesLength, pointCount, longOrds, singleValuePerDoc);
} }
} }
@ -800,7 +815,11 @@ public class BKDWriter implements Closeable {
LongBitSet ordBitSet; LongBitSet ordBitSet;
if (numDims > 1) { if (numDims > 1) {
if (singleValuePerDoc) {
ordBitSet = new LongBitSet(maxDoc);
} else {
ordBitSet = new LongBitSet(pointCount); ordBitSet = new LongBitSet(pointCount);
}
} else { } else {
ordBitSet = null; ordBitSet = null;
} }
@ -867,7 +886,11 @@ public class BKDWriter implements Closeable {
success = true; success = true;
} finally { } finally {
if (success == false) { if (success == false) {
if (tempInput != null) {
IOUtils.closeWhileHandlingException(tempInput);
}
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempDir.getCreatedFiles()); IOUtils.deleteFilesIgnoringExceptions(tempDir, tempDir.getCreatedFiles());
tempInput = null;
} }
} }
@ -1049,7 +1072,7 @@ public class BKDWriter implements Closeable {
private PathSlice switchToHeap(PathSlice source) throws IOException { private PathSlice switchToHeap(PathSlice source) throws IOException {
int count = Math.toIntExact(source.count); int count = Math.toIntExact(source.count);
try ( try (
PointWriter writer = new HeapPointWriter(count, count, packedBytesLength, longOrds); PointWriter writer = new HeapPointWriter(count, count, packedBytesLength, longOrds, singleValuePerDoc);
PointReader reader = source.writer.getReader(source.start, source.count); PointReader reader = source.writer.getReader(source.start, source.count);
) { ) {
for(int i=0;i<count;i++) { for(int i=0;i<count;i++) {
@ -1252,9 +1275,9 @@ public class BKDWriter implements Closeable {
PointWriter getPointWriter(long count, String desc) throws IOException { PointWriter getPointWriter(long count, String desc) throws IOException {
if (count <= maxPointsSortInHeap) { if (count <= maxPointsSortInHeap) {
int size = Math.toIntExact(count); int size = Math.toIntExact(count);
return new HeapPointWriter(size, size, packedBytesLength, longOrds); return new HeapPointWriter(size, size, packedBytesLength, longOrds, singleValuePerDoc);
} else { } else {
return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, desc); return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, desc, singleValuePerDoc);
} }
} }
} }

View File

@ -28,10 +28,12 @@ final class HeapPointReader extends PointReader {
final int[] docIDs; final int[] docIDs;
final int end; final int end;
final byte[] scratch; final byte[] scratch;
final boolean singleValuePerDoc;
HeapPointReader(List<byte[]> blocks, int valuesPerBlock, int packedBytesLength, int[] ords, long[] ordsLong, int[] docIDs, int start, int end) { HeapPointReader(List<byte[]> blocks, int valuesPerBlock, int packedBytesLength, int[] ords, long[] ordsLong, int[] docIDs, int start, int end, boolean singleValuePerDoc) {
this.blocks = blocks; this.blocks = blocks;
this.valuesPerBlock = valuesPerBlock; this.valuesPerBlock = valuesPerBlock;
this.singleValuePerDoc = singleValuePerDoc;
this.ords = ords; this.ords = ords;
this.ordsLong = ordsLong; this.ordsLong = ordsLong;
this.docIDs = docIDs; this.docIDs = docIDs;
@ -75,7 +77,9 @@ final class HeapPointReader extends PointReader {
@Override @Override
public long ord() { public long ord() {
if (ordsLong != null) { if (singleValuePerDoc) {
return docIDs[curRead];
} else if (ordsLong != null) {
return ordsLong[curRead]; return ordsLong[curRead];
} else { } else {
return ords[curRead]; return ords[curRead];

View File

@ -31,18 +31,25 @@ final class HeapPointWriter implements PointWriter {
final int maxSize; final int maxSize;
final int valuesPerBlock; final int valuesPerBlock;
final int packedBytesLength; final int packedBytesLength;
final boolean singleValuePerDoc;
// NOTE: can't use ByteBlockPool because we need random-write access when sorting in heap // NOTE: can't use ByteBlockPool because we need random-write access when sorting in heap
final List<byte[]> blocks = new ArrayList<>(); final List<byte[]> blocks = new ArrayList<>();
public HeapPointWriter(int initSize, int maxSize, int packedBytesLength, boolean longOrds) { public HeapPointWriter(int initSize, int maxSize, int packedBytesLength, boolean longOrds, boolean singleValuePerDoc) {
docIDs = new int[initSize]; docIDs = new int[initSize];
this.maxSize = maxSize; this.maxSize = maxSize;
this.packedBytesLength = packedBytesLength; this.packedBytesLength = packedBytesLength;
this.singleValuePerDoc = singleValuePerDoc;
if (singleValuePerDoc) {
this.ordsLong = null;
this.ords = null;
} else {
if (longOrds) { if (longOrds) {
this.ordsLong = new long[initSize]; this.ordsLong = new long[initSize];
} else { } else {
this.ords = new int[initSize]; this.ords = new int[initSize];
} }
}
// 4K per page, unless each value is > 4K: // 4K per page, unless each value is > 4K:
valuesPerBlock = Math.max(1, 4096/packedBytesLength); valuesPerBlock = Math.max(1, 4096/packedBytesLength);
} }
@ -52,6 +59,7 @@ final class HeapPointWriter implements PointWriter {
throw new IllegalStateException("docIDs.length=" + docIDs.length + " other.nextWrite=" + other.nextWrite); throw new IllegalStateException("docIDs.length=" + docIDs.length + " other.nextWrite=" + other.nextWrite);
} }
System.arraycopy(other.docIDs, 0, docIDs, 0, other.nextWrite); System.arraycopy(other.docIDs, 0, docIDs, 0, other.nextWrite);
if (singleValuePerDoc == false) {
if (other.ords != null) { if (other.ords != null) {
assert this.ords != null; assert this.ords != null;
System.arraycopy(other.ords, 0, ords, 0, other.nextWrite); System.arraycopy(other.ords, 0, ords, 0, other.nextWrite);
@ -59,6 +67,7 @@ final class HeapPointWriter implements PointWriter {
assert this.ordsLong != null; assert this.ordsLong != null;
System.arraycopy(other.ordsLong, 0, ordsLong, 0, other.nextWrite); System.arraycopy(other.ordsLong, 0, ordsLong, 0, other.nextWrite);
} }
}
for(byte[] block : other.blocks) { for(byte[] block : other.blocks) {
blocks.add(block.clone()); blocks.add(block.clone());
@ -117,19 +126,23 @@ final class HeapPointWriter implements PointWriter {
int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, Integer.BYTES)); int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, Integer.BYTES));
assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite; assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
docIDs = growExact(docIDs, nextSize); docIDs = growExact(docIDs, nextSize);
if (singleValuePerDoc == false) {
if (ordsLong != null) { if (ordsLong != null) {
ordsLong = growExact(ordsLong, nextSize); ordsLong = growExact(ordsLong, nextSize);
} else { } else {
ords = growExact(ords, nextSize); ords = growExact(ords, nextSize);
} }
} }
}
writePackedValue(nextWrite, packedValue); writePackedValue(nextWrite, packedValue);
if (singleValuePerDoc == false) {
if (ordsLong != null) { if (ordsLong != null) {
ordsLong[nextWrite] = ord; ordsLong[nextWrite] = ord;
} else { } else {
assert ord <= Integer.MAX_VALUE; assert ord <= Integer.MAX_VALUE;
ords[nextWrite] = (int) ord; ords[nextWrite] = (int) ord;
} }
}
docIDs[nextWrite] = docID; docIDs[nextWrite] = docID;
nextWrite++; nextWrite++;
} }
@ -137,7 +150,7 @@ final class HeapPointWriter implements PointWriter {
@Override @Override
public PointReader getReader(long start, long length) { public PointReader getReader(long start, long length) {
assert start + length <= docIDs.length: "start=" + start + " length=" + length + " docIDs.length=" + docIDs.length; assert start + length <= docIDs.length: "start=" + start + " length=" + length + " docIDs.length=" + docIDs.length;
return new HeapPointReader(blocks, valuesPerBlock, packedBytesLength, ords, ordsLong, docIDs, (int) start, nextWrite); return new HeapPointReader(blocks, valuesPerBlock, packedBytesLength, ords, ordsLong, docIDs, (int) start, nextWrite, singleValuePerDoc);
} }
@Override @Override

View File

@ -32,19 +32,24 @@ final class OfflinePointReader extends PointReader {
long countLeft; long countLeft;
final IndexInput in; final IndexInput in;
private final byte[] packedValue; private final byte[] packedValue;
final boolean singleValuePerDoc;
private long ord; private long ord;
private int docID; private int docID;
// true if ords are written as long (8 bytes), else 4 bytes // true if ords are written as long (8 bytes), else 4 bytes
private boolean longOrds; private boolean longOrds;
private boolean checked; private boolean checked;
OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long start, long length, boolean longOrds) throws IOException { OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long start, long length,
boolean longOrds, boolean singleValuePerDoc) throws IOException {
this.singleValuePerDoc = singleValuePerDoc;
int bytesPerDoc = packedBytesLength + Integer.BYTES; int bytesPerDoc = packedBytesLength + Integer.BYTES;
if (singleValuePerDoc == false) {
if (longOrds) { if (longOrds) {
bytesPerDoc += Long.BYTES; bytesPerDoc += Long.BYTES;
} else { } else {
bytesPerDoc += Integer.BYTES; bytesPerDoc += Integer.BYTES;
} }
}
if ((start + length) * bytesPerDoc + CodecUtil.footerLength() > tempDir.fileLength(tempFileName)) { if ((start + length) * bytesPerDoc + CodecUtil.footerLength() > tempDir.fileLength(tempFileName)) {
throw new IllegalArgumentException("requested slice is beyond the length of this file: start=" + start + " length=" + length + " bytesPerDoc=" + bytesPerDoc + " fileLength=" + tempDir.fileLength(tempFileName) + " tempFileName=" + tempFileName); throw new IllegalArgumentException("requested slice is beyond the length of this file: start=" + start + " length=" + length + " bytesPerDoc=" + bytesPerDoc + " fileLength=" + tempDir.fileLength(tempFileName) + " tempFileName=" + tempFileName);
@ -84,12 +89,16 @@ final class OfflinePointReader extends PointReader {
assert countLeft == -1; assert countLeft == -1;
return false; return false;
} }
if (singleValuePerDoc == false) {
if (longOrds) { if (longOrds) {
ord = in.readLong(); ord = in.readLong();
} else { } else {
ord = in.readInt(); ord = in.readInt();
} }
docID = in.readInt(); docID = in.readInt();
} else {
ord = docID = in.readInt();
}
return true; return true;
} }
@ -135,11 +144,13 @@ final class OfflinePointReader extends PointReader {
int packedBytesLength = packedValue.length; int packedBytesLength = packedValue.length;
int bytesPerDoc = packedBytesLength + Integer.BYTES; int bytesPerDoc = packedBytesLength + Integer.BYTES;
if (singleValuePerDoc == false) {
if (longOrds) { if (longOrds) {
bytesPerDoc += Long.BYTES; bytesPerDoc += Long.BYTES;
} else { } else {
bytesPerDoc += Integer.BYTES; bytesPerDoc += Integer.BYTES;
} }
}
long rightCount = 0; long rightCount = 0;
@ -159,8 +170,10 @@ final class OfflinePointReader extends PointReader {
if (longOrds) { if (longOrds) {
ord = readLong(buffer, packedBytesLength); ord = readLong(buffer, packedBytesLength);
} else { } else {
// This is either ord (multi-valued case) or docID (which we use as ord in the single valued case):
ord = readInt(buffer, packedBytesLength); ord = readInt(buffer, packedBytesLength);
} }
if (rightTree.get(ord)) { if (rightTree.get(ord)) {
rightOut.writeBytes(buffer, 0, bytesPerDoc); rightOut.writeBytes(buffer, 0, bytesPerDoc);
if (doClearBits) { if (doClearBits) {

View File

@ -19,7 +19,6 @@ package org.apache.lucene.util.bkd;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
@ -30,38 +29,43 @@ final class OfflinePointWriter implements PointWriter {
final Directory tempDir; final Directory tempDir;
final IndexOutput out; final IndexOutput out;
final int packedBytesLength; final int packedBytesLength;
final boolean singleValuePerDoc;
long count; long count;
private boolean closed; private boolean closed;
// true if ords are written as long (8 bytes), else 4 bytes // true if ords are written as long (8 bytes), else 4 bytes
private boolean longOrds; private boolean longOrds;
public OfflinePointWriter(Directory tempDir, String tempFileNamePrefix, int packedBytesLength, boolean longOrds, String desc) throws IOException { public OfflinePointWriter(Directory tempDir, String tempFileNamePrefix, int packedBytesLength, boolean longOrds, String desc, boolean singleValuePerDoc) throws IOException {
this.out = tempDir.createTempOutput(tempFileNamePrefix, "bkd_" + desc, IOContext.DEFAULT); this.out = tempDir.createTempOutput(tempFileNamePrefix, "bkd_" + desc, IOContext.DEFAULT);
this.tempDir = tempDir; this.tempDir = tempDir;
this.packedBytesLength = packedBytesLength; this.packedBytesLength = packedBytesLength;
this.longOrds = longOrds; this.longOrds = longOrds;
this.singleValuePerDoc = singleValuePerDoc;
} }
/** Initializes on an already written/closed file, just so consumers can use {@link #getReader} to read the file. */ /** Initializes on an already written/closed file, just so consumers can use {@link #getReader} to read the file. */
public OfflinePointWriter(Directory tempDir, IndexOutput out, int packedBytesLength, long count, boolean longOrds) { public OfflinePointWriter(Directory tempDir, IndexOutput out, int packedBytesLength, long count, boolean longOrds, boolean singleValuePerDoc) {
this.out = out; this.out = out;
this.tempDir = tempDir; this.tempDir = tempDir;
this.packedBytesLength = packedBytesLength; this.packedBytesLength = packedBytesLength;
this.count = count; this.count = count;
closed = true; closed = true;
this.longOrds = longOrds; this.longOrds = longOrds;
this.singleValuePerDoc = singleValuePerDoc;
} }
@Override @Override
public void append(byte[] packedValue, long ord, int docID) throws IOException { public void append(byte[] packedValue, long ord, int docID) throws IOException {
assert packedValue.length == packedBytesLength; assert packedValue.length == packedBytesLength;
out.writeBytes(packedValue, 0, packedValue.length); out.writeBytes(packedValue, 0, packedValue.length);
if (singleValuePerDoc == false) {
if (longOrds) { if (longOrds) {
out.writeLong(ord); out.writeLong(ord);
} else { } else {
assert ord <= Integer.MAX_VALUE; assert ord <= Integer.MAX_VALUE;
out.writeInt((int) ord); out.writeInt((int) ord);
} }
}
out.writeInt(docID); out.writeInt(docID);
count++; count++;
} }
@ -70,7 +74,7 @@ final class OfflinePointWriter implements PointWriter {
public PointReader getReader(long start, long length) throws IOException { public PointReader getReader(long start, long length) throws IOException {
assert closed; assert closed;
assert start + length <= count: "start=" + start + " length=" + length + " count=" + count; assert start + length <= count: "start=" + start + " length=" + length + " count=" + count;
return new OfflinePointReader(tempDir, out.getName(), packedBytesLength, start, length, longOrds); return new OfflinePointReader(tempDir, out.getName(), packedBytesLength, start, length, longOrds, singleValuePerDoc);
} }
@Override @Override

View File

@ -16,29 +16,14 @@
*/ */
package org.apache.lucene.util.bkd; package org.apache.lucene.util.bkd;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.LuceneTestCase.Monster; import org.apache.lucene.util.LuceneTestCase.Monster;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TimeUnits; import org.apache.lucene.util.TimeUnits;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite; import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
@ -55,7 +40,7 @@ public class Test2BBKDPoints extends LuceneTestCase {
final int numDocs = (Integer.MAX_VALUE / 26) + 100; final int numDocs = (Integer.MAX_VALUE / 26) + 100;
BKDWriter w = new BKDWriter(numDocs, dir, "_0", 1, Long.BYTES, 26L * numDocs); BKDWriter w = new BKDWriter(numDocs, dir, "_0", 1, Long.BYTES, 26L * numDocs, false);
int counter = 0; int counter = 0;
byte[] packedBytes = new byte[Long.BYTES]; byte[] packedBytes = new byte[Long.BYTES];
for (int docID = 0; docID < numDocs; docID++) { for (int docID = 0; docID < numDocs; docID++) {
@ -88,7 +73,7 @@ public class Test2BBKDPoints extends LuceneTestCase {
final int numDocs = (Integer.MAX_VALUE / 26) + 100; final int numDocs = (Integer.MAX_VALUE / 26) + 100;
BKDWriter w = new BKDWriter(numDocs, dir, "_0", 2, Long.BYTES, 26L * numDocs); BKDWriter w = new BKDWriter(numDocs, dir, "_0", 2, Long.BYTES, 26L * numDocs, false);
int counter = 0; int counter = 0;
byte[] packedBytes = new byte[2*Long.BYTES]; byte[] packedBytes = new byte[2*Long.BYTES];
for (int docID = 0; docID < numDocs; docID++) { for (int docID = 0; docID < numDocs; docID++) {

View File

@ -43,8 +43,8 @@ import org.apache.lucene.util.TestUtil;
public class TestBKD extends LuceneTestCase { public class TestBKD extends LuceneTestCase {
private long randomPointCount() { private long randomPointCount(boolean singleValuePerDoc) {
if (random().nextBoolean()) { if (singleValuePerDoc || random().nextBoolean()) {
return random().nextInt(Integer.MAX_VALUE); return random().nextInt(Integer.MAX_VALUE);
} else { } else {
return random().nextLong() & Long.MAX_VALUE; return random().nextLong() & Long.MAX_VALUE;
@ -53,7 +53,7 @@ public class TestBKD extends LuceneTestCase {
public void testBasicInts1D() throws Exception { public void testBasicInts1D() throws Exception {
try (Directory dir = getDirectory(100)) { try (Directory dir = getDirectory(100)) {
BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, randomPointCount()); BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, randomPointCount(true), true);
byte[] scratch = new byte[4]; byte[] scratch = new byte[4];
for(int docID=0;docID<100;docID++) { for(int docID=0;docID<100;docID++) {
NumericUtils.intToSortableBytes(docID, scratch, 0); NumericUtils.intToSortableBytes(docID, scratch, 0);
@ -128,7 +128,7 @@ public class TestBKD extends LuceneTestCase {
int numDims = TestUtil.nextInt(random(), 1, 5); int numDims = TestUtil.nextInt(random(), 1, 5);
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100); int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
float maxMB = (float) 3.0 + (3*random().nextFloat()); float maxMB = (float) 3.0 + (3*random().nextFloat());
BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, 4, maxPointsInLeafNode, maxMB, randomPointCount()); BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, 4, maxPointsInLeafNode, maxMB, randomPointCount(true), true);
if (VERBOSE) { if (VERBOSE) {
System.out.println("TEST: numDims=" + numDims + " numDocs=" + numDocs); System.out.println("TEST: numDims=" + numDims + " numDocs=" + numDocs);
@ -269,7 +269,7 @@ public class TestBKD extends LuceneTestCase {
int numDims = TestUtil.nextInt(random(), 1, 5); int numDims = TestUtil.nextInt(random(), 1, 5);
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100); int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
float maxMB = (float) 3.0 + (3*random().nextFloat()); float maxMB = (float) 3.0 + (3*random().nextFloat());
BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount()); BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount(true), true);
BigInteger[][] docs = new BigInteger[numDocs][]; BigInteger[][] docs = new BigInteger[numDocs][];
byte[] scratch = new byte[numBytesPerDim*numDims]; byte[] scratch = new byte[numBytesPerDim*numDims];
@ -442,7 +442,7 @@ public class TestBKD extends LuceneTestCase {
public void testTooLittleHeap() throws Exception { public void testTooLittleHeap() throws Exception {
try (Directory dir = getDirectory(0)) { try (Directory dir = getDirectory(0)) {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
new BKDWriter(1, dir, "bkd", 1, 16, 1000000, 0.001, randomPointCount()); new BKDWriter(1, dir, "bkd", 1, 16, 1000000, 0.001, randomPointCount(true), true);
}); });
assertTrue(expected.getMessage().contains("either increase maxMBSortInHeap or decrease maxPointsInLeafNode")); assertTrue(expected.getMessage().contains("either increase maxMBSortInHeap or decrease maxPointsInLeafNode"));
} }
@ -565,7 +565,7 @@ public class TestBKD extends LuceneTestCase {
List<Integer> docIDBases = null; List<Integer> docIDBases = null;
int seg = 0; int seg = 0;
BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount()); BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount(false), false);
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT); IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
IndexInput in = null; IndexInput in = null;
@ -619,7 +619,7 @@ public class TestBKD extends LuceneTestCase {
seg++; seg++;
maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 1000); maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 1000);
maxMB = (float) 3.0 + (3*random().nextDouble()); maxMB = (float) 3.0 + (3*random().nextDouble());
w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount()); w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount(false), false);
lastDocIDBase = docID; lastDocIDBase = docID;
} }
} }
@ -634,7 +634,7 @@ public class TestBKD extends LuceneTestCase {
out.close(); out.close();
in = dir.openInput("bkd", IOContext.DEFAULT); in = dir.openInput("bkd", IOContext.DEFAULT);
seg++; seg++;
w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount()); w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount(false), false);
List<BKDReader> readers = new ArrayList<>(); List<BKDReader> readers = new ArrayList<>();
for(long fp : toMerge) { for(long fp : toMerge) {
in.seek(fp); in.seek(fp);

View File

@ -229,7 +229,7 @@ public abstract class BasePointsFormatTestCase extends BaseIndexFileFormatTestCa
throw ise; throw ise;
} }
} catch (AssertionError ae) { } catch (AssertionError ae) {
if (ae.getMessage().contains("does not exist; files=")) { if (ae.getMessage() != null && ae.getMessage().contains("does not exist; files=")) {
// OK: likely we threw the random IOExc when IW was asserting the commit files exist // OK: likely we threw the random IOExc when IW was asserting the commit files exist
done = true; done = true;
} else { } else {