mirror of https://github.com/apache/lucene.git
LUCENE-7121: don't write ord for single-valued points, saving 4 bytes per point
This commit is contained in:
parent
3a4e1d1142
commit
d392940092
|
@ -69,6 +69,8 @@ class SimpleTextPointsWriter extends PointsWriter {
|
|||
@Override
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
|
||||
|
||||
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
|
||||
|
||||
// We use the normal BKDWriter, but subclass to customize how it writes the index and blocks to disk:
|
||||
try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(),
|
||||
writeState.directory,
|
||||
|
@ -77,7 +79,8 @@ class SimpleTextPointsWriter extends PointsWriter {
|
|||
fieldInfo.getPointNumBytes(),
|
||||
BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
|
||||
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
|
||||
values.size(fieldInfo.name)) {
|
||||
values.size(fieldInfo.name),
|
||||
singleValuePerDoc) {
|
||||
|
||||
@Override
|
||||
protected void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {
|
||||
|
|
|
@ -42,16 +42,19 @@ public abstract class PointsWriter implements Closeable {
|
|||
* a faster but more complex implementation. */
|
||||
protected void mergeOneField(MergeState mergeState, FieldInfo fieldInfo) throws IOException {
|
||||
long maxPointCount = 0;
|
||||
int docCount = 0;
|
||||
for (int i=0;i<mergeState.pointsReaders.length;i++) {
|
||||
PointsReader pointsReader = mergeState.pointsReaders[i];
|
||||
if (pointsReader != null) {
|
||||
FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
|
||||
if (readerFieldInfo != null) {
|
||||
maxPointCount += pointsReader.size(fieldInfo.name);
|
||||
docCount += pointsReader.getDocCount(fieldInfo.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
final long finalMaxPointCount = maxPointCount;
|
||||
final int finalDocCount = docCount;
|
||||
writeField(fieldInfo,
|
||||
new PointsReader() {
|
||||
@Override
|
||||
|
@ -141,7 +144,7 @@ public abstract class PointsWriter implements Closeable {
|
|||
|
||||
@Override
|
||||
public int getDocCount(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
return finalDocCount;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
|
@ -82,6 +82,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
|
|||
@Override
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
|
||||
|
||||
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
|
||||
|
||||
try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(),
|
||||
writeState.directory,
|
||||
writeState.segmentInfo.name,
|
||||
|
@ -89,7 +91,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
|
|||
fieldInfo.getPointNumBytes(),
|
||||
maxPointsInLeafNode,
|
||||
maxMBSortInHeap,
|
||||
values.size(fieldInfo.name))) {
|
||||
values.size(fieldInfo.name),
|
||||
singleValuePerDoc)) {
|
||||
|
||||
values.intersect(fieldInfo.name, new IntersectVisitor() {
|
||||
@Override
|
||||
|
@ -133,6 +136,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
|
|||
if (fieldInfo.getPointDimensionCount() != 0) {
|
||||
if (fieldInfo.getPointDimensionCount() == 1) {
|
||||
|
||||
boolean singleValuePerDoc = true;
|
||||
|
||||
// Worst case total maximum size (if none of the points are deleted):
|
||||
long totMaxSize = 0;
|
||||
for(int i=0;i<mergeState.pointsReaders.length;i++) {
|
||||
|
@ -142,6 +147,7 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
|
|||
FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
|
||||
if (readerFieldInfo != null) {
|
||||
totMaxSize += reader.size(fieldInfo.name);
|
||||
singleValuePerDoc &= reader.size(fieldInfo.name) == reader.getDocCount(fieldInfo.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -157,7 +163,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
|
|||
fieldInfo.getPointNumBytes(),
|
||||
maxPointsInLeafNode,
|
||||
maxMBSortInHeap,
|
||||
totMaxSize)) {
|
||||
totMaxSize,
|
||||
singleValuePerDoc)) {
|
||||
List<BKDReader> bkdReaders = new ArrayList<>();
|
||||
List<MergeState.DocMap> docMaps = new ArrayList<>();
|
||||
List<Integer> docIDBases = new ArrayList<>();
|
||||
|
|
|
@ -32,6 +32,8 @@ class PointValuesWriter {
|
|||
private final Counter iwBytesUsed;
|
||||
private int[] docIDs;
|
||||
private int numPoints;
|
||||
private int numDocs;
|
||||
private int lastDocID = -1;
|
||||
private final byte[] packedValue;
|
||||
|
||||
public PointValuesWriter(DocumentsWriterPerThread docWriter, FieldInfo fieldInfo) {
|
||||
|
@ -57,6 +59,10 @@ class PointValuesWriter {
|
|||
}
|
||||
bytes.append(value);
|
||||
docIDs[numPoints] = docID;
|
||||
if (docID != lastDocID) {
|
||||
numDocs++;
|
||||
lastDocID = docID;
|
||||
}
|
||||
numPoints++;
|
||||
}
|
||||
|
||||
|
@ -116,7 +122,7 @@ class PointValuesWriter {
|
|||
|
||||
@Override
|
||||
public int getDocCount(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
return numDocs;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
|
@ -134,11 +134,16 @@ public class BKDWriter implements Closeable {
|
|||
/** An upper bound on how many points the caller will add (includes deletions) */
|
||||
private final long totalPointCount;
|
||||
|
||||
public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, long totalPointCount) throws IOException {
|
||||
this(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP, totalPointCount);
|
||||
/** True if every document has at most one value. We specialize this case by not bothering to store the ord since it's redundant with docID. */
|
||||
private final boolean singleValuePerDoc;
|
||||
|
||||
private final int maxDoc;
|
||||
|
||||
public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, long totalPointCount, boolean singleValuePerDoc) throws IOException {
|
||||
this(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP, totalPointCount, singleValuePerDoc);
|
||||
}
|
||||
|
||||
public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount) throws IOException {
|
||||
public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount, boolean singleValuePerDoc) throws IOException {
|
||||
verifyParams(numDims, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount);
|
||||
// We use tracking dir to deal with removing files on exception, so each place that
|
||||
// creates temp files doesn't need crazy try/finally/sucess logic:
|
||||
|
@ -148,6 +153,7 @@ public class BKDWriter implements Closeable {
|
|||
this.numDims = numDims;
|
||||
this.bytesPerDim = bytesPerDim;
|
||||
this.totalPointCount = totalPointCount;
|
||||
this.maxDoc = maxDoc;
|
||||
docsSeen = new FixedBitSet(maxDoc);
|
||||
packedBytesLength = numDims * bytesPerDim;
|
||||
|
||||
|
@ -162,9 +168,14 @@ public class BKDWriter implements Closeable {
|
|||
|
||||
// If we may have more than 1+Integer.MAX_VALUE values, then we must encode ords with long (8 bytes), else we can use int (4 bytes).
|
||||
longOrds = totalPointCount > Integer.MAX_VALUE;
|
||||
this.singleValuePerDoc = singleValuePerDoc;
|
||||
|
||||
// dimensional values (numDims * bytesPerDim) + ord (int or long) + docID (int)
|
||||
if (longOrds) {
|
||||
if (singleValuePerDoc) {
|
||||
// Lucene only supports up to 2.1 docs, so we better not need longOrds in this case:
|
||||
assert longOrds == false;
|
||||
bytesPerDoc = packedBytesLength + Integer.BYTES;
|
||||
} else if (longOrds) {
|
||||
bytesPerDoc = packedBytesLength + Long.BYTES + Integer.BYTES;
|
||||
} else {
|
||||
bytesPerDoc = packedBytesLength + Integer.BYTES + Integer.BYTES;
|
||||
|
@ -187,7 +198,7 @@ public class BKDWriter implements Closeable {
|
|||
}
|
||||
|
||||
// We write first maxPointsSortInHeap in heap, then cutover to offline for additional points:
|
||||
heapPointWriter = new HeapPointWriter(16, maxPointsSortInHeap, packedBytesLength, longOrds);
|
||||
heapPointWriter = new HeapPointWriter(16, maxPointsSortInHeap, packedBytesLength, longOrds, singleValuePerDoc);
|
||||
|
||||
this.maxMBSortInHeap = maxMBSortInHeap;
|
||||
}
|
||||
|
@ -216,7 +227,7 @@ public class BKDWriter implements Closeable {
|
|||
private void spillToOffline() throws IOException {
|
||||
|
||||
// For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree:
|
||||
offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, "spill");
|
||||
offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, "spill", singleValuePerDoc);
|
||||
tempInput = offlinePointWriter.out;
|
||||
PointReader reader = heapPointWriter.getReader(0, pointCount);
|
||||
for(int i=0;i<pointCount;i++) {
|
||||
|
@ -622,14 +633,16 @@ public class BKDWriter implements Closeable {
|
|||
writer.docIDs[i] = writer.docIDs[j];
|
||||
writer.docIDs[j] = docID;
|
||||
|
||||
if (longOrds) {
|
||||
long ord = writer.ordsLong[i];
|
||||
writer.ordsLong[i] = writer.ordsLong[j];
|
||||
writer.ordsLong[j] = ord;
|
||||
} else {
|
||||
int ord = writer.ords[i];
|
||||
writer.ords[i] = writer.ords[j];
|
||||
writer.ords[j] = ord;
|
||||
if (singleValuePerDoc == false) {
|
||||
if (longOrds) {
|
||||
long ord = writer.ordsLong[i];
|
||||
writer.ordsLong[i] = writer.ordsLong[j];
|
||||
writer.ordsLong[j] = ord;
|
||||
} else {
|
||||
int ord = writer.ords[i];
|
||||
writer.ords[i] = writer.ords[j];
|
||||
writer.ords[j] = ord;
|
||||
}
|
||||
}
|
||||
|
||||
byte[] blockI = writer.blocks.get(i / writer.valuesPerBlock);
|
||||
|
@ -681,7 +694,7 @@ public class BKDWriter implements Closeable {
|
|||
sorted = heapPointWriter;
|
||||
} else {
|
||||
// Subsequent dims need a private copy
|
||||
sorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength, longOrds);
|
||||
sorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength, longOrds, singleValuePerDoc);
|
||||
sorted.copyFrom(heapPointWriter);
|
||||
}
|
||||
|
||||
|
@ -713,7 +726,9 @@ public class BKDWriter implements Closeable {
|
|||
|
||||
// Tie-break by docID:
|
||||
int offset;
|
||||
if (longOrds) {
|
||||
if (singleValuePerDoc) {
|
||||
offset = 0;
|
||||
} else if (longOrds) {
|
||||
offset = Long.BYTES;
|
||||
} else {
|
||||
offset = Integer.BYTES;
|
||||
|
@ -769,7 +784,7 @@ public class BKDWriter implements Closeable {
|
|||
|
||||
assert lastWriter[0] != null;
|
||||
|
||||
return new OfflinePointWriter(tempDir, lastWriter[0], packedBytesLength, pointCount, longOrds);
|
||||
return new OfflinePointWriter(tempDir, lastWriter[0], packedBytesLength, pointCount, longOrds, singleValuePerDoc);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -800,7 +815,11 @@ public class BKDWriter implements Closeable {
|
|||
|
||||
LongBitSet ordBitSet;
|
||||
if (numDims > 1) {
|
||||
ordBitSet = new LongBitSet(pointCount);
|
||||
if (singleValuePerDoc) {
|
||||
ordBitSet = new LongBitSet(maxDoc);
|
||||
} else {
|
||||
ordBitSet = new LongBitSet(pointCount);
|
||||
}
|
||||
} else {
|
||||
ordBitSet = null;
|
||||
}
|
||||
|
@ -867,7 +886,11 @@ public class BKDWriter implements Closeable {
|
|||
success = true;
|
||||
} finally {
|
||||
if (success == false) {
|
||||
if (tempInput != null) {
|
||||
IOUtils.closeWhileHandlingException(tempInput);
|
||||
}
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempDir.getCreatedFiles());
|
||||
tempInput = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1049,7 +1072,7 @@ public class BKDWriter implements Closeable {
|
|||
private PathSlice switchToHeap(PathSlice source) throws IOException {
|
||||
int count = Math.toIntExact(source.count);
|
||||
try (
|
||||
PointWriter writer = new HeapPointWriter(count, count, packedBytesLength, longOrds);
|
||||
PointWriter writer = new HeapPointWriter(count, count, packedBytesLength, longOrds, singleValuePerDoc);
|
||||
PointReader reader = source.writer.getReader(source.start, source.count);
|
||||
) {
|
||||
for(int i=0;i<count;i++) {
|
||||
|
@ -1252,9 +1275,9 @@ public class BKDWriter implements Closeable {
|
|||
PointWriter getPointWriter(long count, String desc) throws IOException {
|
||||
if (count <= maxPointsSortInHeap) {
|
||||
int size = Math.toIntExact(count);
|
||||
return new HeapPointWriter(size, size, packedBytesLength, longOrds);
|
||||
return new HeapPointWriter(size, size, packedBytesLength, longOrds, singleValuePerDoc);
|
||||
} else {
|
||||
return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, desc);
|
||||
return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, desc, singleValuePerDoc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,10 +28,12 @@ final class HeapPointReader extends PointReader {
|
|||
final int[] docIDs;
|
||||
final int end;
|
||||
final byte[] scratch;
|
||||
final boolean singleValuePerDoc;
|
||||
|
||||
HeapPointReader(List<byte[]> blocks, int valuesPerBlock, int packedBytesLength, int[] ords, long[] ordsLong, int[] docIDs, int start, int end) {
|
||||
HeapPointReader(List<byte[]> blocks, int valuesPerBlock, int packedBytesLength, int[] ords, long[] ordsLong, int[] docIDs, int start, int end, boolean singleValuePerDoc) {
|
||||
this.blocks = blocks;
|
||||
this.valuesPerBlock = valuesPerBlock;
|
||||
this.singleValuePerDoc = singleValuePerDoc;
|
||||
this.ords = ords;
|
||||
this.ordsLong = ordsLong;
|
||||
this.docIDs = docIDs;
|
||||
|
@ -75,7 +77,9 @@ final class HeapPointReader extends PointReader {
|
|||
|
||||
@Override
|
||||
public long ord() {
|
||||
if (ordsLong != null) {
|
||||
if (singleValuePerDoc) {
|
||||
return docIDs[curRead];
|
||||
} else if (ordsLong != null) {
|
||||
return ordsLong[curRead];
|
||||
} else {
|
||||
return ords[curRead];
|
||||
|
|
|
@ -31,17 +31,24 @@ final class HeapPointWriter implements PointWriter {
|
|||
final int maxSize;
|
||||
final int valuesPerBlock;
|
||||
final int packedBytesLength;
|
||||
final boolean singleValuePerDoc;
|
||||
// NOTE: can't use ByteBlockPool because we need random-write access when sorting in heap
|
||||
final List<byte[]> blocks = new ArrayList<>();
|
||||
|
||||
public HeapPointWriter(int initSize, int maxSize, int packedBytesLength, boolean longOrds) {
|
||||
public HeapPointWriter(int initSize, int maxSize, int packedBytesLength, boolean longOrds, boolean singleValuePerDoc) {
|
||||
docIDs = new int[initSize];
|
||||
this.maxSize = maxSize;
|
||||
this.packedBytesLength = packedBytesLength;
|
||||
if (longOrds) {
|
||||
this.ordsLong = new long[initSize];
|
||||
this.singleValuePerDoc = singleValuePerDoc;
|
||||
if (singleValuePerDoc) {
|
||||
this.ordsLong = null;
|
||||
this.ords = null;
|
||||
} else {
|
||||
this.ords = new int[initSize];
|
||||
if (longOrds) {
|
||||
this.ordsLong = new long[initSize];
|
||||
} else {
|
||||
this.ords = new int[initSize];
|
||||
}
|
||||
}
|
||||
// 4K per page, unless each value is > 4K:
|
||||
valuesPerBlock = Math.max(1, 4096/packedBytesLength);
|
||||
|
@ -52,12 +59,14 @@ final class HeapPointWriter implements PointWriter {
|
|||
throw new IllegalStateException("docIDs.length=" + docIDs.length + " other.nextWrite=" + other.nextWrite);
|
||||
}
|
||||
System.arraycopy(other.docIDs, 0, docIDs, 0, other.nextWrite);
|
||||
if (other.ords != null) {
|
||||
assert this.ords != null;
|
||||
System.arraycopy(other.ords, 0, ords, 0, other.nextWrite);
|
||||
} else {
|
||||
assert this.ordsLong != null;
|
||||
System.arraycopy(other.ordsLong, 0, ordsLong, 0, other.nextWrite);
|
||||
if (singleValuePerDoc == false) {
|
||||
if (other.ords != null) {
|
||||
assert this.ords != null;
|
||||
System.arraycopy(other.ords, 0, ords, 0, other.nextWrite);
|
||||
} else {
|
||||
assert this.ordsLong != null;
|
||||
System.arraycopy(other.ordsLong, 0, ordsLong, 0, other.nextWrite);
|
||||
}
|
||||
}
|
||||
|
||||
for(byte[] block : other.blocks) {
|
||||
|
@ -117,18 +126,22 @@ final class HeapPointWriter implements PointWriter {
|
|||
int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, Integer.BYTES));
|
||||
assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
|
||||
docIDs = growExact(docIDs, nextSize);
|
||||
if (ordsLong != null) {
|
||||
ordsLong = growExact(ordsLong, nextSize);
|
||||
} else {
|
||||
ords = growExact(ords, nextSize);
|
||||
if (singleValuePerDoc == false) {
|
||||
if (ordsLong != null) {
|
||||
ordsLong = growExact(ordsLong, nextSize);
|
||||
} else {
|
||||
ords = growExact(ords, nextSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
writePackedValue(nextWrite, packedValue);
|
||||
if (ordsLong != null) {
|
||||
ordsLong[nextWrite] = ord;
|
||||
} else {
|
||||
assert ord <= Integer.MAX_VALUE;
|
||||
ords[nextWrite] = (int) ord;
|
||||
if (singleValuePerDoc == false) {
|
||||
if (ordsLong != null) {
|
||||
ordsLong[nextWrite] = ord;
|
||||
} else {
|
||||
assert ord <= Integer.MAX_VALUE;
|
||||
ords[nextWrite] = (int) ord;
|
||||
}
|
||||
}
|
||||
docIDs[nextWrite] = docID;
|
||||
nextWrite++;
|
||||
|
@ -137,7 +150,7 @@ final class HeapPointWriter implements PointWriter {
|
|||
@Override
|
||||
public PointReader getReader(long start, long length) {
|
||||
assert start + length <= docIDs.length: "start=" + start + " length=" + length + " docIDs.length=" + docIDs.length;
|
||||
return new HeapPointReader(blocks, valuesPerBlock, packedBytesLength, ords, ordsLong, docIDs, (int) start, nextWrite);
|
||||
return new HeapPointReader(blocks, valuesPerBlock, packedBytesLength, ords, ordsLong, docIDs, (int) start, nextWrite, singleValuePerDoc);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -32,18 +32,23 @@ final class OfflinePointReader extends PointReader {
|
|||
long countLeft;
|
||||
final IndexInput in;
|
||||
private final byte[] packedValue;
|
||||
final boolean singleValuePerDoc;
|
||||
private long ord;
|
||||
private int docID;
|
||||
// true if ords are written as long (8 bytes), else 4 bytes
|
||||
private boolean longOrds;
|
||||
private boolean checked;
|
||||
|
||||
OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long start, long length, boolean longOrds) throws IOException {
|
||||
OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long start, long length,
|
||||
boolean longOrds, boolean singleValuePerDoc) throws IOException {
|
||||
this.singleValuePerDoc = singleValuePerDoc;
|
||||
int bytesPerDoc = packedBytesLength + Integer.BYTES;
|
||||
if (longOrds) {
|
||||
bytesPerDoc += Long.BYTES;
|
||||
} else {
|
||||
bytesPerDoc += Integer.BYTES;
|
||||
if (singleValuePerDoc == false) {
|
||||
if (longOrds) {
|
||||
bytesPerDoc += Long.BYTES;
|
||||
} else {
|
||||
bytesPerDoc += Integer.BYTES;
|
||||
}
|
||||
}
|
||||
|
||||
if ((start + length) * bytesPerDoc + CodecUtil.footerLength() > tempDir.fileLength(tempFileName)) {
|
||||
|
@ -84,12 +89,16 @@ final class OfflinePointReader extends PointReader {
|
|||
assert countLeft == -1;
|
||||
return false;
|
||||
}
|
||||
if (longOrds) {
|
||||
ord = in.readLong();
|
||||
if (singleValuePerDoc == false) {
|
||||
if (longOrds) {
|
||||
ord = in.readLong();
|
||||
} else {
|
||||
ord = in.readInt();
|
||||
}
|
||||
docID = in.readInt();
|
||||
} else {
|
||||
ord = in.readInt();
|
||||
ord = docID = in.readInt();
|
||||
}
|
||||
docID = in.readInt();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -135,10 +144,12 @@ final class OfflinePointReader extends PointReader {
|
|||
int packedBytesLength = packedValue.length;
|
||||
|
||||
int bytesPerDoc = packedBytesLength + Integer.BYTES;
|
||||
if (longOrds) {
|
||||
bytesPerDoc += Long.BYTES;
|
||||
} else {
|
||||
bytesPerDoc += Integer.BYTES;
|
||||
if (singleValuePerDoc == false) {
|
||||
if (longOrds) {
|
||||
bytesPerDoc += Long.BYTES;
|
||||
} else {
|
||||
bytesPerDoc += Integer.BYTES;
|
||||
}
|
||||
}
|
||||
|
||||
long rightCount = 0;
|
||||
|
@ -159,8 +170,10 @@ final class OfflinePointReader extends PointReader {
|
|||
if (longOrds) {
|
||||
ord = readLong(buffer, packedBytesLength);
|
||||
} else {
|
||||
// This is either ord (multi-valued case) or docID (which we use as ord in the single valued case):
|
||||
ord = readInt(buffer, packedBytesLength);
|
||||
}
|
||||
|
||||
if (rightTree.get(ord)) {
|
||||
rightOut.writeBytes(buffer, 0, bytesPerDoc);
|
||||
if (doClearBits) {
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.util.bkd;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
@ -30,37 +29,42 @@ final class OfflinePointWriter implements PointWriter {
|
|||
final Directory tempDir;
|
||||
final IndexOutput out;
|
||||
final int packedBytesLength;
|
||||
final boolean singleValuePerDoc;
|
||||
long count;
|
||||
private boolean closed;
|
||||
// true if ords are written as long (8 bytes), else 4 bytes
|
||||
private boolean longOrds;
|
||||
|
||||
public OfflinePointWriter(Directory tempDir, String tempFileNamePrefix, int packedBytesLength, boolean longOrds, String desc) throws IOException {
|
||||
public OfflinePointWriter(Directory tempDir, String tempFileNamePrefix, int packedBytesLength, boolean longOrds, String desc, boolean singleValuePerDoc) throws IOException {
|
||||
this.out = tempDir.createTempOutput(tempFileNamePrefix, "bkd_" + desc, IOContext.DEFAULT);
|
||||
this.tempDir = tempDir;
|
||||
this.packedBytesLength = packedBytesLength;
|
||||
this.longOrds = longOrds;
|
||||
this.singleValuePerDoc = singleValuePerDoc;
|
||||
}
|
||||
|
||||
/** Initializes on an already written/closed file, just so consumers can use {@link #getReader} to read the file. */
|
||||
public OfflinePointWriter(Directory tempDir, IndexOutput out, int packedBytesLength, long count, boolean longOrds) {
|
||||
public OfflinePointWriter(Directory tempDir, IndexOutput out, int packedBytesLength, long count, boolean longOrds, boolean singleValuePerDoc) {
|
||||
this.out = out;
|
||||
this.tempDir = tempDir;
|
||||
this.packedBytesLength = packedBytesLength;
|
||||
this.count = count;
|
||||
closed = true;
|
||||
this.longOrds = longOrds;
|
||||
this.singleValuePerDoc = singleValuePerDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void append(byte[] packedValue, long ord, int docID) throws IOException {
|
||||
assert packedValue.length == packedBytesLength;
|
||||
out.writeBytes(packedValue, 0, packedValue.length);
|
||||
if (longOrds) {
|
||||
out.writeLong(ord);
|
||||
} else {
|
||||
assert ord <= Integer.MAX_VALUE;
|
||||
out.writeInt((int) ord);
|
||||
if (singleValuePerDoc == false) {
|
||||
if (longOrds) {
|
||||
out.writeLong(ord);
|
||||
} else {
|
||||
assert ord <= Integer.MAX_VALUE;
|
||||
out.writeInt((int) ord);
|
||||
}
|
||||
}
|
||||
out.writeInt(docID);
|
||||
count++;
|
||||
|
@ -70,7 +74,7 @@ final class OfflinePointWriter implements PointWriter {
|
|||
public PointReader getReader(long start, long length) throws IOException {
|
||||
assert closed;
|
||||
assert start + length <= count: "start=" + start + " length=" + length + " count=" + count;
|
||||
return new OfflinePointReader(tempDir, out.getName(), packedBytesLength, start, length, longOrds);
|
||||
return new OfflinePointReader(tempDir, out.getName(), packedBytesLength, start, length, longOrds, singleValuePerDoc);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -16,29 +16,14 @@
|
|||
*/
|
||||
package org.apache.lucene.util.bkd;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.FilterCodec;
|
||||
import org.apache.lucene.codecs.PointsFormat;
|
||||
import org.apache.lucene.codecs.PointsReader;
|
||||
import org.apache.lucene.codecs.PointsWriter;
|
||||
import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
|
||||
import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.LuceneTestCase.Monster;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.TimeUnits;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
|
||||
|
@ -55,7 +40,7 @@ public class Test2BBKDPoints extends LuceneTestCase {
|
|||
|
||||
final int numDocs = (Integer.MAX_VALUE / 26) + 100;
|
||||
|
||||
BKDWriter w = new BKDWriter(numDocs, dir, "_0", 1, Long.BYTES, 26L * numDocs);
|
||||
BKDWriter w = new BKDWriter(numDocs, dir, "_0", 1, Long.BYTES, 26L * numDocs, false);
|
||||
int counter = 0;
|
||||
byte[] packedBytes = new byte[Long.BYTES];
|
||||
for (int docID = 0; docID < numDocs; docID++) {
|
||||
|
@ -88,7 +73,7 @@ public class Test2BBKDPoints extends LuceneTestCase {
|
|||
|
||||
final int numDocs = (Integer.MAX_VALUE / 26) + 100;
|
||||
|
||||
BKDWriter w = new BKDWriter(numDocs, dir, "_0", 2, Long.BYTES, 26L * numDocs);
|
||||
BKDWriter w = new BKDWriter(numDocs, dir, "_0", 2, Long.BYTES, 26L * numDocs, false);
|
||||
int counter = 0;
|
||||
byte[] packedBytes = new byte[2*Long.BYTES];
|
||||
for (int docID = 0; docID < numDocs; docID++) {
|
||||
|
|
|
@ -43,8 +43,8 @@ import org.apache.lucene.util.TestUtil;
|
|||
|
||||
public class TestBKD extends LuceneTestCase {
|
||||
|
||||
private long randomPointCount() {
|
||||
if (random().nextBoolean()) {
|
||||
private long randomPointCount(boolean singleValuePerDoc) {
|
||||
if (singleValuePerDoc || random().nextBoolean()) {
|
||||
return random().nextInt(Integer.MAX_VALUE);
|
||||
} else {
|
||||
return random().nextLong() & Long.MAX_VALUE;
|
||||
|
@ -53,7 +53,7 @@ public class TestBKD extends LuceneTestCase {
|
|||
|
||||
public void testBasicInts1D() throws Exception {
|
||||
try (Directory dir = getDirectory(100)) {
|
||||
BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, randomPointCount());
|
||||
BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, randomPointCount(true), true);
|
||||
byte[] scratch = new byte[4];
|
||||
for(int docID=0;docID<100;docID++) {
|
||||
NumericUtils.intToSortableBytes(docID, scratch, 0);
|
||||
|
@ -128,7 +128,7 @@ public class TestBKD extends LuceneTestCase {
|
|||
int numDims = TestUtil.nextInt(random(), 1, 5);
|
||||
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
|
||||
float maxMB = (float) 3.0 + (3*random().nextFloat());
|
||||
BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, 4, maxPointsInLeafNode, maxMB, randomPointCount());
|
||||
BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, 4, maxPointsInLeafNode, maxMB, randomPointCount(true), true);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: numDims=" + numDims + " numDocs=" + numDocs);
|
||||
|
@ -269,7 +269,7 @@ public class TestBKD extends LuceneTestCase {
|
|||
int numDims = TestUtil.nextInt(random(), 1, 5);
|
||||
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
|
||||
float maxMB = (float) 3.0 + (3*random().nextFloat());
|
||||
BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount());
|
||||
BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount(true), true);
|
||||
BigInteger[][] docs = new BigInteger[numDocs][];
|
||||
|
||||
byte[] scratch = new byte[numBytesPerDim*numDims];
|
||||
|
@ -442,7 +442,7 @@ public class TestBKD extends LuceneTestCase {
|
|||
public void testTooLittleHeap() throws Exception {
|
||||
try (Directory dir = getDirectory(0)) {
|
||||
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
|
||||
new BKDWriter(1, dir, "bkd", 1, 16, 1000000, 0.001, randomPointCount());
|
||||
new BKDWriter(1, dir, "bkd", 1, 16, 1000000, 0.001, randomPointCount(true), true);
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("either increase maxMBSortInHeap or decrease maxPointsInLeafNode"));
|
||||
}
|
||||
|
@ -565,7 +565,7 @@ public class TestBKD extends LuceneTestCase {
|
|||
List<Integer> docIDBases = null;
|
||||
int seg = 0;
|
||||
|
||||
BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount());
|
||||
BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount(false), false);
|
||||
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
|
||||
IndexInput in = null;
|
||||
|
||||
|
@ -619,7 +619,7 @@ public class TestBKD extends LuceneTestCase {
|
|||
seg++;
|
||||
maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 1000);
|
||||
maxMB = (float) 3.0 + (3*random().nextDouble());
|
||||
w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount());
|
||||
w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount(false), false);
|
||||
lastDocIDBase = docID;
|
||||
}
|
||||
}
|
||||
|
@ -634,7 +634,7 @@ public class TestBKD extends LuceneTestCase {
|
|||
out.close();
|
||||
in = dir.openInput("bkd", IOContext.DEFAULT);
|
||||
seg++;
|
||||
w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount());
|
||||
w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, randomPointCount(false), false);
|
||||
List<BKDReader> readers = new ArrayList<>();
|
||||
for(long fp : toMerge) {
|
||||
in.seek(fp);
|
||||
|
|
|
@ -229,7 +229,7 @@ public abstract class BasePointsFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
throw ise;
|
||||
}
|
||||
} catch (AssertionError ae) {
|
||||
if (ae.getMessage().contains("does not exist; files=")) {
|
||||
if (ae.getMessage() != null && ae.getMessage().contains("does not exist; files=")) {
|
||||
// OK: likely we threw the random IOExc when IW was asserting the commit files exist
|
||||
done = true;
|
||||
} else {
|
||||
|
|
Loading…
Reference in New Issue