LUCENE-6962: add min/max per dimension to dimensional values

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1723682 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2016-01-08 10:52:15 +00:00
parent 965100a167
commit 9bc5058f7d
18 changed files with 520 additions and 26 deletions

View File

@ -55,6 +55,9 @@ New Features
* LUCENE-6837: Add N-best output support to JapaneseTokenizer.
(Hiroharu Konno via Christian Moen)
* LUCENE-6962: Add per-dimension min/max to dimensional values
(Mike McCandless)
API Changes
* LUCENE-3312: The API of oal.document was restructured to

View File

@ -33,8 +33,9 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.BL
class SimpleTextBKDReader extends BKDReader {
public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {
super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues);
public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
byte[] minPackedValue, byte[] maxPackedValue) throws IOException {
super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue);
}
@Override

View File

@ -43,6 +43,8 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.FI
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.FIELD_FP_NAME;
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.INDEX_COUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.MAX_LEAF_POINTS;
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.MAX_VALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.MIN_VALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.NUM_DIMS;
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.SPLIT_COUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.SPLIT_DIM;
@ -89,6 +91,17 @@ class SimpleTextDimensionalReader extends DimensionalReader {
readLine(dataIn);
int count = parseInt(INDEX_COUNT);
readLine(dataIn);
assert startsWith(MIN_VALUE);
BytesRef minValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MIN_VALUE));
assert minValue.length == numDims*bytesPerDim;
readLine(dataIn);
assert startsWith(MAX_VALUE);
BytesRef maxValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MAX_VALUE));
assert maxValue.length == numDims*bytesPerDim;
long[] leafBlockFPs = new long[count];
for(int i=0;i<count;i++) {
readLine(dataIn);
@ -108,7 +121,7 @@ class SimpleTextDimensionalReader extends DimensionalReader {
System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + bytesPerDim) * i + 1, bytesPerDim);
}
return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues);
return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes);
}
private void readLine(IndexInput in) throws IOException {
@ -133,17 +146,21 @@ class SimpleTextDimensionalReader extends DimensionalReader {
return new String(scratch.bytes(), prefix.length, scratch.length() - prefix.length, StandardCharsets.UTF_8);
}
/** Finds all documents and points matching the provided visitor */
@Override
public void intersect(String field, IntersectVisitor visitor) throws IOException {
FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(field);
private BKDReader getBKDReader(String fieldName) {
FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(fieldName);
if (fieldInfo == null) {
throw new IllegalArgumentException("field=\"" + field + "\" is unrecognized");
throw new IllegalArgumentException("field=\"" + fieldName + "\" is unrecognized");
}
if (fieldInfo.getDimensionCount() == 0) {
throw new IllegalArgumentException("field=\"" + field + "\" did not index dimensional values");
throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index dimensional values");
}
BKDReader bkdReader = readers.get(field);
return readers.get(fieldName);
}
/** Finds all documents and points matching the provided visitor */
@Override
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
BKDReader bkdReader = getBKDReader(fieldName);
if (bkdReader == null) {
// Schema ghost corner case! This field did index dimensional values in the past, but
// now all docs having this dimensional field were deleted in this segment:
@ -188,4 +205,48 @@ class SimpleTextDimensionalReader extends DimensionalReader {
public String toString() {
return "SimpleTextDimensionalReader(segment=" + readState.segmentInfo.name + " maxDoc=" + readState.segmentInfo.maxDoc() + ")";
}
/** Returns the packed per-dimension minimum for {@code fieldName}, or null when this segment has no reader for it. */
@Override
public byte[] getMinPackedValue(String fieldName) {
  final BKDReader fieldReader = getBKDReader(fieldName);
  // A null reader is the "schema ghost" case: the field indexed dimensional values in the past,
  // but all docs having them were deleted in this segment.
  return fieldReader == null ? null : fieldReader.getMinPackedValue();
}
/** Returns the packed per-dimension maximum for {@code fieldName}, or null when this segment has no reader for it. */
@Override
public byte[] getMaxPackedValue(String fieldName) {
  BKDReader bkdReader = getBKDReader(fieldName);
  if (bkdReader == null) {
    // Schema ghost corner case!  This field did index dimensional values in the past, but
    // now all docs having this dimensional field were deleted in this segment:
    return null;
  }
  // Must be the max accessor; returning the min here would make min == max for every field
  return bkdReader.getMaxPackedValue();
}
/** Returns the dimension count recorded for {@code fieldName}, or 0 when this segment has no reader for it. */
@Override
public int getNumDimensions(String fieldName) {
  final BKDReader fieldReader = getBKDReader(fieldName);
  // A null reader is the "schema ghost" case: the field indexed dimensional values in the past,
  // but all docs having them were deleted in this segment.
  return fieldReader == null ? 0 : fieldReader.getNumDimensions();
}
/** Returns the bytes per dimension recorded for {@code fieldName}, or 0 when this segment has no reader for it. */
@Override
public int getBytesPerDimension(String fieldName) {
  final BKDReader fieldReader = getBKDReader(fieldName);
  // A null reader is the "schema ghost" case: the field indexed dimensional values in the past,
  // but all docs having them were deleted in this segment.
  return fieldReader == null ? 0 : fieldReader.getBytesPerDimension();
}
}

View File

@ -49,6 +49,8 @@ class SimpleTextDimensionalWriter extends DimensionalWriter {
final static BytesRef FIELD_COUNT = new BytesRef("field count ");
final static BytesRef FIELD_FP_NAME = new BytesRef(" field fp name ");
final static BytesRef FIELD_FP = new BytesRef(" field fp ");
final static BytesRef MIN_VALUE = new BytesRef("min value ");
final static BytesRef MAX_VALUE = new BytesRef("max value ");
private IndexOutput dataOut;
final BytesRefBuilder scratch = new BytesRefBuilder();
@ -90,6 +92,14 @@ class SimpleTextDimensionalWriter extends DimensionalWriter {
writeInt(out, leafBlockFPs.length);
newline(out);
// Write the packed per-dimension minimum on its own line.
write(out, MIN_VALUE);
BytesRef br = new BytesRef(minPackedValue, 0, minPackedValue.length);
write(out, br.toString());
// Terminate the entry: the reader consumes min and max with one readLine each
newline(out);
// Write the packed per-dimension maximum on its own line.
write(out, MAX_VALUE);
br = new BytesRef(maxPackedValue, 0, maxPackedValue.length);
write(out, br.toString());
newline(out);
for(int i=0;i<leafBlockFPs.length;i++) {
write(out, BLOCK_FP);
writeLong(out, leafBlockFPs[i]);
@ -109,7 +119,7 @@ class SimpleTextDimensionalWriter extends DimensionalWriter {
writeInt(out, splitPackedValues[i * (1 + fieldInfo.getDimensionNumBytes())] & 0xff);
newline(out);
write(out, SPLIT_VALUE);
BytesRef br = new BytesRef(splitPackedValues, 1+(i * (1+fieldInfo.getDimensionNumBytes())), fieldInfo.getDimensionNumBytes());
br = new BytesRef(splitPackedValues, 1+(i * (1+fieldInfo.getDimensionNumBytes())), fieldInfo.getDimensionNumBytes());
write(out, br.toString());
newline(out);
}

View File

@ -75,6 +75,26 @@ public abstract class DimensionalFormat {
public void intersect(String fieldName, IntersectVisitor visitor) {
throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with dimensional values");
}
/** Always rejects the lookup with an IllegalArgumentException naming the field. */
@Override
public byte[] getMinPackedValue(String fieldName) {
  final String message = "field=\"" + fieldName + "\" was not indexed with dimensional values";
  throw new IllegalArgumentException(message);
}
/** Always rejects the lookup with an IllegalArgumentException naming the field. */
@Override
public byte[] getMaxPackedValue(String fieldName) {
  final String message = "field=\"" + fieldName + "\" was not indexed with dimensional values";
  throw new IllegalArgumentException(message);
}
/** Always rejects the lookup with an IllegalArgumentException naming the field. */
@Override
public int getNumDimensions(String fieldName) {
  final String message = "field=\"" + fieldName + "\" was not indexed with dimensional values";
  throw new IllegalArgumentException(message);
}
/** Always rejects the lookup with an IllegalArgumentException naming the field. */
@Override
public int getBytesPerDimension(String fieldName) {
  final String message = "field=\"" + fieldName + "\" was not indexed with dimensional values";
  throw new IllegalArgumentException(message);
}
};
}
};

View File

@ -95,6 +95,26 @@ public abstract class DimensionalWriter implements Closeable {
@Override
public void close() {
}
/** Not supported by this reader; always throws UnsupportedOperationException. */
@Override
public byte[] getMinPackedValue(String fieldName) {
throw new UnsupportedOperationException();
}
/** Not supported by this reader; always throws UnsupportedOperationException. */
@Override
public byte[] getMaxPackedValue(String fieldName) {
throw new UnsupportedOperationException();
}
/** Not supported by this reader; always throws UnsupportedOperationException. */
@Override
public int getNumDimensions(String fieldName) {
throw new UnsupportedOperationException();
}
/** Not supported by this reader; always throws UnsupportedOperationException. */
@Override
public int getBytesPerDimension(String fieldName) {
throw new UnsupportedOperationException();
}
});
}

View File

@ -84,24 +84,29 @@ public class Lucene60DimensionalReader extends DimensionalReader implements Clos
}
}
@Override
public void intersect(String field, IntersectVisitor visitor) throws IOException {
FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(field);
private BKDReader getBKDReader(String fieldName) {
FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(fieldName);
if (fieldInfo == null) {
throw new IllegalArgumentException("field=\"" + field + "\" is unrecognized");
throw new IllegalArgumentException("field=\"" + fieldName + "\" is unrecognized");
}
if (fieldInfo.getDimensionCount() == 0) {
throw new IllegalArgumentException("field=\"" + field + "\" did not index dimensional values");
throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index dimensional values");
}
BKDReader reader = readers.get(fieldInfo.number);
if (reader == null) {
return readers.get(fieldInfo.number);
}
@Override
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
BKDReader bkdReader = getBKDReader(fieldName);
if (bkdReader == null) {
// Schema ghost corner case! This field did index dimensional values in the past, but
// now all docs having this dimensional field were deleted in this segment:
return;
}
reader.intersect(visitor);
bkdReader.intersect(visitor);
}
@Override
@ -134,5 +139,51 @@ public class Lucene60DimensionalReader extends DimensionalReader implements Clos
// Free up heap:
readers.clear();
}
/** Returns the packed per-dimension minimum for {@code fieldName}, or null when this segment has no reader for it. */
@Override
public byte[] getMinPackedValue(String fieldName) {
  final BKDReader fieldReader = getBKDReader(fieldName);
  // A null reader is the "schema ghost" case: the field indexed dimensional values in the past,
  // but all docs having them were deleted in this segment.
  return fieldReader == null ? null : fieldReader.getMinPackedValue();
}
/** Returns the packed per-dimension maximum for {@code fieldName}, or null when this segment has no reader for it. */
@Override
public byte[] getMaxPackedValue(String fieldName) {
  final BKDReader fieldReader = getBKDReader(fieldName);
  // A null reader is the "schema ghost" case: the field indexed dimensional values in the past,
  // but all docs having them were deleted in this segment.
  return fieldReader == null ? null : fieldReader.getMaxPackedValue();
}
/** Returns the dimension count recorded for {@code fieldName}, or 0 when this segment has no reader for it. */
@Override
public int getNumDimensions(String fieldName) {
  final BKDReader fieldReader = getBKDReader(fieldName);
  // A null reader is the "schema ghost" case: the field indexed dimensional values in the past,
  // but all docs having them were deleted in this segment.
  return fieldReader == null ? 0 : fieldReader.getNumDimensions();
}
/** Returns the bytes per dimension recorded for {@code fieldName}, or 0 when this segment has no reader for it. */
@Override
public int getBytesPerDimension(String fieldName) {
  final BKDReader fieldReader = getBKDReader(fieldName);
  // A null reader is the "schema ghost" case: the field indexed dimensional values in the past,
  // but all docs having them were deleted in this segment.
  return fieldReader == null ? 0 : fieldReader.getBytesPerDimension();
}
}

View File

@ -39,7 +39,7 @@ public abstract class DimensionalValues {
/** Maximum number of dimensions */
public static final int MAX_DIMENSIONS = BKDWriter.MAX_DIMS;
/** Defautl constructor */
/** Default constructor */
protected DimensionalValues() {
}
@ -78,4 +78,16 @@ public abstract class DimensionalValues {
* This method does not enforce live docs, so it's up to the caller
* to test whether each document is deleted, if necessary. */
public abstract void intersect(String fieldName, IntersectVisitor visitor) throws IOException;
/** Returns minimum value for each dimension, packed, or null if no points were indexed */
public abstract byte[] getMinPackedValue(String fieldName) throws IOException;
/** Returns maximum value for each dimension, packed, or null if no points were indexed */
public abstract byte[] getMaxPackedValue(String fieldName) throws IOException;
/** Returns how many dimensions were indexed */
public abstract int getNumDimensions(String fieldName) throws IOException;
/** Returns the number of bytes per dimension */
public abstract int getBytesPerDimension(String fieldName) throws IOException;
}

View File

@ -63,8 +63,6 @@ class DimensionalValuesWriter {
public void flush(SegmentWriteState state, DimensionalWriter writer) throws IOException {
final int maxDoc = state.segmentInfo.maxDoc();
writer.writeField(fieldInfo,
new DimensionalReader() {
@Override
@ -91,6 +89,26 @@ class DimensionalValuesWriter {
@Override
public void close() {
}
/** Not supported by this reader; always throws UnsupportedOperationException. */
@Override
public byte[] getMinPackedValue(String fieldName) {
throw new UnsupportedOperationException();
}
/** Not supported by this reader; always throws UnsupportedOperationException. */
@Override
public byte[] getMaxPackedValue(String fieldName) {
throw new UnsupportedOperationException();
}
/** Not supported by this reader; always throws UnsupportedOperationException. */
@Override
public int getNumDimensions(String fieldName) {
throw new UnsupportedOperationException();
}
/** Not supported by this reader; always throws UnsupportedOperationException. */
@Override
public int getBytesPerDimension(String fieldName) {
throw new UnsupportedOperationException();
}
});
}
}

View File

@ -21,6 +21,8 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.StringHelper;
class MultiDimensionalValues extends DimensionalValues {
private final List<DimensionalValues> subs;
@ -95,4 +97,74 @@ class MultiDimensionalValues extends DimensionalValues {
b.append(')');
return b.toString();
}
/**
 * Returns the smallest value seen in each dimension across all sub-readers, packed into one
 * array, or null if no sub-reader reports a minimum for {@code fieldName}.
 */
@Override
public byte[] getMinPackedValue(String fieldName) throws IOException {
  byte[] result = null;
  for(int i=0;i<subs.size();i++) {
    DimensionalValues sub = subs.get(i);
    byte[] minPackedValue = sub.getMinPackedValue(fieldName);
    if (minPackedValue == null) {
      // This sub has no points for the field, so it must not take part in the merge
      // (comparing against a null array would throw NullPointerException):
      continue;
    }
    if (result == null) {
      result = minPackedValue.clone();
    } else {
      // Take the layout from the sub that produced the value; a sub without points for
      // this field may report 0 here, which would silently skip the comparison below:
      int numDims = sub.getNumDimensions(fieldName);
      int bytesPerDim = sub.getBytesPerDimension(fieldName);
      for(int dim=0;dim<numDims;dim++) {
        int offset = dim*bytesPerDim;
        if (StringHelper.compare(bytesPerDim, minPackedValue, offset, result, offset) < 0) {
          System.arraycopy(minPackedValue, offset, result, offset, bytesPerDim);
        }
      }
    }
  }
  return result;
}
/**
 * Returns the largest value seen in each dimension across all sub-readers, packed into one
 * array, or null if no sub-reader reports a maximum for {@code fieldName}.
 */
@Override
public byte[] getMaxPackedValue(String fieldName) throws IOException {
  byte[] result = null;
  for(int i=0;i<subs.size();i++) {
    DimensionalValues sub = subs.get(i);
    byte[] maxPackedValue = sub.getMaxPackedValue(fieldName);
    if (maxPackedValue == null) {
      // This sub has no points for the field, so it must not take part in the merge
      // (comparing against a null array would throw NullPointerException):
      continue;
    }
    if (result == null) {
      result = maxPackedValue.clone();
    } else {
      // Take the layout from the sub that produced the value; a sub without points for
      // this field may report 0 here, which would silently skip the comparison below:
      int numDims = sub.getNumDimensions(fieldName);
      int bytesPerDim = sub.getBytesPerDimension(fieldName);
      for(int dim=0;dim<numDims;dim++) {
        int offset = dim*bytesPerDim;
        if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, result, offset) > 0) {
          System.arraycopy(maxPackedValue, offset, result, offset, bytesPerDim);
        }
      }
    }
  }
  return result;
}
/** Returns the first non-zero dimension count reported by a sub-reader for {@code fieldName}, or 0 if none reports one. */
@Override
public int getNumDimensions(String fieldName) throws IOException {
  for (DimensionalValues sub : subs) {
    final int numDims = sub.getNumDimensions(fieldName);
    if (numDims == 0) {
      // This sub reports nothing for the field; keep looking
      continue;
    }
    return numDims;
  }
  return 0;
}
/** Returns the first non-zero bytes-per-dimension reported by a sub-reader for {@code fieldName}, or 0 if none reports one. */
@Override
public int getBytesPerDimension(String fieldName) throws IOException {
  for (DimensionalValues sub : subs) {
    final int bytesPerDim = sub.getBytesPerDimension(fieldName);
    if (bytesPerDim == 0) {
      // This sub reports nothing for the field; keep looking
      continue;
    }
    return bytesPerDim;
  }
  return 0;
}
}

View File

@ -329,6 +329,58 @@ public class ParallelLeafReader extends LeafReader {
}
dimValues.intersect(fieldName, visitor);
}
/**
 * Looks up the reader mapped to {@code fieldName} and returns its packed minimum; returns null
 * when no reader is mapped or that reader has no dimensional values.
 */
@Override
public byte[] getMinPackedValue(String fieldName) throws IOException {
  final LeafReader leaf = fieldToReader.get(fieldName);
  if (leaf != null) {
    final DimensionalValues leafValues = leaf.getDimensionalValues();
    if (leafValues != null) {
      return leafValues.getMinPackedValue(fieldName);
    }
  }
  return null;
}
/**
 * Looks up the reader mapped to {@code fieldName} and returns its packed maximum; returns null
 * when no reader is mapped or that reader has no dimensional values.
 */
@Override
public byte[] getMaxPackedValue(String fieldName) throws IOException {
  final LeafReader leaf = fieldToReader.get(fieldName);
  if (leaf != null) {
    final DimensionalValues leafValues = leaf.getDimensionalValues();
    if (leafValues != null) {
      return leafValues.getMaxPackedValue(fieldName);
    }
  }
  return null;
}
/**
 * Looks up the reader mapped to {@code fieldName} and returns its dimension count; returns 0
 * when no reader is mapped or that reader has no dimensional values.
 */
@Override
public int getNumDimensions(String fieldName) throws IOException {
  final LeafReader leaf = fieldToReader.get(fieldName);
  if (leaf != null) {
    final DimensionalValues leafValues = leaf.getDimensionalValues();
    if (leafValues != null) {
      return leafValues.getNumDimensions(fieldName);
    }
  }
  return 0;
}
/**
 * Looks up the reader mapped to {@code fieldName} and returns its bytes per dimension; returns 0
 * when no reader is mapped or that reader has no dimensional values.
 */
@Override
public int getBytesPerDimension(String fieldName) throws IOException {
  final LeafReader leaf = fieldToReader.get(fieldName);
  if (leaf != null) {
    final DimensionalValues leafValues = leaf.getDimensionalValues();
    if (leafValues != null) {
      return leafValues.getBytesPerDimension(fieldName);
    }
  }
  return 0;
}
};
}

View File

@ -152,6 +152,26 @@ public final class SlowCodecReaderWrapper {
public long ramBytesUsed() {
return 0;
}
/** Forwards to the wrapped {@code values} and returns its packed minimum for {@code fieldName}. */
@Override
public byte[] getMinPackedValue(String fieldName) throws IOException {
  final byte[] minPacked = values.getMinPackedValue(fieldName);
  return minPacked;
}
/** Forwards to the wrapped {@code values} and returns its packed maximum for {@code fieldName}. */
@Override
public byte[] getMaxPackedValue(String fieldName) throws IOException {
  final byte[] maxPacked = values.getMaxPackedValue(fieldName);
  return maxPacked;
}
/** Forwards to the wrapped {@code values} and returns its dimension count for {@code fieldName}. */
@Override
public int getNumDimensions(String fieldName) throws IOException {
  final int numDims = values.getNumDimensions(fieldName);
  return numDims;
}
/** Forwards to the wrapped {@code values} and returns its bytes per dimension for {@code fieldName}. */
@Override
public int getBytesPerDimension(String fieldName) throws IOException {
  final int bytesPerDim = values.getBytesPerDimension(fieldName);
  return bytesPerDim;
}
};
}

View File

@ -43,6 +43,8 @@ public class BKDReader implements Accountable {
final int bytesPerDim;
final IndexInput in;
final int maxPointsInLeafNode;
final byte[] minPackedValue;
final byte[] maxPackedValue;
protected final int packedBytesLength;
/** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */
@ -58,6 +60,11 @@ public class BKDReader implements Accountable {
assert numLeaves > 0;
leafNodeOffset = numLeaves;
minPackedValue = new byte[packedBytesLength];
maxPackedValue = new byte[packedBytesLength];
in.readBytes(minPackedValue, 0, packedBytesLength);
in.readBytes(maxPackedValue, 0, packedBytesLength);
splitPackedValues = new byte[(1+bytesPerDim)*numLeaves];
// TODO: don't write split packed values[0]!
@ -116,8 +123,9 @@ public class BKDReader implements Accountable {
this.in = in;
}
/** Called by consumers that have their own on-disk format for the index */
protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {
/** Called by consumers that have their own on-disk format for the index (e.g. SimpleText) */
protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
byte[] minPackedValue, byte[] maxPackedValue) throws IOException {
this.in = in;
this.numDims = numDims;
this.maxPointsInLeafNode = maxPointsInLeafNode;
@ -126,6 +134,10 @@ public class BKDReader implements Accountable {
this.leafNodeOffset = leafBlockFPs.length;
this.leafBlockFPs = leafBlockFPs;
this.splitPackedValues = splitPackedValues;
this.minPackedValue = minPackedValue;
this.maxPackedValue = maxPackedValue;
assert minPackedValue.length == packedBytesLength;
assert maxPackedValue.length == packedBytesLength;
}
private static class VerifyVisitor implements IntersectVisitor {
@ -405,4 +417,20 @@ public class BKDReader implements Accountable {
return splitPackedValues.length +
leafBlockFPs.length * RamUsageEstimator.NUM_BYTES_LONG;
}
/** Returns a copy of the packed per-dimension minimum values, so callers cannot modify this reader's array. */
public byte[] getMinPackedValue() {
  final byte[] copy = this.minPackedValue.clone();
  return copy;
}
/** Returns a copy of the packed per-dimension maximum values, so callers cannot modify this reader's array. */
public byte[] getMaxPackedValue() {
  final byte[] copy = this.maxPackedValue.clone();
  return copy;
}
/** Returns the number of dimensions this reader was opened with. */
public int getNumDimensions() {
  return this.numDims;
}
/** Returns the number of bytes used to encode each dimension. */
public int getBytesPerDimension() {
  return this.bytesPerDim;
}
}

View File

@ -119,6 +119,12 @@ public class BKDWriter implements Closeable {
protected final int maxPointsInLeafNode;
private final int maxPointsSortInHeap;
/** Minimum per-dim values, packed */
protected final byte[] minPackedValue;
/** Maximum per-dim values, packed */
protected final byte[] maxPackedValue;
private long pointCount;
public BKDWriter(Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim) throws IOException {
@ -142,6 +148,9 @@ public class BKDWriter implements Closeable {
scratch2 = new byte[packedBytesLength];
commonPrefixLengths = new int[numDims];
minPackedValue = new byte[packedBytesLength];
maxPackedValue = new byte[packedBytesLength];
// dimensional values (numDims * bytesPerDim) + ord (long) + docID (int)
bytesPerDoc = packedBytesLength + RamUsageEstimator.NUM_BYTES_LONG + RamUsageEstimator.NUM_BYTES_INT;
@ -213,6 +222,22 @@ public class BKDWriter implements Closeable {
heapPointWriter.append(packedValue, pointCount, docID);
}
// TODO: we could specialize for the 1D case:
if (pointCount == 0) {
System.arraycopy(packedValue, 0, minPackedValue, 0, packedBytesLength);
System.arraycopy(packedValue, 0, maxPackedValue, 0, packedBytesLength);
} else {
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (StringHelper.compare(bytesPerDim, packedValue, offset, minPackedValue, offset) < 0) {
System.arraycopy(packedValue, offset, minPackedValue, offset, bytesPerDim);
}
if (StringHelper.compare(bytesPerDim, packedValue, offset, maxPackedValue, offset) > 0) {
System.arraycopy(packedValue, offset, maxPackedValue, offset, bytesPerDim);
}
}
}
pointCount++;
}
@ -398,6 +423,11 @@ public class BKDWriter implements Closeable {
leafBlockDocIDs[leafCount] = reader.docIDBase + reader.docID;
System.arraycopy(reader.state.scratchPackedValue, 0, leafBlockPackedValues[leafCount], 0, packedBytesLength);
// Track min/max while merging: the first value seen becomes the minimum and every value
// overwrites the maximum (presumably values arrive in sorted order here, as the
// valueInOrder assert below checks for the 1D case -- TODO confirm).
if (valueCount == 0) {
  System.arraycopy(reader.state.scratchPackedValue, 0, minPackedValue, 0, packedBytesLength);
}
System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength);
assert numDims > 1 || valueInOrder(valueCount, lastPackedValue, reader.state.scratchPackedValue);
// Increment outside the assert: with assertions disabled (or numDims > 1 short-circuiting)
// the counter would never advance and minPackedValue would be overwritten by every value.
valueCount++;
if (leafCount == 0) {
@ -836,6 +866,8 @@ public class BKDWriter implements Closeable {
assert leafBlockFPs.length > 0;
out.writeVInt(leafBlockFPs.length);
out.writeBytes(minPackedValue, 0, packedBytesLength);
out.writeBytes(maxPackedValue, 0, packedBytesLength);
// TODO: for 1D case, don't waste the first byte of each split value (it's always 0)

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
@ -46,6 +47,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
// TODO: factor out a BaseTestDimensionFormat
@ -906,6 +908,28 @@ public class TestDimensionalValues extends LuceneTestCase {
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
DirectoryReader r = null;
// Compute actual min/max values:
byte[][] expectedMinValues = new byte[numDims][];
byte[][] expectedMaxValues = new byte[numDims][];
for(int ord=0;ord<docValues.length;ord++) {
for(int dim=0;dim<numDims;dim++) {
if (ord == 0) {
expectedMinValues[dim] = new byte[numBytesPerDim];
System.arraycopy(docValues[ord][dim], 0, expectedMinValues[dim], 0, numBytesPerDim);
expectedMaxValues[dim] = new byte[numBytesPerDim];
System.arraycopy(docValues[ord][dim], 0, expectedMaxValues[dim], 0, numBytesPerDim);
} else {
// TODO: it's cheating that we use StringHelper.compare for "truth": what if it's buggy?
if (StringHelper.compare(numBytesPerDim, docValues[ord][dim], 0, expectedMinValues[dim], 0) < 0) {
System.arraycopy(docValues[ord][dim], 0, expectedMinValues[dim], 0, numBytesPerDim);
}
if (StringHelper.compare(numBytesPerDim, docValues[ord][dim], 0, expectedMaxValues[dim], 0) > 0) {
System.arraycopy(docValues[ord][dim], 0, expectedMaxValues[dim], 0, numBytesPerDim);
}
}
}
}
// 20% of the time we add into a separate directory, then at some point use
// addIndexes to bring the indexed dimensional values to the main directory:
Directory saveDir;
@ -1036,6 +1060,19 @@ public class TestDimensionalValues extends LuceneTestCase {
NumericDocValues idValues = MultiDocValues.getNumericValues(r, "id");
Bits liveDocs = MultiFields.getLiveDocs(r);
// Verify min/max values are correct:
byte[] minValues = dimValues.getMinPackedValue("field");
byte[] maxValues = dimValues.getMaxPackedValue("field");
byte[] scratch = new byte[numBytesPerDim];
for(int dim=0;dim<numDims;dim++) {
System.arraycopy(minValues, dim*numBytesPerDim, scratch, 0, scratch.length);
//System.out.println("dim=" + dim + " expectedMin=" + new BytesRef(expectedMinValues[dim]) + " min=" + new BytesRef(scratch));
assertTrue(Arrays.equals(expectedMinValues[dim], scratch));
System.arraycopy(maxValues, dim*numBytesPerDim, scratch, 0, scratch.length);
//System.out.println("dim=" + dim + " expectedMax=" + new BytesRef(expectedMaxValues[dim]) + " max=" + new BytesRef(scratch));
assertTrue(Arrays.equals(expectedMaxValues[dim], scratch));
}
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
if (VERBOSE) {

View File

@ -123,6 +123,10 @@ public class TestBKD extends LuceneTestCase {
}
int[][] docs = new int[numDocs][];
byte[] scratch = new byte[4*numDims];
int[] minValue = new int[numDims];
int[] maxValue = new int[numDims];
Arrays.fill(minValue, Integer.MAX_VALUE);
Arrays.fill(maxValue, Integer.MIN_VALUE);
for(int docID=0;docID<numDocs;docID++) {
int[] values = new int[numDims];
if (VERBOSE) {
@ -130,6 +134,12 @@ public class TestBKD extends LuceneTestCase {
}
for(int dim=0;dim<numDims;dim++) {
values[dim] = random().nextInt();
if (values[dim] < minValue[dim]) {
minValue[dim] = values[dim];
}
if (values[dim] > maxValue[dim]) {
maxValue[dim] = values[dim];
}
NumericUtils.intToBytes(values[dim], scratch, dim);
if (VERBOSE) {
System.out.println(" " + dim + " -> " + values[dim]);
@ -148,6 +158,13 @@ public class TestBKD extends LuceneTestCase {
in.seek(indexFP);
BKDReader r = new BKDReader(in);
byte[] minPackedValue = r.getMinPackedValue();
byte[] maxPackedValue = r.getMaxPackedValue();
for(int dim=0;dim<numDims;dim++) {
assertEquals(minValue[dim], NumericUtils.bytesToInt(minPackedValue, dim));
assertEquals(maxValue[dim], NumericUtils.bytesToInt(maxPackedValue, dim));
}
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
if (VERBOSE) {

View File

@ -265,8 +265,8 @@ public class SortingLeafReader extends FilterLeafReader {
}
@Override
public void intersect(String field, IntersectVisitor visitor) throws IOException {
in.intersect(field,
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
in.intersect(fieldName,
new IntersectVisitor() {
@Override
public void visit(int docID) throws IOException {
@ -284,6 +284,26 @@ public class SortingLeafReader extends FilterLeafReader {
}
});
}
/** Forwards to the wrapped {@code in} and returns its packed minimum for {@code fieldName}. */
@Override
public byte[] getMinPackedValue(String fieldName) throws IOException {
  final byte[] minPacked = in.getMinPackedValue(fieldName);
  return minPacked;
}
/** Forwards to the wrapped {@code in} and returns its packed maximum for {@code fieldName}. */
@Override
public byte[] getMaxPackedValue(String fieldName) throws IOException {
  final byte[] maxPacked = in.getMaxPackedValue(fieldName);
  return maxPacked;
}
/** Forwards to the wrapped {@code in} and returns its dimension count for {@code fieldName}. */
@Override
public int getNumDimensions(String fieldName) throws IOException {
  final int numDims = in.getNumDimensions(fieldName);
  return numDims;
}
/** Forwards to the wrapped {@code in} and returns its bytes per dimension for {@code fieldName}. */
@Override
public int getBytesPerDimension(String fieldName) throws IOException {
  final int bytesPerDim = in.getBytesPerDimension(fieldName);
  return bytesPerDim;
}
}
private static class SortingSortedDocValues extends SortedDocValues {

View File

@ -98,6 +98,26 @@ public final class AssertingDimensionalFormat extends DimensionalFormat {
public String toString() {
return getClass().getSimpleName() + "(" + in.toString() + ")";
}
/** Forwards to the wrapped {@code in} and returns its packed minimum for {@code fieldName}. */
@Override
public byte[] getMinPackedValue(String fieldName) throws IOException {
  final byte[] minPacked = in.getMinPackedValue(fieldName);
  return minPacked;
}
/** Forwards to the wrapped {@code in} and returns its packed maximum for {@code fieldName}. */
@Override
public byte[] getMaxPackedValue(String fieldName) throws IOException {
  final byte[] maxPacked = in.getMaxPackedValue(fieldName);
  return maxPacked;
}
/** Forwards to the wrapped {@code in} and returns its dimension count for {@code fieldName}. */
@Override
public int getNumDimensions(String fieldName) throws IOException {
  final int numDims = in.getNumDimensions(fieldName);
  return numDims;
}
/** Forwards to the wrapped {@code in} and returns its bytes per dimension for {@code fieldName}. */
@Override
public int getBytesPerDimension(String fieldName) throws IOException {
  final int bytesPerDim = in.getBytesPerDimension(fieldName);
  return bytesPerDim;
}
}
static class AssertingDimensionalWriter extends DimensionalWriter {