mirror of https://github.com/apache/lucene.git
LUCENE-6962: add min/max per dimension to dimensional values
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1723682 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
965100a167
commit
9bc5058f7d
|
@ -55,6 +55,9 @@ New Features
|
|||
* LUCENE-6837: Add N-best output support to JapaneseTokenizer.
|
||||
(Hiroharu Konno via Christian Moen)
|
||||
|
||||
* LUCENE-6962: Add per-dimension min/max to dimensional values
|
||||
(Mike McCandless)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-3312: The API of oal.document was restructured to
|
||||
|
|
|
@ -33,8 +33,9 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.BL
|
|||
|
||||
class SimpleTextBKDReader extends BKDReader {
|
||||
|
||||
public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {
|
||||
super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues);
|
||||
public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
|
||||
byte[] minPackedValue, byte[] maxPackedValue) throws IOException {
|
||||
super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -43,6 +43,8 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.FI
|
|||
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.FIELD_FP_NAME;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.INDEX_COUNT;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.MAX_LEAF_POINTS;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.MAX_VALUE;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.MIN_VALUE;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.NUM_DIMS;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.SPLIT_COUNT;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.SPLIT_DIM;
|
||||
|
@ -89,6 +91,17 @@ class SimpleTextDimensionalReader extends DimensionalReader {
|
|||
|
||||
readLine(dataIn);
|
||||
int count = parseInt(INDEX_COUNT);
|
||||
|
||||
readLine(dataIn);
|
||||
assert startsWith(MIN_VALUE);
|
||||
BytesRef minValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MIN_VALUE));
|
||||
assert minValue.length == numDims*bytesPerDim;
|
||||
|
||||
readLine(dataIn);
|
||||
assert startsWith(MAX_VALUE);
|
||||
BytesRef maxValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MAX_VALUE));
|
||||
assert maxValue.length == numDims*bytesPerDim;
|
||||
|
||||
long[] leafBlockFPs = new long[count];
|
||||
for(int i=0;i<count;i++) {
|
||||
readLine(dataIn);
|
||||
|
@ -108,7 +121,7 @@ class SimpleTextDimensionalReader extends DimensionalReader {
|
|||
System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + bytesPerDim) * i + 1, bytesPerDim);
|
||||
}
|
||||
|
||||
return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues);
|
||||
return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes);
|
||||
}
|
||||
|
||||
private void readLine(IndexInput in) throws IOException {
|
||||
|
@ -133,17 +146,21 @@ class SimpleTextDimensionalReader extends DimensionalReader {
|
|||
return new String(scratch.bytes(), prefix.length, scratch.length() - prefix.length, StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
/** Finds all documents and points matching the provided visitor */
|
||||
@Override
|
||||
public void intersect(String field, IntersectVisitor visitor) throws IOException {
|
||||
FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(field);
|
||||
private BKDReader getBKDReader(String fieldName) {
|
||||
FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(fieldName);
|
||||
if (fieldInfo == null) {
|
||||
throw new IllegalArgumentException("field=\"" + field + "\" is unrecognized");
|
||||
throw new IllegalArgumentException("field=\"" + fieldName + "\" is unrecognized");
|
||||
}
|
||||
if (fieldInfo.getDimensionCount() == 0) {
|
||||
throw new IllegalArgumentException("field=\"" + field + "\" did not index dimensional values");
|
||||
throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index dimensional values");
|
||||
}
|
||||
BKDReader bkdReader = readers.get(field);
|
||||
return readers.get(fieldName);
|
||||
}
|
||||
|
||||
/** Finds all documents and points matching the provided visitor */
|
||||
@Override
|
||||
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
|
||||
BKDReader bkdReader = getBKDReader(fieldName);
|
||||
if (bkdReader == null) {
|
||||
// Schema ghost corner case! This field did index dimensional values in the past, but
|
||||
// now all docs having this dimensional field were deleted in this segment:
|
||||
|
@ -188,4 +205,48 @@ class SimpleTextDimensionalReader extends DimensionalReader {
|
|||
public String toString() {
|
||||
return "SimpleTextDimensionalReader(segment=" + readState.segmentInfo.name + " maxDoc=" + readState.segmentInfo.maxDoc() + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMinPackedValue(String fieldName) {
|
||||
BKDReader bkdReader = getBKDReader(fieldName);
|
||||
if (bkdReader == null) {
|
||||
// Schema ghost corner case! This field did index dimensional values in the past, but
|
||||
// now all docs having this dimensional field were deleted in this segment:
|
||||
return null;
|
||||
}
|
||||
return bkdReader.getMinPackedValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMaxPackedValue(String fieldName) {
|
||||
BKDReader bkdReader = getBKDReader(fieldName);
|
||||
if (bkdReader == null) {
|
||||
// Schema ghost corner case! This field did index dimensional values in the past, but
|
||||
// now all docs having this dimensional field were deleted in this segment:
|
||||
return null;
|
||||
}
|
||||
return bkdReader.getMinPackedValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDimensions(String fieldName) {
|
||||
BKDReader bkdReader = getBKDReader(fieldName);
|
||||
if (bkdReader == null) {
|
||||
// Schema ghost corner case! This field did index dimensional values in the past, but
|
||||
// now all docs having this dimensional field were deleted in this segment:
|
||||
return 0;
|
||||
}
|
||||
return bkdReader.getNumDimensions();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension(String fieldName) {
|
||||
BKDReader bkdReader = getBKDReader(fieldName);
|
||||
if (bkdReader == null) {
|
||||
// Schema ghost corner case! This field did index dimensional values in the past, but
|
||||
// now all docs having this dimensional field were deleted in this segment:
|
||||
return 0;
|
||||
}
|
||||
return bkdReader.getBytesPerDimension();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -49,6 +49,8 @@ class SimpleTextDimensionalWriter extends DimensionalWriter {
|
|||
final static BytesRef FIELD_COUNT = new BytesRef("field count ");
|
||||
final static BytesRef FIELD_FP_NAME = new BytesRef(" field fp name ");
|
||||
final static BytesRef FIELD_FP = new BytesRef(" field fp ");
|
||||
final static BytesRef MIN_VALUE = new BytesRef("min value ");
|
||||
final static BytesRef MAX_VALUE = new BytesRef("max value ");
|
||||
|
||||
private IndexOutput dataOut;
|
||||
final BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
|
@ -90,6 +92,14 @@ class SimpleTextDimensionalWriter extends DimensionalWriter {
|
|||
writeInt(out, leafBlockFPs.length);
|
||||
newline(out);
|
||||
|
||||
write(out, MIN_VALUE);
|
||||
BytesRef br = new BytesRef(minPackedValue, 0, minPackedValue.length);
|
||||
write(out, br.toString());
|
||||
|
||||
write(out, MAX_VALUE);
|
||||
br = new BytesRef(maxPackedValue, 0, maxPackedValue.length);
|
||||
write(out, br.toString());
|
||||
|
||||
for(int i=0;i<leafBlockFPs.length;i++) {
|
||||
write(out, BLOCK_FP);
|
||||
writeLong(out, leafBlockFPs[i]);
|
||||
|
@ -109,7 +119,7 @@ class SimpleTextDimensionalWriter extends DimensionalWriter {
|
|||
writeInt(out, splitPackedValues[i * (1 + fieldInfo.getDimensionNumBytes())] & 0xff);
|
||||
newline(out);
|
||||
write(out, SPLIT_VALUE);
|
||||
BytesRef br = new BytesRef(splitPackedValues, 1+(i * (1+fieldInfo.getDimensionNumBytes())), fieldInfo.getDimensionNumBytes());
|
||||
br = new BytesRef(splitPackedValues, 1+(i * (1+fieldInfo.getDimensionNumBytes())), fieldInfo.getDimensionNumBytes());
|
||||
write(out, br.toString());
|
||||
newline(out);
|
||||
}
|
||||
|
|
|
@ -75,6 +75,26 @@ public abstract class DimensionalFormat {
|
|||
public void intersect(String fieldName, IntersectVisitor visitor) {
|
||||
throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with dimensional values");
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMinPackedValue(String fieldName) {
|
||||
throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with dimensional values");
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMaxPackedValue(String fieldName) {
|
||||
throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with dimensional values");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDimensions(String fieldName) {
|
||||
throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with dimensional values");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension(String fieldName) {
|
||||
throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with dimensional values");
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
|
|
@ -95,6 +95,26 @@ public abstract class DimensionalWriter implements Closeable {
|
|||
@Override
|
||||
public void close() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMinPackedValue(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMaxPackedValue(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDimensions(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -84,24 +84,29 @@ public class Lucene60DimensionalReader extends DimensionalReader implements Clos
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void intersect(String field, IntersectVisitor visitor) throws IOException {
|
||||
FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(field);
|
||||
private BKDReader getBKDReader(String fieldName) {
|
||||
FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(fieldName);
|
||||
if (fieldInfo == null) {
|
||||
throw new IllegalArgumentException("field=\"" + field + "\" is unrecognized");
|
||||
throw new IllegalArgumentException("field=\"" + fieldName + "\" is unrecognized");
|
||||
}
|
||||
if (fieldInfo.getDimensionCount() == 0) {
|
||||
throw new IllegalArgumentException("field=\"" + field + "\" did not index dimensional values");
|
||||
throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index dimensional values");
|
||||
}
|
||||
|
||||
BKDReader reader = readers.get(fieldInfo.number);
|
||||
if (reader == null) {
|
||||
return readers.get(fieldInfo.number);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
|
||||
BKDReader bkdReader = getBKDReader(fieldName);
|
||||
|
||||
if (bkdReader == null) {
|
||||
// Schema ghost corner case! This field did index dimensional values in the past, but
|
||||
// now all docs having this dimensional field were deleted in this segment:
|
||||
return;
|
||||
}
|
||||
|
||||
reader.intersect(visitor);
|
||||
bkdReader.intersect(visitor);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -134,5 +139,51 @@ public class Lucene60DimensionalReader extends DimensionalReader implements Clos
|
|||
// Free up heap:
|
||||
readers.clear();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMinPackedValue(String fieldName) {
|
||||
BKDReader bkdReader = getBKDReader(fieldName);
|
||||
if (bkdReader == null) {
|
||||
// Schema ghost corner case! This field did index dimensional values in the past, but
|
||||
// now all docs having this dimensional field were deleted in this segment:
|
||||
return null;
|
||||
}
|
||||
|
||||
return bkdReader.getMinPackedValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMaxPackedValue(String fieldName) {
|
||||
BKDReader bkdReader = getBKDReader(fieldName);
|
||||
if (bkdReader == null) {
|
||||
// Schema ghost corner case! This field did index dimensional values in the past, but
|
||||
// now all docs having this dimensional field were deleted in this segment:
|
||||
return null;
|
||||
}
|
||||
|
||||
return bkdReader.getMaxPackedValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDimensions(String fieldName) {
|
||||
BKDReader bkdReader = getBKDReader(fieldName);
|
||||
if (bkdReader == null) {
|
||||
// Schema ghost corner case! This field did index dimensional values in the past, but
|
||||
// now all docs having this dimensional field were deleted in this segment:
|
||||
return 0;
|
||||
}
|
||||
return bkdReader.getNumDimensions();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension(String fieldName) {
|
||||
BKDReader bkdReader = getBKDReader(fieldName);
|
||||
if (bkdReader == null) {
|
||||
// Schema ghost corner case! This field did index dimensional values in the past, but
|
||||
// now all docs having this dimensional field were deleted in this segment:
|
||||
return 0;
|
||||
}
|
||||
return bkdReader.getBytesPerDimension();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ public abstract class DimensionalValues {
|
|||
/** Maximum number of dimensions */
|
||||
public static final int MAX_DIMENSIONS = BKDWriter.MAX_DIMS;
|
||||
|
||||
/** Defautl constructor */
|
||||
/** Default constructor */
|
||||
protected DimensionalValues() {
|
||||
}
|
||||
|
||||
|
@ -78,4 +78,16 @@ public abstract class DimensionalValues {
|
|||
* This method does not enforce live docs, so it's up to the caller
|
||||
* to test whether each document is deleted, if necessary. */
|
||||
public abstract void intersect(String fieldName, IntersectVisitor visitor) throws IOException;
|
||||
|
||||
/** Returns minimum value for each dimension, packed, or null if no points were indexed */
|
||||
public abstract byte[] getMinPackedValue(String fieldName) throws IOException;
|
||||
|
||||
/** Returns maximum value for each dimension, packed, or null if no points were indexed */
|
||||
public abstract byte[] getMaxPackedValue(String fieldName) throws IOException;
|
||||
|
||||
/** Returns how many dimensions were indexed */
|
||||
public abstract int getNumDimensions(String fieldName) throws IOException;
|
||||
|
||||
/** Returns the number of bytes per dimension */
|
||||
public abstract int getBytesPerDimension(String fieldName) throws IOException;
|
||||
}
|
||||
|
|
|
@ -63,8 +63,6 @@ class DimensionalValuesWriter {
|
|||
|
||||
public void flush(SegmentWriteState state, DimensionalWriter writer) throws IOException {
|
||||
|
||||
final int maxDoc = state.segmentInfo.maxDoc();
|
||||
|
||||
writer.writeField(fieldInfo,
|
||||
new DimensionalReader() {
|
||||
@Override
|
||||
|
@ -91,6 +89,26 @@ class DimensionalValuesWriter {
|
|||
@Override
|
||||
public void close() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMinPackedValue(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMaxPackedValue(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDimensions(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,8 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
class MultiDimensionalValues extends DimensionalValues {
|
||||
|
||||
private final List<DimensionalValues> subs;
|
||||
|
@ -95,4 +97,74 @@ class MultiDimensionalValues extends DimensionalValues {
|
|||
b.append(')');
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMinPackedValue(String fieldName) throws IOException {
|
||||
byte[] result = null;
|
||||
for(int i=0;i<subs.size();i++) {
|
||||
byte[] minPackedValue = subs.get(i).getMinPackedValue(fieldName);
|
||||
if (result == null) {
|
||||
if (minPackedValue != null) {
|
||||
result = minPackedValue.clone();
|
||||
}
|
||||
} else {
|
||||
int numDims = subs.get(0).getNumDimensions(fieldName);
|
||||
int bytesPerDim = subs.get(0).getBytesPerDimension(fieldName);
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim*bytesPerDim;
|
||||
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, result, offset) < 0) {
|
||||
System.arraycopy(minPackedValue, offset, result, offset, bytesPerDim);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMaxPackedValue(String fieldName) throws IOException {
|
||||
byte[] result = null;
|
||||
for(int i=0;i<subs.size();i++) {
|
||||
byte[] maxPackedValue = subs.get(i).getMaxPackedValue(fieldName);
|
||||
if (result == null) {
|
||||
if (maxPackedValue != null) {
|
||||
result = maxPackedValue.clone();
|
||||
}
|
||||
} else {
|
||||
int numDims = subs.get(0).getNumDimensions(fieldName);
|
||||
int bytesPerDim = subs.get(0).getBytesPerDimension(fieldName);
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim*bytesPerDim;
|
||||
if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, result, offset) > 0) {
|
||||
System.arraycopy(maxPackedValue, offset, result, offset, bytesPerDim);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDimensions(String fieldName) throws IOException {
|
||||
for(int i=0;i<subs.size();i++) {
|
||||
int result = subs.get(i).getNumDimensions(fieldName);
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension(String fieldName) throws IOException {
|
||||
for(int i=0;i<subs.size();i++) {
|
||||
int result = subs.get(i).getBytesPerDimension(fieldName);
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -329,6 +329,58 @@ public class ParallelLeafReader extends LeafReader {
|
|||
}
|
||||
dimValues.intersect(fieldName, visitor);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMinPackedValue(String fieldName) throws IOException {
|
||||
LeafReader reader = fieldToReader.get(fieldName);
|
||||
if (reader == null) {
|
||||
return null;
|
||||
}
|
||||
DimensionalValues dimValues = reader.getDimensionalValues();
|
||||
if (dimValues == null) {
|
||||
return null;
|
||||
}
|
||||
return dimValues.getMinPackedValue(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMaxPackedValue(String fieldName) throws IOException {
|
||||
LeafReader reader = fieldToReader.get(fieldName);
|
||||
if (reader == null) {
|
||||
return null;
|
||||
}
|
||||
DimensionalValues dimValues = reader.getDimensionalValues();
|
||||
if (dimValues == null) {
|
||||
return null;
|
||||
}
|
||||
return dimValues.getMaxPackedValue(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDimensions(String fieldName) throws IOException {
|
||||
LeafReader reader = fieldToReader.get(fieldName);
|
||||
if (reader == null) {
|
||||
return 0;
|
||||
}
|
||||
DimensionalValues dimValues = reader.getDimensionalValues();
|
||||
if (dimValues == null) {
|
||||
return 0;
|
||||
}
|
||||
return dimValues.getNumDimensions(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension(String fieldName) throws IOException {
|
||||
LeafReader reader = fieldToReader.get(fieldName);
|
||||
if (reader == null) {
|
||||
return 0;
|
||||
}
|
||||
DimensionalValues dimValues = reader.getDimensionalValues();
|
||||
if (dimValues == null) {
|
||||
return 0;
|
||||
}
|
||||
return dimValues.getBytesPerDimension(fieldName);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -152,6 +152,26 @@ public final class SlowCodecReaderWrapper {
|
|||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMinPackedValue(String fieldName) throws IOException {
|
||||
return values.getMinPackedValue(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMaxPackedValue(String fieldName) throws IOException {
|
||||
return values.getMaxPackedValue(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDimensions(String fieldName) throws IOException {
|
||||
return values.getNumDimensions(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension(String fieldName) throws IOException {
|
||||
return values.getBytesPerDimension(fieldName);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -43,6 +43,8 @@ public class BKDReader implements Accountable {
|
|||
final int bytesPerDim;
|
||||
final IndexInput in;
|
||||
final int maxPointsInLeafNode;
|
||||
final byte[] minPackedValue;
|
||||
final byte[] maxPackedValue;
|
||||
protected final int packedBytesLength;
|
||||
|
||||
/** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */
|
||||
|
@ -58,6 +60,11 @@ public class BKDReader implements Accountable {
|
|||
assert numLeaves > 0;
|
||||
leafNodeOffset = numLeaves;
|
||||
|
||||
minPackedValue = new byte[packedBytesLength];
|
||||
maxPackedValue = new byte[packedBytesLength];
|
||||
in.readBytes(minPackedValue, 0, packedBytesLength);
|
||||
in.readBytes(maxPackedValue, 0, packedBytesLength);
|
||||
|
||||
splitPackedValues = new byte[(1+bytesPerDim)*numLeaves];
|
||||
|
||||
// TODO: don't write split packed values[0]!
|
||||
|
@ -116,8 +123,9 @@ public class BKDReader implements Accountable {
|
|||
this.in = in;
|
||||
}
|
||||
|
||||
/** Called by consumers that have their own on-disk format for the index */
|
||||
protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {
|
||||
/** Called by consumers that have their own on-disk format for the index (e.g. SimpleText) */
|
||||
protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
|
||||
byte[] minPackedValue, byte[] maxPackedValue) throws IOException {
|
||||
this.in = in;
|
||||
this.numDims = numDims;
|
||||
this.maxPointsInLeafNode = maxPointsInLeafNode;
|
||||
|
@ -126,6 +134,10 @@ public class BKDReader implements Accountable {
|
|||
this.leafNodeOffset = leafBlockFPs.length;
|
||||
this.leafBlockFPs = leafBlockFPs;
|
||||
this.splitPackedValues = splitPackedValues;
|
||||
this.minPackedValue = minPackedValue;
|
||||
this.maxPackedValue = maxPackedValue;
|
||||
assert minPackedValue.length == packedBytesLength;
|
||||
assert maxPackedValue.length == packedBytesLength;
|
||||
}
|
||||
|
||||
private static class VerifyVisitor implements IntersectVisitor {
|
||||
|
@ -405,4 +417,20 @@ public class BKDReader implements Accountable {
|
|||
return splitPackedValues.length +
|
||||
leafBlockFPs.length * RamUsageEstimator.NUM_BYTES_LONG;
|
||||
}
|
||||
|
||||
public byte[] getMinPackedValue() {
|
||||
return minPackedValue.clone();
|
||||
}
|
||||
|
||||
public byte[] getMaxPackedValue() {
|
||||
return maxPackedValue.clone();
|
||||
}
|
||||
|
||||
public int getNumDimensions() {
|
||||
return numDims;
|
||||
}
|
||||
|
||||
public int getBytesPerDimension() {
|
||||
return bytesPerDim;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -119,6 +119,12 @@ public class BKDWriter implements Closeable {
|
|||
protected final int maxPointsInLeafNode;
|
||||
private final int maxPointsSortInHeap;
|
||||
|
||||
/** Minimum per-dim values, packed */
|
||||
protected final byte[] minPackedValue;
|
||||
|
||||
/** Maximum per-dim values, packed */
|
||||
protected final byte[] maxPackedValue;
|
||||
|
||||
private long pointCount;
|
||||
|
||||
public BKDWriter(Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim) throws IOException {
|
||||
|
@ -142,6 +148,9 @@ public class BKDWriter implements Closeable {
|
|||
scratch2 = new byte[packedBytesLength];
|
||||
commonPrefixLengths = new int[numDims];
|
||||
|
||||
minPackedValue = new byte[packedBytesLength];
|
||||
maxPackedValue = new byte[packedBytesLength];
|
||||
|
||||
// dimensional values (numDims * bytesPerDim) + ord (long) + docID (int)
|
||||
bytesPerDoc = packedBytesLength + RamUsageEstimator.NUM_BYTES_LONG + RamUsageEstimator.NUM_BYTES_INT;
|
||||
|
||||
|
@ -213,6 +222,22 @@ public class BKDWriter implements Closeable {
|
|||
heapPointWriter.append(packedValue, pointCount, docID);
|
||||
}
|
||||
|
||||
// TODO: we could specialize for the 1D case:
|
||||
if (pointCount == 0) {
|
||||
System.arraycopy(packedValue, 0, minPackedValue, 0, packedBytesLength);
|
||||
System.arraycopy(packedValue, 0, maxPackedValue, 0, packedBytesLength);
|
||||
} else {
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim*bytesPerDim;
|
||||
if (StringHelper.compare(bytesPerDim, packedValue, offset, minPackedValue, offset) < 0) {
|
||||
System.arraycopy(packedValue, offset, minPackedValue, offset, bytesPerDim);
|
||||
}
|
||||
if (StringHelper.compare(bytesPerDim, packedValue, offset, maxPackedValue, offset) > 0) {
|
||||
System.arraycopy(packedValue, offset, maxPackedValue, offset, bytesPerDim);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pointCount++;
|
||||
}
|
||||
|
||||
|
@ -398,6 +423,11 @@ public class BKDWriter implements Closeable {
|
|||
leafBlockDocIDs[leafCount] = reader.docIDBase + reader.docID;
|
||||
System.arraycopy(reader.state.scratchPackedValue, 0, leafBlockPackedValues[leafCount], 0, packedBytesLength);
|
||||
|
||||
if (valueCount == 0) {
|
||||
System.arraycopy(reader.state.scratchPackedValue, 0, minPackedValue, 0, packedBytesLength);
|
||||
}
|
||||
System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength);
|
||||
|
||||
assert numDims > 1 || valueInOrder(valueCount++, lastPackedValue, reader.state.scratchPackedValue);
|
||||
|
||||
if (leafCount == 0) {
|
||||
|
@ -836,6 +866,8 @@ public class BKDWriter implements Closeable {
|
|||
|
||||
assert leafBlockFPs.length > 0;
|
||||
out.writeVInt(leafBlockFPs.length);
|
||||
out.writeBytes(minPackedValue, 0, packedBytesLength);
|
||||
out.writeBytes(maxPackedValue, 0, packedBytesLength);
|
||||
|
||||
// TODO: for 1D case, don't waste the first byte of each split value (it's always 0)
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -46,6 +47,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
// TODO: factor out a BaseTestDimensionFormat
|
||||
|
@ -906,6 +908,28 @@ public class TestDimensionalValues extends LuceneTestCase {
|
|||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
DirectoryReader r = null;
|
||||
|
||||
// Compute actual min/max values:
|
||||
byte[][] expectedMinValues = new byte[numDims][];
|
||||
byte[][] expectedMaxValues = new byte[numDims][];
|
||||
for(int ord=0;ord<docValues.length;ord++) {
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
if (ord == 0) {
|
||||
expectedMinValues[dim] = new byte[numBytesPerDim];
|
||||
System.arraycopy(docValues[ord][dim], 0, expectedMinValues[dim], 0, numBytesPerDim);
|
||||
expectedMaxValues[dim] = new byte[numBytesPerDim];
|
||||
System.arraycopy(docValues[ord][dim], 0, expectedMaxValues[dim], 0, numBytesPerDim);
|
||||
} else {
|
||||
// TODO: it's cheating that we use StringHelper.compare for "truth": what if it's buggy?
|
||||
if (StringHelper.compare(numBytesPerDim, docValues[ord][dim], 0, expectedMinValues[dim], 0) < 0) {
|
||||
System.arraycopy(docValues[ord][dim], 0, expectedMinValues[dim], 0, numBytesPerDim);
|
||||
}
|
||||
if (StringHelper.compare(numBytesPerDim, docValues[ord][dim], 0, expectedMaxValues[dim], 0) > 0) {
|
||||
System.arraycopy(docValues[ord][dim], 0, expectedMaxValues[dim], 0, numBytesPerDim);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 20% of the time we add into a separate directory, then at some point use
|
||||
// addIndexes to bring the indexed dimensional values to the main directory:
|
||||
Directory saveDir;
|
||||
|
@ -1036,6 +1060,19 @@ public class TestDimensionalValues extends LuceneTestCase {
|
|||
NumericDocValues idValues = MultiDocValues.getNumericValues(r, "id");
|
||||
Bits liveDocs = MultiFields.getLiveDocs(r);
|
||||
|
||||
// Verify min/max values are correct:
|
||||
byte[] minValues = dimValues.getMinPackedValue("field");
|
||||
byte[] maxValues = dimValues.getMaxPackedValue("field");
|
||||
byte[] scratch = new byte[numBytesPerDim];
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
System.arraycopy(minValues, dim*numBytesPerDim, scratch, 0, scratch.length);
|
||||
//System.out.println("dim=" + dim + " expectedMin=" + new BytesRef(expectedMinValues[dim]) + " min=" + new BytesRef(scratch));
|
||||
assertTrue(Arrays.equals(expectedMinValues[dim], scratch));
|
||||
System.arraycopy(maxValues, dim*numBytesPerDim, scratch, 0, scratch.length);
|
||||
//System.out.println("dim=" + dim + " expectedMax=" + new BytesRef(expectedMaxValues[dim]) + " max=" + new BytesRef(scratch));
|
||||
assertTrue(Arrays.equals(expectedMaxValues[dim], scratch));
|
||||
}
|
||||
|
||||
int iters = atLeast(100);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
if (VERBOSE) {
|
||||
|
|
|
@ -123,6 +123,10 @@ public class TestBKD extends LuceneTestCase {
|
|||
}
|
||||
int[][] docs = new int[numDocs][];
|
||||
byte[] scratch = new byte[4*numDims];
|
||||
int[] minValue = new int[numDims];
|
||||
int[] maxValue = new int[numDims];
|
||||
Arrays.fill(minValue, Integer.MAX_VALUE);
|
||||
Arrays.fill(maxValue, Integer.MIN_VALUE);
|
||||
for(int docID=0;docID<numDocs;docID++) {
|
||||
int[] values = new int[numDims];
|
||||
if (VERBOSE) {
|
||||
|
@ -130,6 +134,12 @@ public class TestBKD extends LuceneTestCase {
|
|||
}
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
values[dim] = random().nextInt();
|
||||
if (values[dim] < minValue[dim]) {
|
||||
minValue[dim] = values[dim];
|
||||
}
|
||||
if (values[dim] > maxValue[dim]) {
|
||||
maxValue[dim] = values[dim];
|
||||
}
|
||||
NumericUtils.intToBytes(values[dim], scratch, dim);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" " + dim + " -> " + values[dim]);
|
||||
|
@ -148,6 +158,13 @@ public class TestBKD extends LuceneTestCase {
|
|||
in.seek(indexFP);
|
||||
BKDReader r = new BKDReader(in);
|
||||
|
||||
byte[] minPackedValue = r.getMinPackedValue();
|
||||
byte[] maxPackedValue = r.getMaxPackedValue();
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
assertEquals(minValue[dim], NumericUtils.bytesToInt(minPackedValue, dim));
|
||||
assertEquals(maxValue[dim], NumericUtils.bytesToInt(maxPackedValue, dim));
|
||||
}
|
||||
|
||||
int iters = atLeast(100);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
if (VERBOSE) {
|
||||
|
|
|
@ -265,8 +265,8 @@ public class SortingLeafReader extends FilterLeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void intersect(String field, IntersectVisitor visitor) throws IOException {
|
||||
in.intersect(field,
|
||||
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
|
||||
in.intersect(fieldName,
|
||||
new IntersectVisitor() {
|
||||
@Override
|
||||
public void visit(int docID) throws IOException {
|
||||
|
@ -284,6 +284,26 @@ public class SortingLeafReader extends FilterLeafReader {
|
|||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMinPackedValue(String fieldName) throws IOException {
|
||||
return in.getMinPackedValue(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMaxPackedValue(String fieldName) throws IOException {
|
||||
return in.getMaxPackedValue(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDimensions(String fieldName) throws IOException {
|
||||
return in.getNumDimensions(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension(String fieldName) throws IOException {
|
||||
return in.getBytesPerDimension(fieldName);
|
||||
}
|
||||
}
|
||||
|
||||
private static class SortingSortedDocValues extends SortedDocValues {
|
||||
|
|
|
@ -98,6 +98,26 @@ public final class AssertingDimensionalFormat extends DimensionalFormat {
|
|||
public String toString() {
|
||||
return getClass().getSimpleName() + "(" + in.toString() + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMinPackedValue(String fieldName) throws IOException {
|
||||
return in.getMinPackedValue(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMaxPackedValue(String fieldName) throws IOException {
|
||||
return in.getMaxPackedValue(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDimensions(String fieldName) throws IOException {
|
||||
return in.getNumDimensions(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension(String fieldName) throws IOException {
|
||||
return in.getBytesPerDimension(fieldName);
|
||||
}
|
||||
}
|
||||
|
||||
static class AssertingDimensionalWriter extends DimensionalWriter {
|
||||
|
|
Loading…
Reference in New Issue