mirror of https://github.com/apache/lucene.git

commit fe17b4e2bb
Merge remote-tracking branch 'origin/master'
lucene/CHANGES.txt

@@ -89,6 +89,9 @@ Bug Fixes

 * LUCENE-6914: Fixed DecimalDigitFilter in case of supplementary code points.
   (Hossman)

+* LUCENE-7493: FacetsCollector.search threw an unexpected exception if
+  you asked for zero hits but wanted facets (Mahesh via Mike McCandless)
+
 Improvements

 * LUCENE-7439: FuzzyQuery now matches all terms within the specified
@@ -96,6 +99,9 @@ Improvements

 Optimizations

+* LUCENE-7501: BKDReader should not store the split dimension explicitly in the
+  1D case. (Adrien Grand)
+
 Other

 * LUCENE-7452: Block join query exception suggests how to find a doc, which
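A note on what LUCENE-7501 buys (my arithmetic, not part of the commit): the BKD index stores one entry per node, and in the 1D case the leading split-dimension byte of each entry is always 0, so dropping it shrinks each entry from 1 + bytesPerDim bytes to bytesPerDim. A small sketch with illustrative numbers:

// Sketch: per-entry index size before/after LUCENE-7501; the class name
// and the example value of bytesPerDim are mine, not from the commit.
public class SplitEntrySizeSketch {
  public static void main(String[] args) {
    int bytesPerDim = Long.BYTES; // e.g. an 8-byte 1D point value
    int numDims = 1;

    int oldEntry = 1 + bytesPerDim;                              // 9 bytes: split-dim byte + split value
    int newEntry = numDims == 1 ? bytesPerDim : 1 + bytesPerDim; // 8 bytes: split value only in 1D

    // One entry per node of the index, so a 1D index shrinks by ~11% here.
    System.out.println("old entry = " + oldEntry + " bytes, new entry = " + newEntry + " bytes");
  }
}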
SimpleTextPointsReader.java

@@ -139,15 +139,26 @@ class SimpleTextPointsReader extends PointsReader {
     readLine(dataIn);
     count = parseInt(SPLIT_COUNT);

-    byte[] splitPackedValues = new byte[count * (1 + bytesPerDim)];
+    byte[] splitPackedValues;
+    int bytesPerIndexEntry;
+    if (numDims == 1) {
+      bytesPerIndexEntry = bytesPerDim;
+    } else {
+      bytesPerIndexEntry = 1 + bytesPerDim;
+    }
+    splitPackedValues = new byte[count * bytesPerIndexEntry];
     for(int i=0;i<count;i++) {
       readLine(dataIn);
-      splitPackedValues[(1 + bytesPerDim) * i] = (byte) parseInt(SPLIT_DIM);
+      int address = bytesPerIndexEntry * i;
+      int splitDim = parseInt(SPLIT_DIM);
+      if (numDims != 1) {
+        splitPackedValues[address++] = (byte) splitDim;
+      }
       readLine(dataIn);
       assert startsWith(SPLIT_VALUE);
       BytesRef br = SimpleTextUtil.fromBytesRefString(stripPrefix(SPLIT_VALUE));
       assert br.length == bytesPerDim;
-      System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + bytesPerDim) * i + 1, bytesPerDim);
+      System.arraycopy(br.bytes, br.offset, splitPackedValues, address, bytesPerDim);
     }

     return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes, pointCount, docCount);
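Note that the SimpleText format itself still writes a SPLIT_DIM line for every node; the unconditional parseInt(SPLIT_DIM) above shows that only the in-memory packing changed. A compact sketch of the two packings (helper names are mine):

// Layout of splitPackedValues, entry i at offset i * bytesPerIndexEntry:
//   numDims >= 2: [ splitDim : 1 byte ][ splitValue : bytesPerDim bytes ]
//   numDims == 1: [ splitValue : bytesPerDim bytes ]  (split dim implicitly 0)
public class SplitLayoutSketch {
  static int entryOffset(int i, int numDims, int bytesPerDim) {
    int bytesPerIndexEntry = numDims == 1 ? bytesPerDim : 1 + bytesPerDim;
    return i * bytesPerIndexEntry;
  }

  /** Split dimension of node i: a stored byte in >= 2D, implicitly 0 in 1D. */
  static int splitDim(byte[] splitPackedValues, int i, int numDims, int bytesPerDim) {
    return numDims == 1 ? 0
        : splitPackedValues[entryOffset(i, numDims, bytesPerDim)] & 0xff;
  }
}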
Lucene70DocValuesProducer.java

@@ -424,45 +424,36 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Closeable {
       };
     } else {
       final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
-      LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
-      if (entry.gcd != 1) {
-        values = applyGcd(values, entry.gcd);
-      }
-      if (entry.minValue != 0) {
-        values = applyDelta(values, entry.minValue);
-      }
+      final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
       if (entry.table != null) {
-        values = applyTable(values, entry.table);
-      }
-      return values;
-    }
-  }
-
-  private LongValues applyDelta(LongValues values, long delta) {
-    return new LongValues() {
-      @Override
-      public long get(long index) {
-        return delta + values.get(index);
-      }
-    };
-  }
-
-  private LongValues applyGcd(LongValues values, long gcd) {
-    return new LongValues() {
-      @Override
-      public long get(long index) {
-        return values.get(index) * gcd;
-      }
-    };
-  }
-
-  private LongValues applyTable(LongValues values, long[] table) {
+        final long[] table = entry.table;
         return new LongValues() {
           @Override
           public long get(long index) {
             return table[(int) values.get(index)];
           }
         };
+      } else if (entry.gcd != 1) {
+        final long gcd = entry.gcd;
+        final long minValue = entry.minValue;
+        return new LongValues() {
+          @Override
+          public long get(long index) {
+            return values.get(index) * gcd + minValue;
+          }
+        };
+      } else if (entry.minValue != 0) {
+        final long minValue = entry.minValue;
+        return new LongValues() {
+          @Override
+          public long get(long index) {
+            return values.get(index) + minValue;
+          }
+        };
+      } else {
+        return values;
+      }
     }
   }

   @Override
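Two things happen in this hunk. First, the decode expressions imply the storage scheme: small-cardinality fields store ordinals into entry.table, and otherwise values are packed as (value - minValue) / gcd at entry.bitsPerValue bits. Second, the refactor collapses the stack of applyGcd/applyDelta/applyTable wrappers into one specialized LongValues, so each lookup costs a single virtual call instead of up to three; that motivation is my reading, not stated in the commit. A round-trip sketch of the gcd/min-value math, with made-up numbers:

// Sketch of the gcd/min-value encoding implied by the decode expressions
// above; the values here are illustrative only.
public class GcdEncodingSketch {
  public static void main(String[] args) {
    long minValue = 1_000;
    long gcd = 10;
    long[] raw = {1_000, 1_050, 1_230};

    for (long value : raw) {
      long stored = (value - minValue) / gcd;  // what would be packed on disk
      long decoded = stored * gcd + minValue;  // matches values.get(index) * gcd + minValue
      assert decoded == value;
      System.out.println(value + " -> stored " + stored + " -> " + decoded);
    }
  }
}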
BKDReader.java

@@ -25,6 +25,7 @@ import org.apache.lucene.index.PointValues;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;

 /** Handles intersection of a multi-dimensional shape in byte[] space with a block KD-tree previously written with {@link BKDWriter}.
@@ -38,6 +39,7 @@ public class BKDReader extends PointValues implements Accountable {
   final private int leafNodeOffset;
   final int numDims;
   final int bytesPerDim;
+  final int bytesPerIndexEntry;
   final IndexInput in;
   final int maxPointsInLeafNode;
   final byte[] minPackedValue;
@@ -53,6 +55,7 @@ public class BKDReader extends PointValues implements Accountable {
     numDims = in.readVInt();
     maxPointsInLeafNode = in.readVInt();
     bytesPerDim = in.readVInt();
+    bytesPerIndexEntry = numDims == 1 && version >= BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D ? bytesPerDim : bytesPerDim + 1;
     packedBytesLength = numDims * bytesPerDim;

     // Read index:
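The version test here is what keeps old segments readable: a 1D index written before this change still carries the split-dim byte, so the compact entry size only applies when the segment is new enough. A minimal restatement of that decision (the standalone method and the inlined constant are mine):

// VERSION_IMPLICIT_SPLIT_DIM_1D is the constant this commit adds to
// BKDWriter (value 3); inlined here so the sketch stands alone.
static int bytesPerIndexEntry(int numDims, int bytesPerDim, int version) {
  final int VERSION_IMPLICIT_SPLIT_DIM_1D = 3;
  return numDims == 1 && version >= VERSION_IMPLICIT_SPLIT_DIM_1D
      ? bytesPerDim       // new 1D format: split value only
      : bytesPerDim + 1;  // older segment, or >= 2 dims: split-dim byte + value
}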
@@ -69,7 +72,7 @@ public class BKDReader extends PointValues implements Accountable {
     pointCount = in.readVLong();
     docCount = in.readVInt();

-    splitPackedValues = new byte[(1+bytesPerDim)*numLeaves];
+    splitPackedValues = new byte[bytesPerIndexEntry*numLeaves];

     // TODO: don't write split packed values[0]!
     in.readBytes(splitPackedValues, 0, splitPackedValues.length);
@@ -134,6 +137,7 @@ public class BKDReader extends PointValues implements Accountable {
     this.numDims = numDims;
     this.maxPointsInLeafNode = maxPointsInLeafNode;
     this.bytesPerDim = bytesPerDim;
+    bytesPerIndexEntry = numDims == 1 ? bytesPerDim : bytesPerDim + 1;
     packedBytesLength = numDims * bytesPerDim;
     this.leafNodeOffset = leafBlockFPs.length;
     this.leafBlockFPs = leafBlockFPs;
@@ -233,22 +237,22 @@ public class BKDReader extends PointValues implements Accountable {
     } else {
       // Non-leaf node:

-      int address = nodeID * (bytesPerDim+1);
-      int splitDim = splitPackedValues[address] & 0xff;
+      int address = nodeID * bytesPerIndexEntry;
+      int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
       assert splitDim < numDims;

       byte[] splitPackedValue = new byte[packedBytesLength];

       // Recurse on left sub-tree:
       System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
-      System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+      System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
       verify(state,
              2*nodeID,
              cellMinPacked, splitPackedValue);

       // Recurse on right sub-tree:
       System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
-      System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+      System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
       verify(state,
              2*nodeID+1,
              splitPackedValue, cellMaxPacked);
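The address++ idiom is the subtle part of these hunks: in the >= 2D layout the first byte of an entry is the split dimension, and the post-increment advances past it, so address ends up pointing at the split value under either layout and the arraycopy calls drop their old +1 offset. A condensed sketch of one lookup (the method shape is mine):

// Reads node nodeID's split value into splitPackedValue, mirroring the
// address / address++ pattern in the diff above.
static void copySplit(byte[] splitPackedValues, int nodeID, int numDims,
                      int bytesPerDim, byte[] splitPackedValue) {
  int bytesPerIndexEntry = numDims == 1 ? bytesPerDim : bytesPerDim + 1;
  int address = nodeID * bytesPerIndexEntry;
  int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
  // address now points at the split value in both layouts:
  System.arraycopy(splitPackedValues, address, splitPackedValue,
                   splitDim * bytesPerDim, bytesPerDim);
}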
@@ -456,8 +460,8 @@ public class BKDReader extends PointValues implements Accountable {
     // Non-leaf node: recurse on the split left and right nodes

-    // TODO: save the unused 1 byte prefix (it's always 0) in the 1d case here:
-    int address = nodeID * (bytesPerDim+1);
-    int splitDim = splitPackedValues[address] & 0xff;
+    int address = nodeID * bytesPerIndexEntry;
+    int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
     assert splitDim < numDims;

     // TODO: can we alloc & reuse this up front?
@@ -467,14 +471,14 @@ public class BKDReader extends PointValues implements Accountable {

     // Recurse on left sub-tree:
     System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
-    System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+    System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
     intersect(state,
               2*nodeID,
               cellMinPacked, splitPackedValue);

     // Recurse on right sub-tree:
     System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
-    System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+    System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
     intersect(state,
               2*nodeID+1,
               splitPackedValue, cellMaxPacked);
@@ -483,16 +487,16 @@ public class BKDReader extends PointValues implements Accountable {

   /** Copies the split value for this node into the provided byte array */
   public void copySplitValue(int nodeID, byte[] splitPackedValue) {
-    int address = nodeID * (bytesPerDim+1);
-    int splitDim = splitPackedValues[address] & 0xff;
+    int address = nodeID * bytesPerIndexEntry;
+    int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
     assert splitDim < numDims;
-    System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+    System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
   }

   @Override
   public long ramBytesUsed() {
-    return splitPackedValues.length +
-      leafBlockFPs.length * Long.BYTES;
+    return RamUsageEstimator.sizeOf(splitPackedValues) +
+      RamUsageEstimator.sizeOf(leafBlockFPs);
   }

   @Override
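The ramBytesUsed() change reads as an accuracy fix riding along with the import added earlier: the old expression counted only payload bytes, while RamUsageEstimator.sizeOf also includes array object headers and alignment padding. A quick way to see the gap on your own JVM (exact numbers vary by VM settings):

import org.apache.lucene.util.RamUsageEstimator;

// Prints payload size vs. RamUsageEstimator's estimate for small arrays;
// the difference (object header + alignment) depends on the JVM.
public class RamUsageSketch {
  public static void main(String[] args) {
    byte[] bytes = new byte[100];
    long[] longs = new long[100];
    System.out.println("byte[100]: payload=100, estimated=" + RamUsageEstimator.sizeOf(bytes));
    System.out.println("long[100]: payload=" + 100 * Long.BYTES
        + ", estimated=" + RamUsageEstimator.sizeOf(longs));
  }
}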
BKDWriter.java

@@ -82,7 +82,8 @@ public class BKDWriter implements Closeable {
   public static final int VERSION_START = 0;
   public static final int VERSION_COMPRESSED_DOC_IDS = 1;
   public static final int VERSION_COMPRESSED_VALUES = 2;
-  public static final int VERSION_CURRENT = VERSION_COMPRESSED_VALUES;
+  public static final int VERSION_IMPLICIT_SPLIT_DIM_1D = 3;
+  public static final int VERSION_CURRENT = VERSION_IMPLICIT_SPLIT_DIM_1D;

   /** How many bytes each doc takes in the fixed-width offline format */
   private final int bytesPerDoc;
@@ -1033,10 +1034,15 @@ public class BKDWriter implements Closeable {
     out.writeVLong(pointCount);
     out.writeVInt(docsSeen.cardinality());

-    // TODO: for 1D case, don't waste the first byte of each split value (it's always 0)
-
     // NOTE: splitPackedValues[0] is unused, because nodeID is 1-based:
-    out.writeBytes(splitPackedValues, 0, splitPackedValues.length);
+    if (numDims == 1) {
+      // write the index, skipping the byte used to store the split dim since it is always 0
+      for (int i = 1; i < splitPackedValues.length; i += 1 + bytesPerDim) {
+        out.writeBytes(splitPackedValues, i, bytesPerDim);
+      }
+    } else {
+      out.writeBytes(splitPackedValues, 0, splitPackedValues.length);
+    }

     long lastFP = 0;
     for (int i=0;i<leafBlockFPs.length;i++) {
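To make the write loop concrete (an illustration, not commit output): splitPackedValues keeps the old in-memory stride of 1 + bytesPerDim per entry, entry 0 included but unused since nodeID is 1-based; starting at offset 1 and striding by 1 + bytesPerDim emits exactly the bytesPerDim value bytes of each entry and drops every split-dim byte. A standalone restatement against java.io.DataOutput (the Lucene code writes to an IndexOutput):

// Sketch of which in-memory bytes reach disk in the 1D case. Entry i
// occupies bytes [i*(1+bytesPerDim), (i+1)*(1+bytesPerDim)) in memory;
// only its trailing bytesPerDim value bytes are written.
static void writeIndex1D(java.io.DataOutput out, byte[] splitPackedValues,
                         int bytesPerDim) throws java.io.IOException {
  for (int i = 1; i < splitPackedValues.length; i += 1 + bytesPerDim) {
    out.write(splitPackedValues, i, bytesPerDim);
  }
}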
FacetsCollector.java

@@ -36,6 +36,7 @@ import org.apache.lucene.search.TopDocsCollector;
 import org.apache.lucene.search.TopFieldCollector;
 import org.apache.lucene.search.TopFieldDocs;
 import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BitDocIdSet;
 import org.apache.lucene.util.FixedBitSet;
@@ -251,6 +252,12 @@ public class FacetsCollector extends SimpleCollector implements Collector {
         + after.doc + " limit=" + limit);
     }

+    TopDocs topDocs = null;
+    if (n==0) {
+      TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
+      searcher.search(q, MultiCollector.wrap(totalHitCountCollector, fc));
+      topDocs = new TopDocs(totalHitCountCollector.getTotalHits(), new ScoreDoc[0], Float.NaN);
+    } else {
     TopDocsCollector<?> hitsCollector;
     if (sort != null) {
       if (after != null && !(after instanceof FieldDoc)) {
@@ -268,6 +275,9 @@ public class FacetsCollector extends SimpleCollector implements Collector {
       hitsCollector = TopScoreDocCollector.create(n, after);
     }
     searcher.search(q, MultiCollector.wrap(hitsCollector, fc));
-    return hitsCollector.topDocs();
+
+    topDocs = hitsCollector.topDocs();
+    }
+    return topDocs;
   }
 }
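With this in place, a facets-only search is a supported call; the new test below exercises exactly this path. A minimal usage sketch, assuming a searcher, query, and facet setup like the test's:

// Hypothetical call site: count all hits and collect facets, zero top docs.
// Assumes imports for IndexSearcher, Query, TopDocs, FacetsCollector, IOException.
static TopDocs facetsOnly(IndexSearcher searcher, Query query, FacetsCollector fc)
    throws IOException {
  // n == 0 used to throw; it now routes through TotalHitCountCollector, so
  // the returned TopDocs carries totalHits with an empty scoreDocs array.
  return FacetsCollector.search(searcher, query, 0, fc);
}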
TestDrillDownQuery.java

@@ -182,6 +182,17 @@ public class TestDrillDownQuery extends FacetTestCase {
     assertEquals(10, docs.totalHits);
   }

+  public void testZeroLimit() throws IOException {
+    IndexSearcher searcher = newSearcher(reader);
+    DrillDownQuery q = new DrillDownQuery(config);
+    q.add("b", "1");
+    int limit = 0;
+    FacetsCollector facetCollector = new FacetsCollector();
+    FacetsCollector.search(searcher, q, limit, facetCollector);
+    Facets facets = getTaxonomyFacetCounts(taxo, config, facetCollector, config.getDimConfig("b").indexFieldName);
+    assertNotNull(facets.getTopChildren(10, "b"));
+  }
+
   public void testScoring() throws IOException {
     // verify that drill-down queries do not modify scores
     IndexSearcher searcher = newSearcher(reader);