Merge remote-tracking branch 'origin/master'

This commit is contained in:
Noble Paul 2016-10-18 19:45:06 +05:30
commit fe17b4e2bb
7 changed files with 112 additions and 73 deletions

View File

@ -89,6 +89,9 @@ Bug Fixes
* LUCENE-6914: Fixed DecimalDigitFilter in case of supplementary code points.
(Hossman)
* LUCENE-7493: FacetsCollector.search threw an unexpected exception if
you asked for zero hits but wanted facets (Mahesh via Mike McCandless)
Improvements
* LUCENE-7439: FuzzyQuery now matches all terms within the specified
@ -96,6 +99,9 @@ Improvements
Optimizations
* LUCENE-7501: BKDReader should not store the split dimension explicitly in the
1D case. (Adrien Grand)
Other
* LUCENE-7452: Block join query exception suggests how to find a doc, which

View File

@ -139,15 +139,26 @@ class SimpleTextPointsReader extends PointsReader {
readLine(dataIn);
count = parseInt(SPLIT_COUNT);
byte[] splitPackedValues = new byte[count * (1 + bytesPerDim)];
byte[] splitPackedValues;
int bytesPerIndexEntry;
if (numDims == 1) {
bytesPerIndexEntry = bytesPerDim;
} else {
bytesPerIndexEntry = 1 + bytesPerDim;
}
splitPackedValues = new byte[count * bytesPerIndexEntry];
for(int i=0;i<count;i++) {
readLine(dataIn);
splitPackedValues[(1 + bytesPerDim) * i] = (byte) parseInt(SPLIT_DIM);
int address = bytesPerIndexEntry * i;
int splitDim = parseInt(SPLIT_DIM);
if (numDims != 1) {
splitPackedValues[address++] = (byte) splitDim;
}
readLine(dataIn);
assert startsWith(SPLIT_VALUE);
BytesRef br = SimpleTextUtil.fromBytesRefString(stripPrefix(SPLIT_VALUE));
assert br.length == bytesPerDim;
System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + bytesPerDim) * i + 1, bytesPerDim);
System.arraycopy(br.bytes, br.offset, splitPackedValues, address, bytesPerDim);
}
return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes, pointCount, docCount);

View File

@ -424,45 +424,36 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
};
} else {
final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
if (entry.gcd != 1) {
values = applyGcd(values, entry.gcd);
}
if (entry.minValue != 0) {
values = applyDelta(values, entry.minValue);
}
final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
if (entry.table != null) {
values = applyTable(values, entry.table);
}
return values;
}
}
private LongValues applyDelta(LongValues values, long delta) {
return new LongValues() {
@Override
public long get(long index) {
return delta + values.get(index);
}
};
}
private LongValues applyGcd(LongValues values, long gcd) {
return new LongValues() {
@Override
public long get(long index) {
return values.get(index) * gcd;
}
};
}
private LongValues applyTable(LongValues values, long[] table) {
final long[] table = entry.table;
return new LongValues() {
@Override
public long get(long index) {
return table[(int) values.get(index)];
}
};
} else if (entry.gcd != 1) {
final long gcd = entry.gcd;
final long minValue = entry.minValue;
return new LongValues() {
@Override
public long get(long index) {
return values.get(index) * gcd + minValue;
}
};
} else if (entry.minValue != 0) {
final long minValue = entry.minValue;
return new LongValues() {
@Override
public long get(long index) {
return values.get(index) + minValue;
}
};
} else {
return values;
}
}
}
@Override

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.PointValues;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
/** Handles intersection of an multi-dimensional shape in byte[] space with a block KD-tree previously written with {@link BKDWriter}.
@ -38,6 +39,7 @@ public class BKDReader extends PointValues implements Accountable {
final private int leafNodeOffset;
final int numDims;
final int bytesPerDim;
final int bytesPerIndexEntry;
final IndexInput in;
final int maxPointsInLeafNode;
final byte[] minPackedValue;
@ -53,6 +55,7 @@ public class BKDReader extends PointValues implements Accountable {
numDims = in.readVInt();
maxPointsInLeafNode = in.readVInt();
bytesPerDim = in.readVInt();
bytesPerIndexEntry = numDims == 1 && version >= BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D ? bytesPerDim : bytesPerDim + 1;
packedBytesLength = numDims * bytesPerDim;
// Read index:
@ -69,7 +72,7 @@ public class BKDReader extends PointValues implements Accountable {
pointCount = in.readVLong();
docCount = in.readVInt();
splitPackedValues = new byte[(1+bytesPerDim)*numLeaves];
splitPackedValues = new byte[bytesPerIndexEntry*numLeaves];
// TODO: don't write split packed values[0]!
in.readBytes(splitPackedValues, 0, splitPackedValues.length);
@ -134,6 +137,7 @@ public class BKDReader extends PointValues implements Accountable {
this.numDims = numDims;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.bytesPerDim = bytesPerDim;
bytesPerIndexEntry = numDims == 1 ? bytesPerDim : bytesPerDim + 1;
packedBytesLength = numDims * bytesPerDim;
this.leafNodeOffset = leafBlockFPs.length;
this.leafBlockFPs = leafBlockFPs;
@ -233,22 +237,22 @@ public class BKDReader extends PointValues implements Accountable {
} else {
// Non-leaf node:
int address = nodeID * (bytesPerDim+1);
int splitDim = splitPackedValues[address] & 0xff;
int address = nodeID * bytesPerIndexEntry;
int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
assert splitDim < numDims;
byte[] splitPackedValue = new byte[packedBytesLength];
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
verify(state,
2*nodeID,
cellMinPacked, splitPackedValue);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
verify(state,
2*nodeID+1,
splitPackedValue, cellMaxPacked);
@ -456,8 +460,8 @@ public class BKDReader extends PointValues implements Accountable {
// Non-leaf node: recurse on the split left and right nodes
// TODO: save the unused 1 byte prefix (it's always 0) in the 1d case here:
int address = nodeID * (bytesPerDim+1);
int splitDim = splitPackedValues[address] & 0xff;
int address = nodeID * bytesPerIndexEntry;
int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
assert splitDim < numDims;
// TODO: can we alloc & reuse this up front?
@ -467,14 +471,14 @@ public class BKDReader extends PointValues implements Accountable {
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
intersect(state,
2*nodeID,
cellMinPacked, splitPackedValue);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
intersect(state,
2*nodeID+1,
splitPackedValue, cellMaxPacked);
@ -483,16 +487,16 @@ public class BKDReader extends PointValues implements Accountable {
/** Copies the split value for this node into the provided byte array */
public void copySplitValue(int nodeID, byte[] splitPackedValue) {
int address = nodeID * (bytesPerDim+1);
int splitDim = splitPackedValues[address] & 0xff;
int address = nodeID * bytesPerIndexEntry;
int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
assert splitDim < numDims;
System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
}
@Override
public long ramBytesUsed() {
return splitPackedValues.length +
leafBlockFPs.length * Long.BYTES;
return RamUsageEstimator.sizeOf(splitPackedValues) +
RamUsageEstimator.sizeOf(leafBlockFPs);
}
@Override

View File

@ -82,7 +82,8 @@ public class BKDWriter implements Closeable {
public static final int VERSION_START = 0;
public static final int VERSION_COMPRESSED_DOC_IDS = 1;
public static final int VERSION_COMPRESSED_VALUES = 2;
public static final int VERSION_CURRENT = VERSION_COMPRESSED_VALUES;
public static final int VERSION_IMPLICIT_SPLIT_DIM_1D = 3;
public static final int VERSION_CURRENT = VERSION_IMPLICIT_SPLIT_DIM_1D;
/** How many bytes each docs takes in the fixed-width offline format */
private final int bytesPerDoc;
@ -1033,10 +1034,15 @@ public class BKDWriter implements Closeable {
out.writeVLong(pointCount);
out.writeVInt(docsSeen.cardinality());
// TODO: for 1D case, don't waste the first byte of each split value (it's always 0)
// NOTE: splitPackedValues[0] is unused, because nodeID is 1-based:
if (numDims == 1) {
// write the index, skipping the byte used to store the split dim since it is always 0
for (int i = 1; i < splitPackedValues.length; i += 1 + bytesPerDim) {
out.writeBytes(splitPackedValues, i, bytesPerDim);
}
} else {
out.writeBytes(splitPackedValues, 0, splitPackedValues.length);
}
long lastFP = 0;
for (int i=0;i<leafBlockFPs.length;i++) {

View File

@ -36,6 +36,7 @@ import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.FixedBitSet;
@ -251,6 +252,12 @@ public class FacetsCollector extends SimpleCollector implements Collector {
+ after.doc + " limit=" + limit);
}
TopDocs topDocs = null;
if (n==0) {
TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
searcher.search(q, MultiCollector.wrap(totalHitCountCollector, fc));
topDocs = new TopDocs(totalHitCountCollector.getTotalHits(), new ScoreDoc[0], Float.NaN);
} else {
TopDocsCollector<?> hitsCollector;
if (sort != null) {
if (after != null && !(after instanceof FieldDoc)) {
@ -268,6 +275,9 @@ public class FacetsCollector extends SimpleCollector implements Collector {
hitsCollector = TopScoreDocCollector.create(n, after);
}
searcher.search(q, MultiCollector.wrap(hitsCollector, fc));
return hitsCollector.topDocs();
topDocs = hitsCollector.topDocs();
}
return topDocs;
}
}

View File

@ -182,6 +182,17 @@ public class TestDrillDownQuery extends FacetTestCase {
assertEquals(10, docs.totalHits);
}
// Regression test for LUCENE-7493: FacetsCollector.search with a hit limit of
// zero must still gather facet counts instead of throwing an exception.
public void testZeroLimit() throws IOException {
IndexSearcher searcher = newSearcher(reader);
DrillDownQuery q = new DrillDownQuery(config);
q.add("b", "1");
// Ask for zero top hits; only the facet side-effect of the search is wanted.
int limit = 0;
FacetsCollector facetCollector = new FacetsCollector();
FacetsCollector.search(searcher, q, limit, facetCollector);
// Facet counts for dimension "b" should be available even though no hits were collected.
Facets facets = getTaxonomyFacetCounts(taxo, config, facetCollector, config.getDimConfig("b").indexFieldName);
assertNotNull(facets.getTopChildren(10, "b"));
}
public void testScoring() throws IOException {
// verify that drill-down queries do not modify scores
IndexSearcher searcher = newSearcher(reader);