From e8360714fae8bac3c705297641a735ed33cf47f9 Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Tue, 18 Oct 2016 09:33:36 +0200
Subject: [PATCH 1/3] LUCENE-7501: BKDReader should not store the split dimension explicitly in the 1D case.

---
 lucene/CHANGES.txt                                 |  3 ++
 .../simpletext/SimpleTextPointsReader.java         | 17 ++++++++--
 .../org/apache/lucene/util/bkd/BKDReader.java      | 32 +++++++++++--------
 .../org/apache/lucene/util/bkd/BKDWriter.java      | 14 +++++---
 4 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index f3501cc356c..745d8fd2fbe 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -96,6 +96,9 @@ Improvements
 
 Optimizations
 
+* LUCENE-7501: BKDReader should not store the split dimension explicitly in the
+  1D case. (Adrien Grand)
+
 Other
 
 * LUCENE-7452: Block join query exception suggests how to find a doc, which
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java
index e3b880afb4e..f7ff16ecbc2 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java
@@ -139,15 +139,26 @@ class SimpleTextPointsReader extends PointsReader {
     readLine(dataIn);
     count = parseInt(SPLIT_COUNT);
 
-    byte[] splitPackedValues = new byte[count * (1 + bytesPerDim)];
+    byte[] splitPackedValues;
+    int bytesPerIndexEntry;
+    if (numDims == 1) {
+      bytesPerIndexEntry = bytesPerDim;
+    } else {
+      bytesPerIndexEntry = 1 + bytesPerDim;
+    }
+    splitPackedValues = new byte[count * bytesPerIndexEntry];
     for(int i=0;i<count;i++) {
       readLine(dataIn);
-      splitPackedValues[(1 + bytesPerDim) * i] = (byte) parseInt(SPLIT_DIM);
+      int address = bytesPerIndexEntry * i;
+      int splitDim = parseInt(SPLIT_DIM);
+      if (numDims != 1) {
+        splitPackedValues[address++] = (byte) splitDim;
+      }
       readLine(dataIn);
       assert startsWith(SPLIT_VALUE);
       BytesRef br = SimpleTextUtil.fromBytesRefString(stripPrefix(SPLIT_VALUE));
       assert br.length == bytesPerDim;
-      System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + bytesPerDim) * i + 1, bytesPerDim);
+      System.arraycopy(br.bytes, br.offset, splitPackedValues, address, bytesPerDim);
     }
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
@@ ... @@ public class BKDReader extends PointValues implements Accountable {
+  final int bytesPerIndexEntry;
@@ ... @@ public class BKDReader extends PointValues implements Accountable {
+    bytesPerIndexEntry = numDims == 1 && version >= BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D ? bytesPerDim : bytesPerDim + 1;
     packedBytesLength = numDims * bytesPerDim;
 
     // Read index:
@@ -69,7 +72,7 @@ public class BKDReader extends PointValues implements Accountable {
     pointCount = in.readVLong();
     docCount = in.readVInt();
 
-    splitPackedValues = new byte[(1+bytesPerDim)*numLeaves];
+    splitPackedValues = new byte[bytesPerIndexEntry*numLeaves];
 
     // TODO: don't write split packed values[0]!
     in.readBytes(splitPackedValues, 0, splitPackedValues.length);
@@ -134,6 +137,7 @@ public class BKDReader extends PointValues implements Accountable {
     this.numDims = numDims;
     this.maxPointsInLeafNode = maxPointsInLeafNode;
     this.bytesPerDim = bytesPerDim;
+    bytesPerIndexEntry = numDims == 1 ? bytesPerDim : bytesPerDim + 1;
     packedBytesLength = numDims * bytesPerDim;
     this.leafNodeOffset = leafBlockFPs.length;
     this.leafBlockFPs = leafBlockFPs;
@@ -233,22 +237,22 @@ public class BKDReader extends PointValues implements Accountable {
     } else {
       // Non-leaf node:
 
-      int address = nodeID * (bytesPerDim+1);
-      int splitDim = splitPackedValues[address] & 0xff;
+      int address = nodeID * bytesPerIndexEntry;
+      int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
 
      assert splitDim < numDims;

      byte[] splitPackedValue = new byte[packedBytesLength];

      // Recurse on left sub-tree:
      System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
-      System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+      System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);

      verify(state, 2*nodeID, cellMinPacked, splitPackedValue);

      // Recurse on right sub-tree:
      System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
-      System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+      System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);

      verify(state, 2*nodeID+1, splitPackedValue, cellMaxPacked);

@@ -456,8 +460,8 @@ public class BKDReader extends PointValues implements Accountable {
       // Non-leaf node: recurse on the split left and right nodes
 
       // TODO: save the unused 1 byte prefix (it's always 0) in the 1d case here:
-      int address = nodeID * (bytesPerDim+1);
-      int splitDim = splitPackedValues[address] & 0xff;
+      int address = nodeID * bytesPerIndexEntry;
+      int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
 
       assert splitDim < numDims;
 
       // TODO: can we alloc & reuse this up front?
@@ -467,14 +471,14 @@ public class BKDReader extends PointValues implements Accountable {
       byte[] splitPackedValue = new byte[packedBytesLength];
 
       // Recurse on left sub-tree:
       System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
-      System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+      System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
 
       intersect(state, 2*nodeID, cellMinPacked, splitPackedValue);
 
       // Recurse on right sub-tree:
       System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
-      System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+      System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
 
       intersect(state, 2*nodeID+1, splitPackedValue, cellMaxPacked);
 
@@ -483,16 +487,16 @@ public class BKDReader extends PointValues implements Accountable {
 
   /** Copies the split value for this node into the provided byte array */
   public void copySplitValue(int nodeID, byte[] splitPackedValue) {
-    int address = nodeID * (bytesPerDim+1);
-    int splitDim = splitPackedValues[address] & 0xff;
+    int address = nodeID * bytesPerIndexEntry;
+    int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
 
     assert splitDim < numDims;
 
-    System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+    System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
   }
 
   @Override
   public long ramBytesUsed() {
-    return splitPackedValues.length +
-           leafBlockFPs.length * Long.BYTES;
+    return RamUsageEstimator.sizeOf(splitPackedValues) +
+           RamUsageEstimator.sizeOf(leafBlockFPs);
   }
 
   @Override
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index 6ee178b837f..5526624a961 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -82,7 +82,8 @@ public class BKDWriter implements Closeable {
   public static final int VERSION_START = 0;
   public static final int VERSION_COMPRESSED_DOC_IDS = 1;
   public static final int VERSION_COMPRESSED_VALUES = 2;
-  public static final int VERSION_CURRENT = VERSION_COMPRESSED_VALUES;
+  public static final int VERSION_IMPLICIT_SPLIT_DIM_1D = 3;
+  public static final int VERSION_CURRENT = VERSION_IMPLICIT_SPLIT_DIM_1D;
 
   /** How many bytes each docs takes in the fixed-width offline format */
   private final int bytesPerDoc;
@@ -1033,10 +1034,15 @@ public class BKDWriter implements Closeable {
     out.writeVLong(pointCount);
     out.writeVInt(docsSeen.cardinality());
 
-    // TODO: for 1D case, don't waste the first byte of each split value (it's always 0)
-    // NOTE: splitPackedValues[0] is unused, because nodeID is 1-based:
-    out.writeBytes(splitPackedValues, 0, splitPackedValues.length);
+    if (numDims == 1) {
+      // write the index, skipping the byte used to store the split dim since it is always 0
+      for (int i = 1; i < splitPackedValues.length; i += 1 + bytesPerDim) {
+        out.writeBytes(splitPackedValues, i, bytesPerDim);
+      }
+    } else {
+      out.writeBytes(splitPackedValues, 0, splitPackedValues.length);
+    }
 
     long lastFP = 0;
     for (int i=0;i<leafBlockFPs.length;i++) {
       long delta = leafBlockFPs[i]-lastFP;
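
The core of the change above: with numDims == 1 the split dimension of every non-leaf node can only be 0, so the leading dimension byte of each index entry carries no information and is dropped, shrinking entries from 1 + bytesPerDim bytes to bytesPerDim bytes. A minimal standalone sketch of the entry layout (hypothetical helper names, not code from the patch):

    // Layout of one split-tree index entry:
    //   numDims > 1:  [splitDim byte][bytesPerDim split-value bytes]
    //   numDims == 1: [bytesPerDim split-value bytes], splitDim implicitly 0
    static int bytesPerIndexEntry(int numDims, int bytesPerDim) {
      return numDims == 1 ? bytesPerDim : 1 + bytesPerDim;
    }

    // Mirrors how the readers above decode the split dimension of a node.
    static int splitDim(byte[] splitPackedValues, int nodeID, int numDims, int bytesPerDim) {
      if (numDims == 1) {
        return 0; // implicit, never stored
      }
      int address = nodeID * bytesPerIndexEntry(numDims, bytesPerDim);
      return splitPackedValues[address] & 0xff; // leading byte is the dimension
    }

This saves one byte per index entry on disk and on heap in the very common 1D case (numeric fields), and the VERSION_IMPLICIT_SPLIT_DIM_1D bump lets BKDReader keep decoding indexes written with the older layout.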
From: Mike McCandless
Date: Tue, 18 Oct 2016 09:23:24 -0400
Subject: [PATCH 2/3] LUCENE-7493: FacetsCollector.search now accepts limit=0, for getting facets but not search hits

---
 lucene/CHANGES.txt                                 |  3 ++
 .../apache/lucene/facet/FacetsCollector.java       | 42 ++++++++++++-------
 .../lucene/facet/TestDrillDownQuery.java           | 11 +++++
 3 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 745d8fd2fbe..6d83c534b32 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -89,6 +89,9 @@ Bug Fixes
 * LUCENE-6914: Fixed DecimalDigitFilter in case of supplementary code points.
   (Hossman)
 
+* LUCENE-7493: FacetsCollector.search threw an unexpected exception if
+  you asked for zero hits but wanted facets (Mahesh via Mike McCandless)
+
 Improvements
 
 * LUCENE-7439: FuzzyQuery now matches all terms within the specified
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/FacetsCollector.java b/lucene/facet/src/java/org/apache/lucene/facet/FacetsCollector.java
index d3f2eb871b4..b942f7e5944 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/FacetsCollector.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/FacetsCollector.java
@@ -36,6 +36,7 @@ import org.apache.lucene.search.TopDocsCollector;
 import org.apache.lucene.search.TopFieldCollector;
 import org.apache.lucene.search.TopFieldDocs;
 import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BitDocIdSet;
 import org.apache.lucene.util.FixedBitSet;
@@ -251,23 +252,32 @@ public class FacetsCollector extends SimpleCollector implements Collector {
                                          + after.doc + " limit=" + limit);
     }
 
-    TopDocsCollector<?> hitsCollector;
-    if (sort != null) {
-      if (after != null && !(after instanceof FieldDoc)) {
-        // TODO: if we fix type safety of TopFieldDocs we can
-        // remove this
-        throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
-      }
-      boolean fillFields = true;
-      hitsCollector = TopFieldCollector.create(sort, n,
-                                               (FieldDoc) after,
-                                               fillFields,
-                                               doDocScores,
-                                               doMaxScore);
+    TopDocs topDocs = null;
+    if (n==0) {
+      TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
+      searcher.search(q, MultiCollector.wrap(totalHitCountCollector, fc));
+      topDocs = new TopDocs(totalHitCountCollector.getTotalHits(), new ScoreDoc[0], Float.NaN);
     } else {
-      hitsCollector = TopScoreDocCollector.create(n, after);
+      TopDocsCollector<?> hitsCollector;
+      if (sort != null) {
+        if (after != null && !(after instanceof FieldDoc)) {
+          // TODO: if we fix type safety of TopFieldDocs we can
+          // remove this
+          throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
+        }
+        boolean fillFields = true;
+        hitsCollector = TopFieldCollector.create(sort, n,
+                                                 (FieldDoc) after,
+                                                 fillFields,
+                                                 doDocScores,
+                                                 doMaxScore);
+      } else {
+        hitsCollector = TopScoreDocCollector.create(n, after);
+      }
+      searcher.search(q, MultiCollector.wrap(hitsCollector, fc));
+
+      topDocs = hitsCollector.topDocs();
     }
-    searcher.search(q, MultiCollector.wrap(hitsCollector, fc));
-    return hitsCollector.topDocs();
+    return topDocs;
   }
 }
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillDownQuery.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillDownQuery.java
index f76e839a04b..bf8d0f41574 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillDownQuery.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillDownQuery.java
@@ -182,6 +182,17 @@ public class TestDrillDownQuery extends FacetTestCase {
     assertEquals(10, docs.totalHits);
   }
 
+  public void testZeroLimit() throws IOException {
+    IndexSearcher searcher = newSearcher(reader);
+    DrillDownQuery q = new DrillDownQuery(config);
+    q.add("b", "1");
+    int limit = 0;
+    FacetsCollector facetCollector = new FacetsCollector();
+    FacetsCollector.search(searcher, q, limit, facetCollector);
+    Facets facets = getTaxonomyFacetCounts(taxo, config, facetCollector, config.getDimConfig("b").indexFieldName);
+    assertNotNull(facets.getTopChildren(10, "b"));
+  }
+
   public void testScoring() throws IOException {
     // verify that drill-down queries do not modify scores
     IndexSearcher searcher = newSearcher(reader);
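
With the n==0 branch above, a caller can now ask for zero hits and still get facet counts plus an accurate total hit count. A minimal usage sketch (assumes an already-open searcher, taxoReader, config and query; FastTaxonomyFacetCounts is just one Facets implementation that consumes a FacetsCollector, and "Author" is a sample dimension, not something from this patch):

    FacetsCollector fc = new FacetsCollector();
    // limit=0 used to throw from TopScoreDocCollector; now a
    // TotalHitCountCollector runs alongside fc instead, so totalHits
    // stays accurate even though no ScoreDocs are collected.
    TopDocs hits = FacetsCollector.search(searcher, query, 0, fc);
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
    FacetResult top = facets.getTopChildren(10, "Author");

In this branch the returned TopDocs carries an empty ScoreDoc array and Float.NaN as its maximum score, so callers should treat limit=0 as a counts-and-facets-only request.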
From a17e92006f087a0601d9329bf9b9c946ca72478b Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Tue, 18 Oct 2016 16:07:52 +0200
Subject: [PATCH 3/3] LUCENE-7489: Wrap only once in case GCD compression is used.

---
 .../lucene70/Lucene70DocValuesProducer.java        | 63 ++++++++-----------
 1 file changed, 27 insertions(+), 36 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
index 755da79ac86..637c8eef11f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
@@ -424,47 +424,38 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
       };
     } else {
       final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
-      LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
-      if (entry.gcd != 1) {
-        values = applyGcd(values, entry.gcd);
-      }
-      if (entry.minValue != 0) {
-        values = applyDelta(values, entry.minValue);
-      }
+      final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
       if (entry.table != null) {
-        values = applyTable(values, entry.table);
+        final long[] table = entry.table;
+        return new LongValues() {
+          @Override
+          public long get(long index) {
+            return table[(int) values.get(index)];
+          }
+        };
+      } else if (entry.gcd != 1) {
+        final long gcd = entry.gcd;
+        final long minValue = entry.minValue;
+        return new LongValues() {
+          @Override
+          public long get(long index) {
+            return values.get(index) * gcd + minValue;
+          }
+        };
+      } else if (entry.minValue != 0) {
+        final long minValue = entry.minValue;
+        return new LongValues() {
+          @Override
+          public long get(long index) {
+            return values.get(index) + minValue;
+          }
+        };
+      } else {
+        return values;
       }
-      return values;
     }
   }
 
-  private LongValues applyDelta(LongValues values, long delta) {
-    return new LongValues() {
-      @Override
-      public long get(long index) {
-        return delta + values.get(index);
-      }
-    };
-  }
-
-  private LongValues applyGcd(LongValues values, long gcd) {
-    return new LongValues() {
-      @Override
-      public long get(long index) {
-        return values.get(index) * gcd;
-      }
-    };
-  }
-
-  private LongValues applyTable(LongValues values, long[] table) {
-    return new LongValues() {
-      @Override
-      public long get(long index) {
-        return table[(int) values.get(index)];
-      }
-    };
-  }
-
   @Override
   public BinaryDocValues getBinary(FieldInfo field) throws IOException {
     BinaryEntry entry = binaries.get(field.name);
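
The rewrite above replaces up to three stacked LongValues wrappers (gcd, delta, table) with a single specialized one, so every lookup now costs at most one virtual call on top of the raw DirectReader. The same decode-once idea as a self-contained sketch (plain parameters instead of the codec's private entry class; org.apache.lucene.util.LongValues is the only Lucene type used):

    // Chooses one decoder up front instead of nesting one wrapper per
    // compression technique. Assumes table encoding excludes gcd/delta,
    // which is what the branch order in the patch relies on.
    static LongValues decode(LongValues raw, long[] table, long gcd, long minValue) {
      if (table != null) {
        // table-compressed: stored values are ordinals into a small table
        return new LongValues() {
          @Override
          public long get(long index) {
            return table[(int) raw.get(index)];
          }
        };
      } else if (gcd != 1) {
        // gcd-compressed, with the delta folded into the same wrapper
        return new LongValues() {
          @Override
          public long get(long index) {
            return raw.get(index) * gcd + minValue;
          }
        };
      } else if (minValue != 0) {
        // delta-only compression
        return new LongValues() {
          @Override
          public long get(long index) {
            return raw.get(index) + minValue;
          }
        };
      } else {
        // stored as-is: no wrapper at all
        return raw;
      }
    }

Folding the delta into the gcd branch (raw * gcd + minValue) is what makes the separate applyDelta wrapper, and the extra virtual call it cost when GCD compression was used, unnecessary.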