From 7e257c79fcea532f4c8c93c4c9bc07534e06fa2c Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Wed, 10 Apr 2013 19:10:37 +0000 Subject: [PATCH] LUCENE-4885: FacetsAccumulator did not set the correct value for FacetResult.numValidDescendants git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466629 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 ++ .../search/DepthOneFacetResultsHandler.java | 15 ++++---- .../lucene/facet/search/FacetResult.java | 12 +++---- .../lucene/facet/search/FacetResultNode.java | 4 --- .../facet/search/FacetsAccumulator.java | 7 ++-- .../search/FloatFacetResultsHandler.java | 11 +++--- .../facet/search/IntFacetResultsHandler.java | 10 +++--- .../search/StandardFacetsAccumulator.java | 6 +--- .../facet/search/TopKFacetResultsHandler.java | 1 - .../facet/search/TopKInEachNodeHandler.java | 3 +- .../SortedSetDocValuesAccumulator.java | 33 +++++++++-------- .../facet/search/TestDrillSideways.java | 25 +++++++++++-- .../facet/search/TestFacetsCollector.java | 36 +++++++++++++++++++ 13 files changed, 108 insertions(+), 58 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0cd9d7dc81a..2542b17fb3f 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -254,6 +254,9 @@ Bug Fixes * LUCENE-4880: Fix MemoryIndex to consume empty terms from the tokenstream consistent with IndexWriter. Previously it discarded them. (Timothy Allison via Robert Muir) +* LUCENE-4885: FacetsAccumulator did not set the correct value for + FacetResult.numValidDescendants. (Mike McCandless, Shai Erera) + Documentation * LUCENE-4841: Added example SimpleSortedSetFacetsExample to show how diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java b/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java index 1369f681a99..90bb3d6f299 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java @@ -46,7 +46,7 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler { @Override protected FacetResultNode getSentinelObject() { - return new FacetResultNode(); + return new FacetResultNode(TaxonomyReader.INVALID_ORDINAL, 0); } @Override @@ -80,7 +80,8 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler { * Add the siblings of {@code ordinal} to the given {@link PriorityQueue}. The * given {@link PriorityQueue} is already filled with sentinel objects, so * implementations are encouraged to use {@link PriorityQueue#top()} and - * {@link PriorityQueue#updateTop()} for best performance. + * {@link PriorityQueue#updateTop()} for best performance. Returns the total + * number of siblings. */ protected abstract int addSiblings(int ordinal, int[] siblings, PriorityQueue pq); @@ -92,10 +93,8 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler { int rootOrd = taxonomyReader.getOrdinal(facetRequest.categoryPath); - FacetResultNode root = new FacetResultNode(); - root.ordinal = rootOrd; + FacetResultNode root = new FacetResultNode(rootOrd, valueOf(rootOrd)); root.label = facetRequest.categoryPath; - root.value = valueOf(rootOrd); if (facetRequest.numResults > taxonomyReader.getSize()) { // specialize this case, user is interested in all available results ArrayList nodes = new ArrayList(); @@ -118,11 +117,11 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler { // since we use sentinel objects, we cannot reuse PQ. but that's ok because it's not big PriorityQueue pq = new FacetResultNodeQueue(facetRequest.numResults, true); - int numResults = addSiblings(children[rootOrd], siblings, pq); + int numSiblings = addSiblings(children[rootOrd], siblings, pq); // pop() the least (sentinel) elements int pqsize = pq.size(); - int size = numResults < pqsize ? numResults : pqsize; + int size = numSiblings < pqsize ? numSiblings : pqsize; for (int i = pqsize - size; i > 0; i--) { pq.pop(); } // create the FacetResultNodes. @@ -133,7 +132,7 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler { subResults[i] = node; } root.subResults = Arrays.asList(subResults); - return new FacetResult(facetRequest, root, size); + return new FacetResult(facetRequest, root, numSiblings); } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java index 0d0522f1b8f..9a010fac4ff 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java @@ -40,19 +40,15 @@ public class FacetResult { * @see FacetRequest#categoryPath */ public final FacetResultNode getFacetResultNode() { - return this.rootNode; + return rootNode; } /** * Number of descendants of {@link #getFacetResultNode() root facet result - * node}, up till the requested depth. Typically -- have value != 0. This - * number does not include the root node. - * - * @see #getFacetRequest() - * @see FacetRequest#getDepth() + * node}, up till the requested depth. */ public final int getNumValidDescendants() { - return this.numValidDescendants; + return numValidDescendants; } /** @@ -61,7 +57,7 @@ public class FacetResult { public final FacetRequest getFacetRequest() { return this.facetRequest; } - + /** * String representation of this facet result. * Use with caution: might return a very long string. diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultNode.java b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultNode.java index 2a7733c81f4..c6110b3b140 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultNode.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultNode.java @@ -67,10 +67,6 @@ public class FacetResultNode { */ public List subResults = EMPTY_SUB_RESULTS; - public FacetResultNode() { - // empty constructor - } - public FacetResultNode(int ordinal, double value) { this.ordinal = ordinal; this.value = value; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java index 5aff919dfc1..9f51b23efe6 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java @@ -81,11 +81,10 @@ public class FacetsAccumulator { return new FacetsAccumulator(fsp, indexReader, taxoReader); } - private static FacetResult emptyResult(int ordinal, FacetRequest fr) { - FacetResultNode root = new FacetResultNode(); - root.ordinal = ordinal; + /** Returns an empty {@link FacetResult}. */ + protected static FacetResult emptyResult(int ordinal, FacetRequest fr) { + FacetResultNode root = new FacetResultNode(ordinal, 0); root.label = fr.categoryPath; - root.value = 0; return new FacetResult(fr, root, 0); } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java b/lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java index d8587e7482c..004bedfecc0 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java @@ -43,18 +43,19 @@ public final class FloatFacetResultsHandler extends DepthOneFacetResultsHandler return values[ordinal]; } - @Override protected final int addSiblings(int ordinal, int[] siblings, PriorityQueue pq) { FacetResultNode top = pq.top(); int numResults = 0; while (ordinal != TaxonomyReader.INVALID_ORDINAL) { float value = values[ordinal]; - if (value > top.value) { - top.value = value; - top.ordinal = ordinal; - top = pq.updateTop(); + if (value > 0.0f) { ++numResults; + if (value > top.value) { + top.value = value; + top.ordinal = ordinal; + top = pq.updateTop(); + } } ordinal = siblings[ordinal]; } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java b/lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java index bc3e4488378..51cda6a5328 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java @@ -49,11 +49,13 @@ public final class IntFacetResultsHandler extends DepthOneFacetResultsHandler { int numResults = 0; while (ordinal != TaxonomyReader.INVALID_ORDINAL) { int value = values[ordinal]; - if (value > top.value) { - top.value = value; - top.ordinal = ordinal; - top = pq.updateTop(); + if (value > 0) { ++numResults; + if (value > top.value) { + top.value = value; + top.ordinal = ordinal; + top = pq.updateTop(); + } } ordinal = siblings[ordinal]; } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java b/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java index 03495403e15..25769db990b 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java @@ -198,11 +198,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator { IntermediateFacetResult tmpResult = fr2tmpRes.get(fr); if (tmpResult == null) { // Add empty FacetResult: - FacetResultNode root = new FacetResultNode(); - root.ordinal = TaxonomyReader.INVALID_ORDINAL; - root.label = fr.categoryPath; - root.value = 0; - res.add(new FacetResult(fr, root, 0)); + res.add(emptyResult(taxonomyReader.getOrdinal(fr.categoryPath), fr)); continue; } FacetResult facetRes = frHndlr.renderFacetResult(tmpResult); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java b/lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java index 9c173242999..7b2816bc469 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java @@ -256,7 +256,6 @@ public class TopKFacetResultsHandler extends PartitionsFacetResultsHandler { * Create a Facet Result. * @param facetRequest Request for which this result was obtained. * @param facetResultNode top result node for this facet result. - * @param totalFacets - number of children of the targetFacet, up till the requested depth. */ TopKFacetResult(FacetRequest facetRequest, FacetResultNode facetResultNode, int totalFacets) { super(facetRequest, facetResultNode, totalFacets); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java b/lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java index 9a226320ec2..936edb9a544 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java @@ -706,8 +706,7 @@ public class TopKInEachNodeHandler extends PartitionsFacetResultsHandler { value = tmp.rootNodeValue; } FacetResultNode root = generateNode(ordinal, value, tmp.mapToAACOs); - return new FacetResult (tmp.facetRequest, root, tmp.totalNumOfFacetsConsidered); - + return new FacetResult(tmp.facetRequest, root, tmp.totalNumOfFacetsConsidered); } private FacetResultNode generateNode(int ordinal, double val, IntToObjectMap mapToAACOs) { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesAccumulator.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesAccumulator.java index 9e7b8ca9b5d..99c65e529ff 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesAccumulator.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesAccumulator.java @@ -107,7 +107,6 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator { if (matchingDocs.totalHits < numSegOrds/10) { // Remap every ord to global ord as we iterate: - final int[] segCounts = new int[numSegOrds]; int doc = 0; while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) { segValues.setDocument(doc); @@ -259,22 +258,26 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator { //System.out.println("collect"); int dimCount = 0; + int childCount = 0; FacetResultNode reuse = null; for(int ord=ordRange.start; ord<=ordRange.end; ord++) { //System.out.println(" ord=" + ord + " count= "+ counts[ord] + " bottomCount=" + bottomCount); - if (counts[ord] > bottomCount) { - dimCount += counts[ord]; - //System.out.println(" keep"); - if (reuse == null) { - reuse = new FacetResultNode(ord, counts[ord]); - } else { - reuse.ordinal = ord; - reuse.value = counts[ord]; - } - reuse = q.insertWithOverflow(reuse); - if (q.size() == request.numResults) { - bottomCount = (int) q.top().value; - //System.out.println(" new bottom=" + bottomCount); + if (counts[ord] > 0) { + childCount++; + if (counts[ord] > bottomCount) { + dimCount += counts[ord]; + //System.out.println(" keep"); + if (reuse == null) { + reuse = new FacetResultNode(ord, counts[ord]); + } else { + reuse.ordinal = ord; + reuse.value = counts[ord]; + } + reuse = q.insertWithOverflow(reuse); + if (q.size() == request.numResults) { + bottomCount = (int) q.top().value; + //System.out.println(" new bottom=" + bottomCount); + } } } } @@ -295,7 +298,7 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator { } rootNode.subResults = Arrays.asList(childNodes); - results.add(new FacetResult(request, rootNode, childNodes.length)); + results.add(new FacetResult(request, rootNode, childCount)); } return results; diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java index ac6b82c6d13..a8d612e7f7b 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java @@ -150,10 +150,13 @@ public class TestDrillSideways extends FacetTestCase { // Publish Date is only drill-down, and Lisa published // one in 2012 and one in 2010: assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0))); + assertEquals(2, r.facetResults.get(0).getNumValidDescendants()); + // Author is drill-sideways + drill-down: Lisa // (drill-down) published twice, and Frank/Susan/Bob // published once: assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1))); + assertEquals(4, r.facetResults.get(1).getNumValidDescendants()); // Another simple case: drill-down on on single fields // but OR of two values @@ -766,6 +769,7 @@ public class TestDrillSideways extends FacetTestCase { ds = new DrillSideways(s, tr); } + // Retrieve all facets: DrillSidewaysResult actual = ds.search(ddq, filter, null, numDocs, sort, true, true, fsp); TopDocs hits = s.search(baseQuery, numDocs); @@ -773,9 +777,12 @@ public class TestDrillSideways extends FacetTestCase { for(ScoreDoc sd : hits.scoreDocs) { scores.put(s.doc(sd.doc).get("id"), sd.score); } + if (VERBOSE) { + System.out.println(" verify all facets"); + } verifyEquals(requests, dimValues, s, expected, actual, scores, -1, doUseDV); - // Make sure topN works: + // Retrieve topN facets: int topN = _TestUtil.nextInt(random(), 1, 20); List newRequests = new ArrayList(); @@ -784,6 +791,9 @@ public class TestDrillSideways extends FacetTestCase { } fsp = new FacetSearchParams(newRequests); actual = ds.search(ddq, filter, null, numDocs, sort, true, true, fsp); + if (VERBOSE) { + System.out.println(" verify topN=" + topN); + } verifyEquals(newRequests, dimValues, s, expected, actual, scores, topN, doUseDV); // Make sure drill down doesn't change score: @@ -834,6 +844,7 @@ public class TestDrillSideways extends FacetTestCase { private static class SimpleFacetResult { List hits; int[][] counts; + int[] uniqueCounts; } private int[] getTopNOrds(final int[] counts, final String[] values, int topN) { @@ -985,13 +996,21 @@ public class TestDrillSideways extends FacetTestCase { SimpleFacetResult res = new SimpleFacetResult(); res.hits = hits; res.counts = new int[numDims][]; - for(int i=0;i