LUCENE-4885: FacetsAccumulator did not set the correct value for FacetResult.numValidDescendants

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466629 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2013-04-10 19:10:37 +00:00
parent 8e6108528b
commit 7e257c79fc
13 changed files with 108 additions and 58 deletions

View File

@ -254,6 +254,9 @@ Bug Fixes
* LUCENE-4880: Fix MemoryIndex to consume empty terms from the tokenstream consistent * LUCENE-4880: Fix MemoryIndex to consume empty terms from the tokenstream consistent
with IndexWriter. Previously it discarded them. (Timothy Allison via Robert Muir) with IndexWriter. Previously it discarded them. (Timothy Allison via Robert Muir)
* LUCENE-4885: FacetsAccumulator did not set the correct value for
FacetResult.numValidDescendants. (Mike McCandless, Shai Erera)
Documentation Documentation
* LUCENE-4841: Added example SimpleSortedSetFacetsExample to show how * LUCENE-4841: Added example SimpleSortedSetFacetsExample to show how

View File

@ -46,7 +46,7 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler {
@Override @Override
protected FacetResultNode getSentinelObject() { protected FacetResultNode getSentinelObject() {
return new FacetResultNode(); return new FacetResultNode(TaxonomyReader.INVALID_ORDINAL, 0);
} }
@Override @Override
@ -80,7 +80,8 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler {
* Add the siblings of {@code ordinal} to the given {@link PriorityQueue}. The * Add the siblings of {@code ordinal} to the given {@link PriorityQueue}. The
* given {@link PriorityQueue} is already filled with sentinel objects, so * given {@link PriorityQueue} is already filled with sentinel objects, so
* implementations are encouraged to use {@link PriorityQueue#top()} and * implementations are encouraged to use {@link PriorityQueue#top()} and
* {@link PriorityQueue#updateTop()} for best performance. * {@link PriorityQueue#updateTop()} for best performance. Returns the total
* number of siblings.
*/ */
protected abstract int addSiblings(int ordinal, int[] siblings, PriorityQueue<FacetResultNode> pq); protected abstract int addSiblings(int ordinal, int[] siblings, PriorityQueue<FacetResultNode> pq);
@ -92,10 +93,8 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler {
int rootOrd = taxonomyReader.getOrdinal(facetRequest.categoryPath); int rootOrd = taxonomyReader.getOrdinal(facetRequest.categoryPath);
FacetResultNode root = new FacetResultNode(); FacetResultNode root = new FacetResultNode(rootOrd, valueOf(rootOrd));
root.ordinal = rootOrd;
root.label = facetRequest.categoryPath; root.label = facetRequest.categoryPath;
root.value = valueOf(rootOrd);
if (facetRequest.numResults > taxonomyReader.getSize()) { if (facetRequest.numResults > taxonomyReader.getSize()) {
// specialize this case, user is interested in all available results // specialize this case, user is interested in all available results
ArrayList<FacetResultNode> nodes = new ArrayList<FacetResultNode>(); ArrayList<FacetResultNode> nodes = new ArrayList<FacetResultNode>();
@ -118,11 +117,11 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler {
// since we use sentinel objects, we cannot reuse PQ. but that's ok because it's not big // since we use sentinel objects, we cannot reuse PQ. but that's ok because it's not big
PriorityQueue<FacetResultNode> pq = new FacetResultNodeQueue(facetRequest.numResults, true); PriorityQueue<FacetResultNode> pq = new FacetResultNodeQueue(facetRequest.numResults, true);
int numResults = addSiblings(children[rootOrd], siblings, pq); int numSiblings = addSiblings(children[rootOrd], siblings, pq);
// pop() the least (sentinel) elements // pop() the least (sentinel) elements
int pqsize = pq.size(); int pqsize = pq.size();
int size = numResults < pqsize ? numResults : pqsize; int size = numSiblings < pqsize ? numSiblings : pqsize;
for (int i = pqsize - size; i > 0; i--) { pq.pop(); } for (int i = pqsize - size; i > 0; i--) { pq.pop(); }
// create the FacetResultNodes. // create the FacetResultNodes.
@ -133,7 +132,7 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler {
subResults[i] = node; subResults[i] = node;
} }
root.subResults = Arrays.asList(subResults); root.subResults = Arrays.asList(subResults);
return new FacetResult(facetRequest, root, size); return new FacetResult(facetRequest, root, numSiblings);
} }
} }

View File

@ -40,19 +40,15 @@ public class FacetResult {
* @see FacetRequest#categoryPath * @see FacetRequest#categoryPath
*/ */
public final FacetResultNode getFacetResultNode() { public final FacetResultNode getFacetResultNode() {
return this.rootNode; return rootNode;
} }
/** /**
* Number of descendants of {@link #getFacetResultNode() root facet result * Number of descendants of {@link #getFacetResultNode() root facet result
* node}, up till the requested depth. Typically -- have value != 0. This * node}, up till the requested depth.
* number does not include the root node.
*
* @see #getFacetRequest()
* @see FacetRequest#getDepth()
*/ */
public final int getNumValidDescendants() { public final int getNumValidDescendants() {
return this.numValidDescendants; return numValidDescendants;
} }
/** /**

View File

@ -67,10 +67,6 @@ public class FacetResultNode {
*/ */
public List<FacetResultNode> subResults = EMPTY_SUB_RESULTS; public List<FacetResultNode> subResults = EMPTY_SUB_RESULTS;
public FacetResultNode() {
// empty constructor
}
public FacetResultNode(int ordinal, double value) { public FacetResultNode(int ordinal, double value) {
this.ordinal = ordinal; this.ordinal = ordinal;
this.value = value; this.value = value;

View File

@ -81,11 +81,10 @@ public class FacetsAccumulator {
return new FacetsAccumulator(fsp, indexReader, taxoReader); return new FacetsAccumulator(fsp, indexReader, taxoReader);
} }
private static FacetResult emptyResult(int ordinal, FacetRequest fr) { /** Returns an empty {@link FacetResult}. */
FacetResultNode root = new FacetResultNode(); protected static FacetResult emptyResult(int ordinal, FacetRequest fr) {
root.ordinal = ordinal; FacetResultNode root = new FacetResultNode(ordinal, 0);
root.label = fr.categoryPath; root.label = fr.categoryPath;
root.value = 0;
return new FacetResult(fr, root, 0); return new FacetResult(fr, root, 0);
} }

View File

@ -43,18 +43,19 @@ public final class FloatFacetResultsHandler extends DepthOneFacetResultsHandler
return values[ordinal]; return values[ordinal];
} }
@Override @Override
protected final int addSiblings(int ordinal, int[] siblings, PriorityQueue<FacetResultNode> pq) { protected final int addSiblings(int ordinal, int[] siblings, PriorityQueue<FacetResultNode> pq) {
FacetResultNode top = pq.top(); FacetResultNode top = pq.top();
int numResults = 0; int numResults = 0;
while (ordinal != TaxonomyReader.INVALID_ORDINAL) { while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
float value = values[ordinal]; float value = values[ordinal];
if (value > 0.0f) {
++numResults;
if (value > top.value) { if (value > top.value) {
top.value = value; top.value = value;
top.ordinal = ordinal; top.ordinal = ordinal;
top = pq.updateTop(); top = pq.updateTop();
++numResults; }
} }
ordinal = siblings[ordinal]; ordinal = siblings[ordinal];
} }

View File

@ -49,11 +49,13 @@ public final class IntFacetResultsHandler extends DepthOneFacetResultsHandler {
int numResults = 0; int numResults = 0;
while (ordinal != TaxonomyReader.INVALID_ORDINAL) { while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
int value = values[ordinal]; int value = values[ordinal];
if (value > 0) {
++numResults;
if (value > top.value) { if (value > top.value) {
top.value = value; top.value = value;
top.ordinal = ordinal; top.ordinal = ordinal;
top = pq.updateTop(); top = pq.updateTop();
++numResults; }
} }
ordinal = siblings[ordinal]; ordinal = siblings[ordinal];
} }

View File

@ -198,11 +198,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
IntermediateFacetResult tmpResult = fr2tmpRes.get(fr); IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
if (tmpResult == null) { if (tmpResult == null) {
// Add empty FacetResult: // Add empty FacetResult:
FacetResultNode root = new FacetResultNode(); res.add(emptyResult(taxonomyReader.getOrdinal(fr.categoryPath), fr));
root.ordinal = TaxonomyReader.INVALID_ORDINAL;
root.label = fr.categoryPath;
root.value = 0;
res.add(new FacetResult(fr, root, 0));
continue; continue;
} }
FacetResult facetRes = frHndlr.renderFacetResult(tmpResult); FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);

View File

@ -256,7 +256,6 @@ public class TopKFacetResultsHandler extends PartitionsFacetResultsHandler {
* Create a Facet Result. * Create a Facet Result.
* @param facetRequest Request for which this result was obtained. * @param facetRequest Request for which this result was obtained.
* @param facetResultNode top result node for this facet result. * @param facetResultNode top result node for this facet result.
* @param totalFacets - number of children of the targetFacet, up till the requested depth.
*/ */
TopKFacetResult(FacetRequest facetRequest, FacetResultNode facetResultNode, int totalFacets) { TopKFacetResult(FacetRequest facetRequest, FacetResultNode facetResultNode, int totalFacets) {
super(facetRequest, facetResultNode, totalFacets); super(facetRequest, facetResultNode, totalFacets);

View File

@ -706,8 +706,7 @@ public class TopKInEachNodeHandler extends PartitionsFacetResultsHandler {
value = tmp.rootNodeValue; value = tmp.rootNodeValue;
} }
FacetResultNode root = generateNode(ordinal, value, tmp.mapToAACOs); FacetResultNode root = generateNode(ordinal, value, tmp.mapToAACOs);
return new FacetResult (tmp.facetRequest, root, tmp.totalNumOfFacetsConsidered); return new FacetResult(tmp.facetRequest, root, tmp.totalNumOfFacetsConsidered);
} }
private FacetResultNode generateNode(int ordinal, double val, IntToObjectMap<AACO> mapToAACOs) { private FacetResultNode generateNode(int ordinal, double val, IntToObjectMap<AACO> mapToAACOs) {

View File

@ -107,7 +107,6 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator {
if (matchingDocs.totalHits < numSegOrds/10) { if (matchingDocs.totalHits < numSegOrds/10) {
// Remap every ord to global ord as we iterate: // Remap every ord to global ord as we iterate:
final int[] segCounts = new int[numSegOrds];
int doc = 0; int doc = 0;
while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) { while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
segValues.setDocument(doc); segValues.setDocument(doc);
@ -259,9 +258,12 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator {
//System.out.println("collect"); //System.out.println("collect");
int dimCount = 0; int dimCount = 0;
int childCount = 0;
FacetResultNode reuse = null; FacetResultNode reuse = null;
for(int ord=ordRange.start; ord<=ordRange.end; ord++) { for(int ord=ordRange.start; ord<=ordRange.end; ord++) {
//System.out.println(" ord=" + ord + " count= "+ counts[ord] + " bottomCount=" + bottomCount); //System.out.println(" ord=" + ord + " count= "+ counts[ord] + " bottomCount=" + bottomCount);
if (counts[ord] > 0) {
childCount++;
if (counts[ord] > bottomCount) { if (counts[ord] > bottomCount) {
dimCount += counts[ord]; dimCount += counts[ord];
//System.out.println(" keep"); //System.out.println(" keep");
@ -278,6 +280,7 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator {
} }
} }
} }
}
CategoryListParams.OrdinalPolicy op = searchParams.indexingParams.getCategoryListParams(request.categoryPath).getOrdinalPolicy(dim); CategoryListParams.OrdinalPolicy op = searchParams.indexingParams.getCategoryListParams(request.categoryPath).getOrdinalPolicy(dim);
if (op == CategoryListParams.OrdinalPolicy.ALL_BUT_DIMENSION) { if (op == CategoryListParams.OrdinalPolicy.ALL_BUT_DIMENSION) {
@ -295,7 +298,7 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator {
} }
rootNode.subResults = Arrays.asList(childNodes); rootNode.subResults = Arrays.asList(childNodes);
results.add(new FacetResult(request, rootNode, childNodes.length)); results.add(new FacetResult(request, rootNode, childCount));
} }
return results; return results;

View File

@ -150,10 +150,13 @@ public class TestDrillSideways extends FacetTestCase {
// Publish Date is only drill-down, and Lisa published // Publish Date is only drill-down, and Lisa published
// one in 2012 and one in 2010: // one in 2012 and one in 2010:
assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0))); assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0)));
assertEquals(2, r.facetResults.get(0).getNumValidDescendants());
// Author is drill-sideways + drill-down: Lisa // Author is drill-sideways + drill-down: Lisa
// (drill-down) published twice, and Frank/Susan/Bob // (drill-down) published twice, and Frank/Susan/Bob
// published once: // published once:
assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1))); assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1)));
assertEquals(4, r.facetResults.get(1).getNumValidDescendants());
// Another simple case: drill-down on on single fields // Another simple case: drill-down on on single fields
// but OR of two values // but OR of two values
@ -766,6 +769,7 @@ public class TestDrillSideways extends FacetTestCase {
ds = new DrillSideways(s, tr); ds = new DrillSideways(s, tr);
} }
// Retrieve all facets:
DrillSidewaysResult actual = ds.search(ddq, filter, null, numDocs, sort, true, true, fsp); DrillSidewaysResult actual = ds.search(ddq, filter, null, numDocs, sort, true, true, fsp);
TopDocs hits = s.search(baseQuery, numDocs); TopDocs hits = s.search(baseQuery, numDocs);
@ -773,9 +777,12 @@ public class TestDrillSideways extends FacetTestCase {
for(ScoreDoc sd : hits.scoreDocs) { for(ScoreDoc sd : hits.scoreDocs) {
scores.put(s.doc(sd.doc).get("id"), sd.score); scores.put(s.doc(sd.doc).get("id"), sd.score);
} }
if (VERBOSE) {
System.out.println(" verify all facets");
}
verifyEquals(requests, dimValues, s, expected, actual, scores, -1, doUseDV); verifyEquals(requests, dimValues, s, expected, actual, scores, -1, doUseDV);
// Make sure topN works: // Retrieve topN facets:
int topN = _TestUtil.nextInt(random(), 1, 20); int topN = _TestUtil.nextInt(random(), 1, 20);
List<FacetRequest> newRequests = new ArrayList<FacetRequest>(); List<FacetRequest> newRequests = new ArrayList<FacetRequest>();
@ -784,6 +791,9 @@ public class TestDrillSideways extends FacetTestCase {
} }
fsp = new FacetSearchParams(newRequests); fsp = new FacetSearchParams(newRequests);
actual = ds.search(ddq, filter, null, numDocs, sort, true, true, fsp); actual = ds.search(ddq, filter, null, numDocs, sort, true, true, fsp);
if (VERBOSE) {
System.out.println(" verify topN=" + topN);
}
verifyEquals(newRequests, dimValues, s, expected, actual, scores, topN, doUseDV); verifyEquals(newRequests, dimValues, s, expected, actual, scores, topN, doUseDV);
// Make sure drill down doesn't change score: // Make sure drill down doesn't change score:
@ -834,6 +844,7 @@ public class TestDrillSideways extends FacetTestCase {
private static class SimpleFacetResult { private static class SimpleFacetResult {
List<Doc> hits; List<Doc> hits;
int[][] counts; int[][] counts;
int[] uniqueCounts;
} }
private int[] getTopNOrds(final int[] counts, final String[] values, int topN) { private int[] getTopNOrds(final int[] counts, final String[] values, int topN) {
@ -985,13 +996,21 @@ public class TestDrillSideways extends FacetTestCase {
SimpleFacetResult res = new SimpleFacetResult(); SimpleFacetResult res = new SimpleFacetResult();
res.hits = hits; res.hits = hits;
res.counts = new int[numDims][]; res.counts = new int[numDims][];
for(int i=0;i<requests.size();i++) { res.uniqueCounts = new int[numDims];
for (int i = 0; i < requests.size(); i++) {
int dim = Integer.parseInt(requests.get(i).categoryPath.components[0].substring(3)); int dim = Integer.parseInt(requests.get(i).categoryPath.components[0].substring(3));
if (drillDowns[dim] != null) { if (drillDowns[dim] != null) {
res.counts[dim] = drillSidewaysCounts[dim].counts[dim]; res.counts[dim] = drillSidewaysCounts[dim].counts[dim];
} else { } else {
res.counts[dim] = drillDownCounts.counts[dim]; res.counts[dim] = drillDownCounts.counts[dim];
} }
int uniqueCount = 0;
for (int j = 0; j < res.counts[dim].length; j++) {
if (res.counts[dim][j] != 0) {
uniqueCount++;
}
}
res.uniqueCounts[dim] = uniqueCount;
} }
return res; return res;
@ -1107,6 +1126,8 @@ public class TestDrillSideways extends FacetTestCase {
} }
assertEquals(setCount, actualValues.size()); assertEquals(setCount, actualValues.size());
} }
assertEquals("dim=" + dim, expected.uniqueCounts[dim], fr.getNumValidDescendants());
} }
} }

View File

@ -17,6 +17,7 @@ import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.params.PerDimensionIndexingParams; import org.apache.lucene.facet.params.PerDimensionIndexingParams;
import org.apache.lucene.facet.search.FacetRequest.ResultMode;
import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
@ -349,4 +350,39 @@ public class TestFacetsCollector extends FacetTestCase {
IOUtils.close(taxo, taxoDir, r, indexDir); IOUtils.close(taxo, taxoDir, r, indexDir);
} }
@Test
public void testNumValidDescendants() throws Exception {
// LUCENE-4885: FacetResult.numValidDescendants was not set properly by FacetsAccumulator
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
FacetFields facetFields = new FacetFields(taxonomyWriter);
for (int i = 0; i < 10; i++) {
Document doc = new Document();
facetFields.addFields(doc, Arrays.asList(new CategoryPath("a", Integer.toString(i))));
iw.addDocument(doc);
}
taxonomyWriter.close();
iw.close();
DirectoryReader r = DirectoryReader.open(indexDir);
DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
CountFacetRequest cfr = new CountFacetRequest(new CategoryPath("a"), 2);
cfr.setResultMode(random().nextBoolean() ? ResultMode.GLOBAL_FLAT : ResultMode.PER_NODE_IN_TREE);
FacetSearchParams fsp = new FacetSearchParams(cfr);
final FacetsAccumulator fa = random().nextBoolean() ? new FacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo);
FacetsCollector fc = FacetsCollector.create(fa);
new IndexSearcher(r).search(new MatchAllDocsQuery(), fc);
FacetResult res = fc.getFacetResults().get(0);
assertEquals(10, res.getNumValidDescendants());
IOUtils.close(taxo, taxoDir, r, indexDir);
}
} }