LUCENE-4885: FacetsAccumulator did not set the correct value for FacetResult.numValidDescendants

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466629 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2013-04-10 19:10:37 +00:00
parent 8e6108528b
commit 7e257c79fc
13 changed files with 108 additions and 58 deletions

View File

@ -254,6 +254,9 @@ Bug Fixes
* LUCENE-4880: Fix MemoryIndex to consume empty terms from the tokenstream consistent
with IndexWriter. Previously it discarded them. (Timothy Allison via Robert Muir)
* LUCENE-4885: FacetsAccumulator did not set the correct value for
FacetResult.numValidDescendants. (Mike McCandless, Shai Erera)
Documentation
* LUCENE-4841: Added example SimpleSortedSetFacetsExample to show how

View File

@ -46,7 +46,7 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler {
@Override
protected FacetResultNode getSentinelObject() {
return new FacetResultNode();
return new FacetResultNode(TaxonomyReader.INVALID_ORDINAL, 0);
}
@Override
@ -80,7 +80,8 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler {
* Add the siblings of {@code ordinal} to the given {@link PriorityQueue}. The
* given {@link PriorityQueue} is already filled with sentinel objects, so
* implementations are encouraged to use {@link PriorityQueue#top()} and
* {@link PriorityQueue#updateTop()} for best performance.
* {@link PriorityQueue#updateTop()} for best performance. Returns the total
* number of siblings.
*/
protected abstract int addSiblings(int ordinal, int[] siblings, PriorityQueue<FacetResultNode> pq);
@ -92,10 +93,8 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler {
int rootOrd = taxonomyReader.getOrdinal(facetRequest.categoryPath);
FacetResultNode root = new FacetResultNode();
root.ordinal = rootOrd;
FacetResultNode root = new FacetResultNode(rootOrd, valueOf(rootOrd));
root.label = facetRequest.categoryPath;
root.value = valueOf(rootOrd);
if (facetRequest.numResults > taxonomyReader.getSize()) {
// specialize this case, user is interested in all available results
ArrayList<FacetResultNode> nodes = new ArrayList<FacetResultNode>();
@ -118,11 +117,11 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler {
// since we use sentinel objects, we cannot reuse PQ. but that's ok because it's not big
PriorityQueue<FacetResultNode> pq = new FacetResultNodeQueue(facetRequest.numResults, true);
int numResults = addSiblings(children[rootOrd], siblings, pq);
int numSiblings = addSiblings(children[rootOrd], siblings, pq);
// pop() the least (sentinel) elements
int pqsize = pq.size();
int size = numResults < pqsize ? numResults : pqsize;
int size = numSiblings < pqsize ? numSiblings : pqsize;
for (int i = pqsize - size; i > 0; i--) { pq.pop(); }
// create the FacetResultNodes.
@ -133,7 +132,7 @@ public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler {
subResults[i] = node;
}
root.subResults = Arrays.asList(subResults);
return new FacetResult(facetRequest, root, size);
return new FacetResult(facetRequest, root, numSiblings);
}
}

View File

@ -40,19 +40,15 @@ public class FacetResult {
* @see FacetRequest#categoryPath
*/
public final FacetResultNode getFacetResultNode() {
return this.rootNode;
return rootNode;
}
/**
* Number of descendants of {@link #getFacetResultNode() root facet result
* node}, up till the requested depth. Typically -- have value != 0. This
* number does not include the root node.
*
* @see #getFacetRequest()
* @see FacetRequest#getDepth()
* node}, up till the requested depth.
*/
public final int getNumValidDescendants() {
return this.numValidDescendants;
return numValidDescendants;
}
/**
@ -61,7 +57,7 @@ public class FacetResult {
public final FacetRequest getFacetRequest() {
return this.facetRequest;
}
/**
* String representation of this facet result.
* Use with caution: might return a very long string.

View File

@ -67,10 +67,6 @@ public class FacetResultNode {
*/
public List<FacetResultNode> subResults = EMPTY_SUB_RESULTS;
public FacetResultNode() {
// empty constructor
}
public FacetResultNode(int ordinal, double value) {
this.ordinal = ordinal;
this.value = value;

View File

@ -81,11 +81,10 @@ public class FacetsAccumulator {
return new FacetsAccumulator(fsp, indexReader, taxoReader);
}
private static FacetResult emptyResult(int ordinal, FacetRequest fr) {
FacetResultNode root = new FacetResultNode();
root.ordinal = ordinal;
/** Returns an empty {@link FacetResult}. */
protected static FacetResult emptyResult(int ordinal, FacetRequest fr) {
FacetResultNode root = new FacetResultNode(ordinal, 0);
root.label = fr.categoryPath;
root.value = 0;
return new FacetResult(fr, root, 0);
}

View File

@ -43,18 +43,19 @@ public final class FloatFacetResultsHandler extends DepthOneFacetResultsHandler
return values[ordinal];
}
@Override
protected final int addSiblings(int ordinal, int[] siblings, PriorityQueue<FacetResultNode> pq) {
FacetResultNode top = pq.top();
int numResults = 0;
while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
float value = values[ordinal];
if (value > top.value) {
top.value = value;
top.ordinal = ordinal;
top = pq.updateTop();
if (value > 0.0f) {
++numResults;
if (value > top.value) {
top.value = value;
top.ordinal = ordinal;
top = pq.updateTop();
}
}
ordinal = siblings[ordinal];
}

View File

@ -49,11 +49,13 @@ public final class IntFacetResultsHandler extends DepthOneFacetResultsHandler {
int numResults = 0;
while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
int value = values[ordinal];
if (value > top.value) {
top.value = value;
top.ordinal = ordinal;
top = pq.updateTop();
if (value > 0) {
++numResults;
if (value > top.value) {
top.value = value;
top.ordinal = ordinal;
top = pq.updateTop();
}
}
ordinal = siblings[ordinal];
}

View File

@ -198,11 +198,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
if (tmpResult == null) {
// Add empty FacetResult:
FacetResultNode root = new FacetResultNode();
root.ordinal = TaxonomyReader.INVALID_ORDINAL;
root.label = fr.categoryPath;
root.value = 0;
res.add(new FacetResult(fr, root, 0));
res.add(emptyResult(taxonomyReader.getOrdinal(fr.categoryPath), fr));
continue;
}
FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);

View File

@ -256,7 +256,6 @@ public class TopKFacetResultsHandler extends PartitionsFacetResultsHandler {
* Create a Facet Result.
* @param facetRequest Request for which this result was obtained.
* @param facetResultNode top result node for this facet result.
* @param totalFacets - number of children of the targetFacet, up till the requested depth.
*/
TopKFacetResult(FacetRequest facetRequest, FacetResultNode facetResultNode, int totalFacets) {
super(facetRequest, facetResultNode, totalFacets);

View File

@ -706,8 +706,7 @@ public class TopKInEachNodeHandler extends PartitionsFacetResultsHandler {
value = tmp.rootNodeValue;
}
FacetResultNode root = generateNode(ordinal, value, tmp.mapToAACOs);
return new FacetResult (tmp.facetRequest, root, tmp.totalNumOfFacetsConsidered);
return new FacetResult(tmp.facetRequest, root, tmp.totalNumOfFacetsConsidered);
}
private FacetResultNode generateNode(int ordinal, double val, IntToObjectMap<AACO> mapToAACOs) {

View File

@ -107,7 +107,6 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator {
if (matchingDocs.totalHits < numSegOrds/10) {
// Remap every ord to global ord as we iterate:
final int[] segCounts = new int[numSegOrds];
int doc = 0;
while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
segValues.setDocument(doc);
@ -259,22 +258,26 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator {
//System.out.println("collect");
int dimCount = 0;
int childCount = 0;
FacetResultNode reuse = null;
for(int ord=ordRange.start; ord<=ordRange.end; ord++) {
//System.out.println(" ord=" + ord + " count= "+ counts[ord] + " bottomCount=" + bottomCount);
if (counts[ord] > bottomCount) {
dimCount += counts[ord];
//System.out.println(" keep");
if (reuse == null) {
reuse = new FacetResultNode(ord, counts[ord]);
} else {
reuse.ordinal = ord;
reuse.value = counts[ord];
}
reuse = q.insertWithOverflow(reuse);
if (q.size() == request.numResults) {
bottomCount = (int) q.top().value;
//System.out.println(" new bottom=" + bottomCount);
if (counts[ord] > 0) {
childCount++;
if (counts[ord] > bottomCount) {
dimCount += counts[ord];
//System.out.println(" keep");
if (reuse == null) {
reuse = new FacetResultNode(ord, counts[ord]);
} else {
reuse.ordinal = ord;
reuse.value = counts[ord];
}
reuse = q.insertWithOverflow(reuse);
if (q.size() == request.numResults) {
bottomCount = (int) q.top().value;
//System.out.println(" new bottom=" + bottomCount);
}
}
}
}
@ -295,7 +298,7 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator {
}
rootNode.subResults = Arrays.asList(childNodes);
results.add(new FacetResult(request, rootNode, childNodes.length));
results.add(new FacetResult(request, rootNode, childCount));
}
return results;

View File

@ -150,10 +150,13 @@ public class TestDrillSideways extends FacetTestCase {
// Publish Date is only drill-down, and Lisa published
// one in 2012 and one in 2010:
assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0)));
assertEquals(2, r.facetResults.get(0).getNumValidDescendants());
// Author is drill-sideways + drill-down: Lisa
// (drill-down) published twice, and Frank/Susan/Bob
// published once:
assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1)));
assertEquals(4, r.facetResults.get(1).getNumValidDescendants());
// Another simple case: drill-down on on single fields
// but OR of two values
@ -766,6 +769,7 @@ public class TestDrillSideways extends FacetTestCase {
ds = new DrillSideways(s, tr);
}
// Retrieve all facets:
DrillSidewaysResult actual = ds.search(ddq, filter, null, numDocs, sort, true, true, fsp);
TopDocs hits = s.search(baseQuery, numDocs);
@ -773,9 +777,12 @@ public class TestDrillSideways extends FacetTestCase {
for(ScoreDoc sd : hits.scoreDocs) {
scores.put(s.doc(sd.doc).get("id"), sd.score);
}
if (VERBOSE) {
System.out.println(" verify all facets");
}
verifyEquals(requests, dimValues, s, expected, actual, scores, -1, doUseDV);
// Make sure topN works:
// Retrieve topN facets:
int topN = _TestUtil.nextInt(random(), 1, 20);
List<FacetRequest> newRequests = new ArrayList<FacetRequest>();
@ -784,6 +791,9 @@ public class TestDrillSideways extends FacetTestCase {
}
fsp = new FacetSearchParams(newRequests);
actual = ds.search(ddq, filter, null, numDocs, sort, true, true, fsp);
if (VERBOSE) {
System.out.println(" verify topN=" + topN);
}
verifyEquals(newRequests, dimValues, s, expected, actual, scores, topN, doUseDV);
// Make sure drill down doesn't change score:
@ -834,6 +844,7 @@ public class TestDrillSideways extends FacetTestCase {
private static class SimpleFacetResult {
List<Doc> hits;
int[][] counts;
int[] uniqueCounts;
}
private int[] getTopNOrds(final int[] counts, final String[] values, int topN) {
@ -985,13 +996,21 @@ public class TestDrillSideways extends FacetTestCase {
SimpleFacetResult res = new SimpleFacetResult();
res.hits = hits;
res.counts = new int[numDims][];
for(int i=0;i<requests.size();i++) {
res.uniqueCounts = new int[numDims];
for (int i = 0; i < requests.size(); i++) {
int dim = Integer.parseInt(requests.get(i).categoryPath.components[0].substring(3));
if (drillDowns[dim] != null) {
res.counts[dim] = drillSidewaysCounts[dim].counts[dim];
} else {
res.counts[dim] = drillDownCounts.counts[dim];
}
int uniqueCount = 0;
for (int j = 0; j < res.counts[dim].length; j++) {
if (res.counts[dim][j] != 0) {
uniqueCount++;
}
}
res.uniqueCounts[dim] = uniqueCount;
}
return res;
@ -1107,6 +1126,8 @@ public class TestDrillSideways extends FacetTestCase {
}
assertEquals(setCount, actualValues.size());
}
assertEquals("dim=" + dim, expected.uniqueCounts[dim], fr.getNumValidDescendants());
}
}

View File

@ -17,6 +17,7 @@ import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.params.PerDimensionIndexingParams;
import org.apache.lucene.facet.search.FacetRequest.ResultMode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
@ -349,4 +350,39 @@ public class TestFacetsCollector extends FacetTestCase {
IOUtils.close(taxo, taxoDir, r, indexDir);
}
@Test
public void testNumValidDescendants() throws Exception {
// LUCENE-4885: FacetResult.numValidDescendants was not set properly by FacetsAccumulator
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
FacetFields facetFields = new FacetFields(taxonomyWriter);
for (int i = 0; i < 10; i++) {
Document doc = new Document();
facetFields.addFields(doc, Arrays.asList(new CategoryPath("a", Integer.toString(i))));
iw.addDocument(doc);
}
taxonomyWriter.close();
iw.close();
DirectoryReader r = DirectoryReader.open(indexDir);
DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
CountFacetRequest cfr = new CountFacetRequest(new CategoryPath("a"), 2);
cfr.setResultMode(random().nextBoolean() ? ResultMode.GLOBAL_FLAT : ResultMode.PER_NODE_IN_TREE);
FacetSearchParams fsp = new FacetSearchParams(cfr);
final FacetsAccumulator fa = random().nextBoolean() ? new FacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo);
FacetsCollector fc = FacetsCollector.create(fa);
new IndexSearcher(r).search(new MatchAllDocsQuery(), fc);
FacetResult res = fc.getFacetResults().get(0);
assertEquals(10, res.getNumValidDescendants());
IOUtils.close(taxo, taxoDir, r, indexDir);
}
}