LUCENE-4709: remove FacetResultNode.residue

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1437345 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2013-01-23 11:14:06 +00:00
parent af97b15f10
commit a64ef7400f
11 changed files with 26 additions and 141 deletions

View File

@ -64,6 +64,10 @@ New Features
* LUCENE-4599: New oal.codecs.compressing.CompressingTermVectorsFormat which
compresses term vectors into chunks of documents similarly to
CompressingStoredFieldsFormat. (Adrien Grand)
API Changes
* LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera)
Bug Fixes

View File

@ -5,7 +5,6 @@ import java.util.List;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.search.sampling.RandomSampler;
import org.apache.lucene.facet.search.sampling.Sampler;
import org.apache.lucene.facet.search.sampling.SamplingAccumulator;
@ -37,8 +36,7 @@ import org.apache.lucene.index.IndexReader;
* <p>
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()} and
* {@link FacetResultNode#residue}.
* {@link FacetResult#getNumValidDescendants()}.
*
* @lucene.experimental
*/

View File

@ -262,7 +262,6 @@ public class CountingFacetsCollector extends FacetsCollector {
}
child = siblings[child];
}
root.residue = 0;
root.subResults = nodes;
res.add(new FacetResult(fr, root, nodes.size()));
continue;
@ -273,17 +272,13 @@ public class CountingFacetsCollector extends FacetsCollector {
FacetResultNode top = pq.top();
int child = children[rootOrd];
int numResults = 0; // count the number of results
int residue = 0;
while (child != TaxonomyReader.INVALID_ORDINAL) {
int count = counts[child];
if (count > top.value) {
residue += top.value;
top.value = count;
top.ordinal = child;
top = pq.updateTop();
++numResults;
} else {
residue += count;
}
child = siblings[child];
}
@ -300,7 +295,6 @@ public class CountingFacetsCollector extends FacetsCollector {
node.label = taxoReader.getPath(node.ordinal);
subResults[i] = node;
}
root.residue = residue;
root.subResults = Arrays.asList(subResults);
res.add(new FacetResult(fr, root, size));
}

View File

@ -6,7 +6,6 @@ import java.util.List;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.search.sampling.Sampler;
import org.apache.lucene.facet.search.sampling.Sampler.SampleResult;
@ -32,8 +31,7 @@ import org.apache.lucene.facet.search.sampling.Sampler.SampleResult;
* <p>
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()} and
* {@link FacetResultNode#residue}.
* {@link FacetResult#getNumValidDescendants()}.
*
* @lucene.experimental
*/

View File

@ -62,7 +62,6 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
value = facetRequest.getValueOf(facetArrays, ordinal % partitionSize);
}
// TODO (Facet): should initial value of "residue" depend on aggregator if not sum?
FacetResultNode parentResultNode = new FacetResultNode(ordinal, value);
Heap<FacetResultNode> heap = ResultSortUtils.createSuitableHeap(facetRequest);
@ -97,11 +96,7 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
}
// bring sub results from heap of tmp res into result heap
for (int i = tmpHeap.size(); i > 0; i--) {
FacetResultNode a = heap.insertWithOverflow(tmpHeap.pop());
if (a != null) {
resNode.residue += a.residue;
}
heap.insertWithOverflow(tmpHeap.pop());
}
}
@ -177,14 +172,9 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
reusable.value = value;
reusable.subResults.clear();
reusable.label = null;
reusable.residue = 0;
}
++childrenCounter;
reusable = pq.insertWithOverflow(reusable);
if (reusable != null) {
// TODO (Facet): is other logic (not add) needed, per aggregator?
parentResultNode.residue += reusable.value;
}
}
}
if (localDepth < depth) {

View File

@ -266,9 +266,7 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
tosOrdinal = siblings[tosOrdinal];
}
// now it is inside. Run it and all its siblings inside the partition through a heap
// and in doing so, count them, find best K, and sum into residue
double residue = 0f; // the sum of all the siblings from this partition that do not make
// it to top K
// and in doing so, count them, find best K
pq.clear();
//reusables are consumed as from a stack. The stack starts full and returns full.
@ -286,10 +284,6 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
ac.value = value;
ac = pq.insertWithOverflow(ac);
if (null != ac) {
residue += ac.value;
// TODO (Facet): could it be that we need to do something
// else, not add, depending on the aggregator?
/* when a facet is excluded from top K, because already in this partition it has
* K better siblings, it is only recursed for count only.
*/
@ -320,7 +314,7 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
// and add ords to sibling stack, and make a note in siblingExplored that these are to
// be visited now
if (ords.length > 0) {
AACOsOfOnePartition.put(ordinalStack[localDepth-1], new AACO(ords,vals,residue));
AACOsOfOnePartition.put(ordinalStack[localDepth-1], new AACO(ords,vals));
bestSignlingsStack[localDepth] = ords;
siblingExplored[localDepth] = ords.length-1;
ordinalStack[localDepth] = ords[ords.length-1];
@ -449,8 +443,7 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
IntToObjectMap<AACO> tmpToReturnMapToACCOs = tmpToReturn.mapToAACOs;
IntToObjectMap<AACO> tfrMapToACCOs = tfr.mapToAACOs;
IntIterator tfrIntIterator = tfrMapToACCOs.keyIterator();
//iterate over all ordinals in tfr that are maps to their children (and the residue over
// non included chilren)
//iterate over all ordinals in tfr that are maps to their children
while (tfrIntIterator.hasNext()) {
int tfrkey = tfrIntIterator.next();
AACO tmpToReturnAACO = null;
@ -467,7 +460,6 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
}
int[] resOrds = new int [resLength];
double[] resVals = new double [resLength];
double resResidue = tmpToReturnAACO.residue + tfrAACO.residue;
int indexIntoTmpToReturn = 0;
int indexIntoTFR = 0;
ACComparator merger = getSuitableACComparator(); // by facet Request
@ -504,15 +496,9 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
// altogether yielding no more that best K kids for tfrkey, not to appear in the new shape of
// tmpToReturn
while (indexIntoTmpToReturn < tmpToReturnAACO.ordinals.length) {
resResidue += tmpToReturnAACO.values[indexIntoTmpToReturn++];
}
while (indexIntoTFR < tfrAACO.ordinals.length) {
resResidue += tfrAACO.values[indexIntoTFR++];
}
//update the list of best kids of tfrkey as appear in tmpToReturn
tmpToReturnMapToACCOs.put(tfrkey, new AACO(resOrds, resVals, resResidue));
} // endof need to merge both AACO -- children and residue for same ordinal
tmpToReturnMapToACCOs.put(tfrkey, new AACO(resOrds, resVals));
} // endof need to merge both AACO -- children for same ordinal
} // endof loop over all ordinals in tfr
} // endof loop over all temporary facet results to merge
@ -682,19 +668,15 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
* potential nodes of the {@link FacetResult} tree
* (i.e., the descendants of the root node, no deeper than the specified depth).
* No more than K ( = {@link FacetRequest#getNumResults()})
* siblings are enumerated, and
* <i>residue</i> holds the sum of values of the siblings rejected from the
* enumerated top K.
* siblings are enumerated.
* @lucene.internal
*/
protected static final class AACO {
int [] ordinals; // ordinals of the best K children, sorted from best to least
double [] values; // the respective values for these children
double residue; // sum of values of all other children, that did not get into top K
AACO (int[] ords, double[] vals, double r) {
AACO (int[] ords, double[] vals) {
this.ordinals = ords;
this.values = vals;
this.residue = r;
}
}
@ -787,7 +769,6 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
list.add(generateNode(aaco.ordinals[i], aaco.values[i], mapToAACOs));
}
node.subResults = list;
node.residue = aaco.residue;
return node;
}

View File

@ -58,14 +58,6 @@ public class FacetResultNode {
*/
public double value;
/**
* The total value of screened out sub results. If only part of the results
* were returned (usually because only the top-K categories are requested),
* then this provides information on "what else is there under this result
* node".
*/
public double residue;
/**
* The sub-results of this result. If {@link FacetRequest#getResultMode()} is
* {@link ResultMode#PER_NODE_IN_TREE}, every sub result denotes an immediate
@ -100,9 +92,6 @@ public class FacetResultNode {
sb.append(label.toString());
}
sb.append(" (").append(Double.toString(value)).append(")");
if (residue > 0) {
sb.append(" (residue=").append(residue).append(")");
}
for (FacetResultNode sub : subResults) {
sb.append("\n").append(prefix).append(sub.toString(prefix + " "));
}

View File

@ -39,8 +39,7 @@ import org.apache.lucene.index.IndexReader;
* <p>
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()} &
* {@link FacetResultNode#residue}.
* {@link FacetResult#getNumValidDescendants()}.
*
* @lucene.experimental
*/
@ -187,17 +186,6 @@ public abstract class Sampler {
trimmed.add(trimmedNode);
}
/*
* If we are trimming, it means Sampling is in effect and the extra
* (over-sampled) results are being trimmed. Although the residue is not
* guaranteed to be accurate for Sampling, we try our best to fix it.
* The node's residue now will take under account the sub-nodes we're
* trimming.
*/
for (int i = size; i < node.subResults.size(); i++) {
node.residue += node.subResults.get(i).value;
}
node.subResults = trimmed;
}

View File

@ -12,7 +12,6 @@ import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.StandardFacetsAccumulator;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.search.sampling.Sampler.SampleResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
@ -48,8 +47,7 @@ import org.apache.lucene.index.IndexReader;
* <p>
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()} &
* {@link FacetResultNode#residue}.
* {@link FacetResult#getNumValidDescendants()}.
*
* @see Sampler
* @lucene.experimental

View File

@ -328,39 +328,6 @@ public class CountingFacetsCollectorTest extends LuceneTestCase {
for (FacetResult res : facetResults) {
FacetResultNode root = res.getFacetResultNode();
assertEquals("wrong count for " + root.label, termExpectedCounts.get(root.label), (int) root.value);
assertEquals("invalid residue", 0, (int) root.residue);
for (FacetResultNode child : root.subResults) {
assertEquals("wrong count for " + child.label, termExpectedCounts.get(child.label), (int) child.value);
}
}
IOUtils.close(indexReader, taxoReader);
}
@Test
public void testResidue() throws Exception {
// test the collector's handling of residue
DirectoryReader indexReader = DirectoryReader.open(indexDir);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
// asking for top 1 is the only way to guarantee there will be a residue
// provided that enough children were indexed (see below)
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, 1), new CountFacetRequest(CP_B, 1));
FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader);
TermQuery q = new TermQuery(A);
searcher.search(q, fc);
List<FacetResult> facetResults = fc.getFacetResults();
assertEquals("invalid number of facet results", 2, facetResults.size());
for (FacetResult res : facetResults) {
FacetResultNode root = res.getFacetResultNode();
assertEquals("wrong count for " + root.label, termExpectedCounts.get(root.label), (int) root.value);
// make sure randomness didn't pick only one child of root (otherwise there's no residue)
int numChildrenIndexed = res.getFacetRequest().categoryPath == CP_A ? numChildrenIndexedA : numChildrenIndexedB;
if (numChildrenIndexed > 1) {
assertTrue("expected residue", root.residue > 0);
}
for (FacetResultNode child : root.subResults) {
assertEquals("wrong count for " + child.label, termExpectedCounts.get(child.label), (int) child.value);
}
@ -385,7 +352,6 @@ public class CountingFacetsCollectorTest extends LuceneTestCase {
for (FacetResult res : facetResults) {
FacetResultNode root = res.getFacetResultNode();
assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
assertEquals("invalid residue", 0, (int) root.residue);
for (FacetResultNode child : root.subResults) {
assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
}
@ -410,7 +376,6 @@ public class CountingFacetsCollectorTest extends LuceneTestCase {
for (FacetResult res : facetResults) {
FacetResultNode root = res.getFacetResultNode();
assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
assertEquals("invalid residue", 0, (int) root.residue);
for (FacetResultNode child : root.subResults) {
assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
}
@ -435,7 +400,6 @@ public class CountingFacetsCollectorTest extends LuceneTestCase {
for (FacetResult res : facetResults) {
FacetResultNode root = res.getFacetResultNode();
assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
assertEquals("invalid residue", 0, (int) root.residue);
for (FacetResultNode child : root.subResults) {
assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
}
@ -501,7 +465,6 @@ public class CountingFacetsCollectorTest extends LuceneTestCase {
for (FacetResult res : facetResults) {
FacetResultNode root = res.getFacetResultNode();
assertEquals("wrong count for " + root.label, expCounts.get(root.label), (int) root.value);
assertEquals("invalid residue", 0, (int) root.residue);
for (FacetResultNode child : root.subResults) {
assertEquals("wrong count for " + child.label, expCounts.get(child.label), (int) child.value);
}

View File

@ -182,39 +182,34 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
assertEquals(9, fr.getNumValidDescendants());
FacetResultNode parentRes = fr.getFacetResultNode();
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size());
// two nodes sorted by descending values: a/b with 8 and a/c with 6
// a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2.
// a/c has residue 0, and one child a/c/1 with value 1.
double [] expectedValues0 = { 8.0, 2.0, 3.0, 0.0, 2.0, 0.0, 6.0, 0.0, 1.0, 0.0 };
// a/b has two children a/b/2 with value 3, and a/b/1 with value 2.
// a/c has one child a/c/1 with value 1.
double [] expectedValues0 = { 8.0, 3.0, 2.0, 6.0, 1.0 };
int i = 0;
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues0[i++], node.value, Double.MIN_VALUE);
assertEquals(expectedValues0[i++], node.residue, Double.MIN_VALUE);
for (FacetResultNode node2 : node.subResults) {
assertEquals(expectedValues0[i++], node2.value, Double.MIN_VALUE);
assertEquals(expectedValues0[i++], node2.residue, Double.MIN_VALUE);
}
}
// now just change the value of the first child of the root to 5, and then rearrange
// expected are: first a/c of value 6 and residue 0, and one child a/c/1 with value 1
// then a/b with value 5 and residue 2, and both children: a/b/2 with value 3, and a/b/1 with value 2.
// expected are: first a/c of value 6, and one child a/c/1 with value 1
// then a/b with value 5, and both children: a/b/2 with value 3, and a/b/1 with value 2.
for (FacetResultNode node : parentRes.subResults) {
node.value = 5.0;
break;
}
// now rearrange
double [] expectedValues00 = { 6.0, 0.0, 1.0, 0.0, 5.0, 2.0, 3.0, 0.0, 2.0, 0.0 };
double [] expectedValues00 = { 6.0, 1.0, 5.0, 3.0, 2.0 };
fr = cfra23.createFacetResultsHandler(tr).rearrangeFacetResult(fr);
i = 0;
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues00[i++], node.value, Double.MIN_VALUE);
assertEquals(expectedValues00[i++], node.residue, Double.MIN_VALUE);
for (FacetResultNode node2 : node.subResults) {
assertEquals(expectedValues00[i++], node2.value, Double.MIN_VALUE);
assertEquals(expectedValues00[i++], node2.residue, Double.MIN_VALUE);
}
}
@ -223,18 +218,15 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
assertEquals(9, fr.getNumValidDescendants());
parentRes = fr.getFacetResultNode();
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size());
// two nodes sorted by descending values: a/b with 8 and a/c with 6
// a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2.
// a/c has residue 0, and one child a/c/1 with value 1.
// a/b has two children a/b/2 with value 3, and a/b/1 with value 2.
// a/c has one child a/c/1 with value 1.
i = 0;
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues0[i++], node.value, Double.MIN_VALUE);
assertEquals(expectedValues0[i++], node.residue, Double.MIN_VALUE);
for (FacetResultNode node2 : node.subResults) {
assertEquals(expectedValues0[i++], node2.value, Double.MIN_VALUE);
assertEquals(expectedValues0[i++], node2.residue, Double.MIN_VALUE);
}
}
@ -243,16 +235,13 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
assertEquals(4, fr.getNumValidDescendants(), 4);
parentRes = fr.getFacetResultNode();
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size());
// two nodes sorted by descending values:
// a/b with value 8 and residue 0 (because no children considered),
// and a/c with value 6 and residue 0 (because no children considered)
double [] expectedValues2 = { 8.0, 0.0, 6.0, 0.0 };
// a/b with value 8 and a/c with value 6
double [] expectedValues2 = { 8.0, 6.0, 0.0};
i = 0;
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues2[i++], node.value, Double.MIN_VALUE);
assertEquals(expectedValues2[i++], node.residue, Double.MIN_VALUE);
assertEquals(node.subResults.size(), 0);
}
@ -261,13 +250,11 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
assertEquals(4, fr.getNumValidDescendants());
parentRes = fr.getFacetResultNode();
assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size());
double [] expectedValues3 = { 3.0, 2.0 };
i = 0;
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues3[i++], node.value, Double.MIN_VALUE);
assertEquals(0.0, node.residue, Double.MIN_VALUE);
assertEquals(0, node.subResults.size());
}
@ -276,12 +263,10 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
assertEquals(4, fr.getNumValidDescendants());
parentRes = fr.getFacetResultNode();
assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size());
i = 0;
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues3[i++], node.value, Double.MIN_VALUE);
assertEquals(0.0, node.residue, Double.MIN_VALUE);
assertEquals(0, node.subResults.size());
}
@ -290,12 +275,10 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
assertEquals(4, fr.getNumValidDescendants());
parentRes = fr.getFacetResultNode();
assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size());
i = 0;
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues3[i++], node.value, Double.MIN_VALUE);
assertEquals(0.0, node.residue, Double.MIN_VALUE);
assertEquals(0, node.subResults.size());
}
@ -304,7 +287,6 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
assertEquals(0, fr.getNumValidDescendants()); // 0 descendants but rootnode
parentRes = fr.getFacetResultNode();
assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
assertEquals(0.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(0, parentRes.subResults.size());
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);