Fix tie-break bug in various Facets implementations (#11768)

This commit is contained in:
Greg Miller 2022-09-26 15:05:57 -07:00 committed by GitHub
parent 734841d6c0
commit 971ae01164
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 32 additions and 17 deletions

View File

@ -104,7 +104,7 @@ Improvements
is now tagged. (Namgyu Kim)
* GITHUB#11785: Improve Tessellator performance by delaying calls to the method
#isIntersectingPolygon (Ignacio Vera)
#isIntersectingPolygon (Ignacio Vera)
* GITHUB#687: speed up IndexSortSortedNumericDocValuesRangeQuery#BoundedDocIdSetIterator
construction using bkd binary search. (Jianping Weng)
@ -115,6 +115,9 @@ Bug Fixes
trying to apply a dictionary whose size is greater than the maximum supported
window size for LZ4. (Adrien Grand)
* GITHUB#11768: Taxonomy and SSDV faceting now correctly breaks ties by preferring smaller ordinal
values. (Greg Miller)
Optimizations
---------------------
* GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all

View File

@ -170,17 +170,19 @@ public class StringValueFacetCounts extends Facets {
TopOrdAndIntQueue q = null;
TopOrdAndIntQueue.OrdAndValue reuse = null;
int bottomCount = 0;
int bottomOrd = Integer.MAX_VALUE;
int childCount = 0; // total number of labels with non-zero count
if (sparseCounts != null) {
for (IntIntCursor cursor : sparseCounts) {
childCount++; // every count in sparseValues should be non-zero
int ord = cursor.key;
int count = cursor.value;
if (count > bottomCount) {
if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
reuse.ord = cursor.key;
reuse.ord = ord;
reuse.value = count;
if (q == null) {
// Lazy init for sparse case:
@ -189,6 +191,7 @@ public class StringValueFacetCounts extends Facets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomCount = q.top().value;
bottomOrd = q.top().ord;
}
}
}
@ -197,7 +200,7 @@ public class StringValueFacetCounts extends Facets {
int count = denseCounts[i];
if (count != 0) {
childCount++;
if (count > bottomCount) {
if (count > bottomCount || (count == bottomCount && i < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
@ -210,6 +213,7 @@ public class StringValueFacetCounts extends Facets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomCount = q.top().value;
bottomOrd = q.top().ord;
}
}
}

View File

@ -303,6 +303,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
PrimitiveIterator.OfInt childOrds, int topN, DimConfig dimConfig, int pathOrd) {
TopOrdAndIntQueue q = null;
int bottomCount = 0;
int bottomOrd = Integer.MAX_VALUE;
int pathCount = 0;
int childCount = 0;
@ -313,7 +314,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
if (count > 0) {
pathCount += count;
childCount++;
if (count > bottomCount) {
if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
@ -327,6 +328,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
// Once the queue holds topN entries, cache the count and ordinal of q.top() so the
// tie-break check (count == bottomCount && ord < bottomOrd) can filter later candidates.
bottomCount = q.top().value;
// bottomOrd is compared against ordinals, so it must come from .ord, not .value (the count).
bottomOrd = q.top().ord;
}
}
}

View File

@ -179,6 +179,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
float bottomValue = 0;
int bottomOrd = Integer.MAX_VALUE;
int[] children = getChildren();
int[] siblings = getSiblings();
@ -189,18 +190,20 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
TopOrdAndFloatQueue.OrdAndValue reuse = null;
while (ord != TaxonomyReader.INVALID_ORDINAL) {
if (values[ord] > 0) {
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, values[ord]);
float value = values[ord];
if (value > 0) {
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
childCount++;
if (values[ord] > bottomValue) {
if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndFloatQueue.OrdAndValue();
}
reuse.ord = ord;
reuse.value = values[ord];
reuse.value = value;
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().value;
bottomOrd = q.top().ord;
}
}
}

View File

@ -251,6 +251,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
throws IOException {
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
int bottomValue = 0;
int bottomOrd = Integer.MAX_VALUE;
int aggregatedValue = 0;
int childCount = 0;
@ -265,7 +266,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
if (parents[ord] == pathOrd && value > 0) {
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
childCount++;
if (value > bottomValue) {
if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
@ -274,6 +275,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().value;
bottomOrd = q.top().ord;
}
}
}
@ -287,7 +289,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
if (value > 0) {
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
childCount++;
if (value > bottomValue) {
if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
@ -296,6 +298,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().value;
bottomOrd = q.top().ord;
}
}
}

View File

@ -626,7 +626,7 @@ public class TestDrillSideways extends FacetTestCase {
List<FacetResult> topNDimsResult = r.facets.getTopDims(1, 2);
assertEquals(1, topNDimsResult.size());
assertEquals(
"dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Susan (1)\n",
"dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n",
topNDimsResult.get(0).toString());
// test getTopDims(10, 10) and expect same results from getAllDims(10)
@ -1899,7 +1899,7 @@ public class TestDrillSideways extends FacetTestCase {
List<FacetResult> topNDimsResult = facets.getTopDims(1, 2);
assertEquals(1, topNDimsResult.size());
assertEquals(
"dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Susan (1)\n",
"dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n",
topNDimsResult.get(0).toString());
// test getAllDims(0)

View File

@ -253,17 +253,17 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
List<FacetResult> top1results = facets.getAllDims(1);
assertEquals(3, results.size());
assertEquals("dim=a path=[] value=3 childCount=3\n foo3 (1)\n", top1results.get(0).toString());
assertEquals("dim=b path=[] value=3 childCount=3\n bar2 (1)\n", top1results.get(1).toString());
assertEquals("dim=a path=[] value=3 childCount=3\n foo1 (1)\n", top1results.get(0).toString());
assertEquals("dim=b path=[] value=3 childCount=3\n aar1 (1)\n", top1results.get(1).toString());
assertEquals("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", top1results.get(2).toString());
// test default implementation of getTopDims
List<FacetResult> topNDimsResult = facets.getTopDims(2, 1);
assertEquals(2, topNDimsResult.size());
assertEquals(
"dim=a path=[] value=3 childCount=3\n foo3 (1)\n", topNDimsResult.get(0).toString());
"dim=a path=[] value=3 childCount=3\n foo1 (1)\n", topNDimsResult.get(0).toString());
assertEquals(
"dim=b path=[] value=3 childCount=3\n bar2 (1)\n", topNDimsResult.get(1).toString());
"dim=b path=[] value=3 childCount=3\n aar1 (1)\n", topNDimsResult.get(1).toString());
// test getTopDims(10, 10) and expect same results from getAllDims(10)
List<FacetResult> allDimsResults = facets.getTopDims(10, 10);