diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index da470df6e54..8abaecf3d92 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -104,7 +104,7 @@ Improvements is now tagged. (Namgyu Kim) * GITHUB#11785: Improve Tessellator performance by delaying calls to the method - #isIntersectingPolygon (Ignacio Vera) + #isIntersectingPolygon (Ignacio Vera) * GITHUB#687: speed up IndexSortSortedNumericDocValuesRangeQuery#BoundedDocIdSetIterator construction using bkd binary search. (Jianping Weng) @@ -115,6 +115,9 @@ Bug Fixes trying to apply a dictionary whose size is greater than the maximum supported window size for LZ4. (Adrien Grand) +* GITHUB#11768: Taxonomy and SSDV faceting now correctly breaks ties by preferring smaller ordinal + values. (Greg Miller) + Optimizations --------------------- * GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all diff --git a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java index 55a0bb1ccab..957efec0747 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java @@ -170,17 +170,19 @@ public class StringValueFacetCounts extends Facets { TopOrdAndIntQueue q = null; TopOrdAndIntQueue.OrdAndValue reuse = null; int bottomCount = 0; + int bottomOrd = Integer.MAX_VALUE; int childCount = 0; // total number of labels with non-zero count if (sparseCounts != null) { for (IntIntCursor cursor : sparseCounts) { childCount++; // every count in sparseValues should be non-zero + int ord = cursor.key; int count = cursor.value; - if (count > bottomCount) { + if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) { if (reuse == null) { reuse = new TopOrdAndIntQueue.OrdAndValue(); } - reuse.ord = cursor.key; + reuse.ord = ord; reuse.value = count; if (q == null) { // Lazy init for sparse case: @@ -189,6 +191,7 @@ public class StringValueFacetCounts extends Facets { reuse = q.insertWithOverflow(reuse); if (q.size() == topN) { bottomCount = q.top().value; + bottomOrd = q.top().ord; } } } @@ -197,7 +200,7 @@ public class StringValueFacetCounts extends Facets { int count = denseCounts[i]; if (count != 0) { childCount++; - if (count > bottomCount) { + if (count > bottomCount || (count == bottomCount && i < bottomOrd)) { if (reuse == null) { reuse = new TopOrdAndIntQueue.OrdAndValue(); } @@ -210,6 +213,7 @@ public class StringValueFacetCounts extends Facets { reuse = q.insertWithOverflow(reuse); if (q.size() == topN) { bottomCount = q.top().value; + bottomOrd = q.top().ord; } } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java index 66bfc56be23..ad3d412c97e 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java @@ -303,6 +303,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets { PrimitiveIterator.OfInt childOrds, int topN, DimConfig dimConfig, int pathOrd) { TopOrdAndIntQueue q = null; int bottomCount = 0; + int bottomOrd = Integer.MAX_VALUE; int pathCount = 0; int childCount = 0; @@ -313,7 +314,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets { if (count > 0) { pathCount += count; childCount++; - if (count > bottomCount) { + if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) { if (reuse == null) { reuse = new TopOrdAndIntQueue.OrdAndValue(); } @@ -327,6 +328,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets { reuse = q.insertWithOverflow(reuse); if (q.size() == topN) { bottomCount = q.top().value; + bottomOrd = q.top().value; } } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java index 515f60229d3..35c71a4c661 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java @@ -179,6 +179,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN)); float bottomValue = 0; + int bottomOrd = Integer.MAX_VALUE; int[] children = getChildren(); int[] siblings = getSiblings(); @@ -189,18 +190,20 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { TopOrdAndFloatQueue.OrdAndValue reuse = null; while (ord != TaxonomyReader.INVALID_ORDINAL) { - if (values[ord] > 0) { - aggregatedValue = aggregationFunction.aggregate(aggregatedValue, values[ord]); + float value = values[ord]; + if (value > 0) { + aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value); childCount++; - if (values[ord] > bottomValue) { + if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) { if (reuse == null) { reuse = new TopOrdAndFloatQueue.OrdAndValue(); } reuse.ord = ord; - reuse.value = values[ord]; + reuse.value = value; reuse = q.insertWithOverflow(reuse); if (q.size() == topN) { bottomValue = q.top().value; + bottomOrd = q.top().ord; } } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java index 1c7ef0df2dc..9a8b94b573b 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java @@ -251,6 +251,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { throws IOException { TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN)); int bottomValue = 0; + int bottomOrd = Integer.MAX_VALUE; int aggregatedValue = 0; int childCount = 0; @@ -265,7 +266,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { if (parents[ord] == pathOrd && value > 0) { aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value); childCount++; - if (value > bottomValue) { + if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) { if (reuse == null) { reuse = new TopOrdAndIntQueue.OrdAndValue(); } @@ -274,6 +275,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { reuse = q.insertWithOverflow(reuse); if (q.size() == topN) { bottomValue = q.top().value; + bottomOrd = q.top().ord; } } } @@ -287,7 +289,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { if (value > 0) { aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value); childCount++; - if (value > bottomValue) { + if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) { if (reuse == null) { reuse = new TopOrdAndIntQueue.OrdAndValue(); } @@ -296,6 +298,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { reuse = q.insertWithOverflow(reuse); if (q.size() == topN) { bottomValue = q.top().value; + bottomOrd = q.top().ord; } } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java index 2a6f15ad23b..7274bfe7a45 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java @@ -626,7 +626,7 @@ public class TestDrillSideways extends FacetTestCase { List topNDimsResult = r.facets.getTopDims(1, 2); assertEquals(1, topNDimsResult.size()); assertEquals( - "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Susan (1)\n", + "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n", topNDimsResult.get(0).toString()); // test getTopDims(10, 10) and expect same results from getAllDims(10) @@ -1899,7 +1899,7 @@ public class TestDrillSideways extends FacetTestCase { List topNDimsResult = facets.getTopDims(1, 2); assertEquals(1, topNDimsResult.size()); assertEquals( - "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Susan (1)\n", + "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n", topNDimsResult.get(0).toString()); // test getAllDims(0) diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java index cfc06269466..6ab5772c689 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java @@ -253,17 +253,17 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { List top1results = facets.getAllDims(1); assertEquals(3, results.size()); - assertEquals("dim=a path=[] value=3 childCount=3\n foo3 (1)\n", top1results.get(0).toString()); - assertEquals("dim=b path=[] value=3 childCount=3\n bar2 (1)\n", top1results.get(1).toString()); + assertEquals("dim=a path=[] value=3 childCount=3\n foo1 (1)\n", top1results.get(0).toString()); + assertEquals("dim=b path=[] value=3 childCount=3\n aar1 (1)\n", top1results.get(1).toString()); assertEquals("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", top1results.get(2).toString()); // test default implementation of getTopDims List topNDimsResult = facets.getTopDims(2, 1); assertEquals(2, topNDimsResult.size()); assertEquals( - "dim=a path=[] value=3 childCount=3\n foo3 (1)\n", topNDimsResult.get(0).toString()); + "dim=a path=[] value=3 childCount=3\n foo1 (1)\n", topNDimsResult.get(0).toString()); assertEquals( - "dim=b path=[] value=3 childCount=3\n bar2 (1)\n", topNDimsResult.get(1).toString()); + "dim=b path=[] value=3 childCount=3\n aar1 (1)\n", topNDimsResult.get(1).toString()); // test getTopDims(10, 10) and expect same results from getAllDims(10) List allDimsResults = facets.getTopDims(10, 10);