diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 2f09b53703b..a6e146c117a 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -208,6 +208,10 @@ Bug Fixes * SOLR-11590: Synchronize ZK connect/disconnect handling so that they are processed in linear order (noble, Varun Thacker) +* SOLR-11664: JSON Facet API: range facets containing unique, hll, min, max aggregations over string fields + produced incorrect results since 7.0 (Volodymyr Rudniev, yonik) + + Optimizations ---------------------- * SOLR-11285: Refactor autoscaling framework to avoid direct references to Zookeeper and Solr diff --git a/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java b/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java index 85964f7117b..897dceb5eb2 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java @@ -199,6 +199,7 @@ public class HLLAgg extends StrAggValueSource { @Override public void setNextReader(LeafReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); values = DocValues.getNumeric(readerContext.reader(), sf.getName()); } @@ -224,6 +225,7 @@ public class HLLAgg extends StrAggValueSource { @Override public void setNextReader(LeafReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); values = DocValues.getSortedNumeric(readerContext.reader(), sf.getName()); } diff --git a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java index 008d0fd4445..ac8bf0bdf3e 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java @@ -303,8 +303,8 @@ public class MinMaxAgg extends SimpleAggValueSource { } @Override - public void reset() throws IOException { - super.reset(); + public void resetIterators() throws IOException { + super.resetIterators(); topLevel = FieldUtil.getSortedDocValues(fcontext.qcontext, field, null); if (topLevel instanceof MultiDocValues.MultiSortedDocValues) { ordMap = ((MultiDocValues.MultiSortedDocValues)topLevel).mapping; @@ -322,13 +322,11 @@ public class MinMaxAgg extends SimpleAggValueSource { @Override public void setNextReader(LeafReaderContext readerContext) throws IOException { - if (topLevel == null) { - reset(); - } super.setNextReader(readerContext); if (subDvs != null) { subDv = subDvs[readerContext.ord]; toGlobal = ordMap.getGlobalOrds(readerContext.ord); + assert toGlobal != null; } else { assert readerContext.ord==0 || topLevel.getValueCount() == 0; subDv = topLevel; diff --git a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java index 578ef1796a9..9165799846e 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java +++ b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java @@ -43,12 +43,27 @@ import org.apache.solr.search.SolrIndexSearcher; public abstract class SlotAcc implements Closeable { String key; // todo... protected final FacetContext fcontext; + protected LeafReaderContext currentReaderContext; + protected int currentDocBase; public SlotAcc(FacetContext fcontext) { this.fcontext = fcontext; } - public void setNextReader(LeafReaderContext readerContext) throws IOException {} + /** + * NOTE: this currently detects when it is being reused and calls resetIterators by comparing reader ords + * with previous calls to setNextReader. For this reason, current users must call setNextReader + * in segment order. Failure to do so will cause worse performance. + */ + public void setNextReader(LeafReaderContext readerContext) throws IOException { + LeafReaderContext lastReaderContext = currentReaderContext; + currentReaderContext = readerContext; + currentDocBase = currentReaderContext.docBase; + if (lastReaderContext == null || lastReaderContext.ord >= currentReaderContext.ord) { + // if we went backwards (or reused) a reader, we need to reset iterators that can be used only once. + resetIterators(); + } + } public abstract void collect(int doc, int slot) throws IOException; @@ -96,8 +111,12 @@ public abstract class SlotAcc implements Closeable { } } + /** Called to reset the acc to a fresh state, ready for reuse */ public abstract void reset() throws IOException; + /** Typically called from setNextReader to reset docValue iterators */ + protected void resetIterators() throws IOException {}; + public abstract void resize(Resizer resizer); @Override @@ -208,6 +227,7 @@ abstract class FuncSlotAcc extends SlotAcc { @Override public void setNextReader(LeafReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); values = valueSource.getValues(fcontext.qcontext, readerContext); } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueMultiDvSlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueMultiDvSlotAcc.java index 0a6eb226d0a..02d457fe412 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/UniqueMultiDvSlotAcc.java +++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueMultiDvSlotAcc.java @@ -40,8 +40,7 @@ class UniqueMultiDvSlotAcc extends UniqueSlotAcc { } @Override - public void reset() throws IOException { - super.reset(); + public void resetIterators() throws IOException { topLevel = FieldUtil.getSortedSetDocValues(fcontext.qcontext, field, null); nTerms = (int) topLevel.getValueCount(); if (topLevel instanceof MultiDocValues.MultiSortedSetDocValues) { @@ -60,9 +59,6 @@ class UniqueMultiDvSlotAcc extends UniqueSlotAcc { @Override public void setNextReader(LeafReaderContext readerContext) throws IOException { - if (topLevel == null) { - reset(); - } super.setNextReader(readerContext); if (subDvs != null) { subDv = subDvs[readerContext.ord]; diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java index 434e680ca2f..ca261eaef1c 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java +++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java @@ -38,13 +38,11 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc { public UniqueSinglevaluedSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException { super(fcontext, field, numSlots, factory); - // let setNextReader lazily call reset(), that way an extra call to reset() after creation won't matter } @Override - public void reset() throws IOException { - super.reset(); - SolrIndexSearcher searcher = fcontext.qcontext.searcher(); + public void resetIterators() throws IOException { + super.resetIterators(); topLevel = FieldUtil.getSortedDocValues(fcontext.qcontext, field, null); nTerms = topLevel.getValueCount(); if (topLevel instanceof MultiDocValues.MultiSortedDocValues) { @@ -63,17 +61,16 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc { @Override public void setNextReader(LeafReaderContext readerContext) throws IOException { - if (topLevel == null) { - reset(); - } super.setNextReader(readerContext); if (subDvs != null) { subDv = subDvs[readerContext.ord]; toGlobal = ordMap.getGlobalOrds(readerContext.ord); + assert toGlobal != null; } else { assert readerContext.ord==0 || topLevel.getValueCount() == 0; subDv = topLevel; } + assert subDv.docID() == -1; // make sure we haven't used this iterator before } @Override diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java index 607b0674b79..ca95d8f5680 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java +++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java @@ -33,7 +33,6 @@ abstract class UniqueSlotAcc extends SlotAcc { HLLAgg.HLLFactory factory; SchemaField field; FixedBitSet[] arr; - int currentDocBase; int[] counts; // populated with the cardinality once int nTerms; @@ -53,11 +52,6 @@ abstract class UniqueSlotAcc extends SlotAcc { } } - @Override - public void setNextReader(LeafReaderContext readerContext) throws IOException { - currentDocBase = readerContext.docBase; - } - @Override public Object getValue(int slot) throws IOException { if (fcontext.isShard()) { diff --git a/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java b/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java index d1be4fe12a1..8eed4c71ba9 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java +++ b/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java @@ -108,6 +108,11 @@ public class DebugAgg extends AggValueSource { sub.reset(); } + @Override + protected void resetIterators() throws IOException { + sub.resetIterators(); + } + @Override public void resize(Resizer resizer) { resizes.addAndGet(1); diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java index a1c8cf7ae0d..bf709cfe54a 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java @@ -591,6 +591,9 @@ public class TestJsonFacets extends SolrTestCaseHS { iclient.commit(); client.commit(); + + + // test for presence of debugging info ModifiableSolrParams debugP = params(p); debugP.set("debugQuery","true"); @@ -729,7 +732,7 @@ public class TestJsonFacets extends SolrTestCaseHS { // Same thing for dates // test min/max of string field - if (date.equals("date_dt") || date.equals("date_dtd")) { // supports only single valued currently... + if (date.equals("date_dt") || date.equals("date_dtd")) { // supports only single valued currently... see SOLR-11706 client.testJQ(params(p, "q", "*:*" , "json.facet", "{" + " f3:{${terms} type:field, field:${num_is}, facet:{a:'min(${date})'}, sort:'a desc' }" + @@ -1037,6 +1040,30 @@ public class TestJsonFacets extends SolrTestCaseHS { " } }" ); + // range facet with stats on string fields + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f:{type:range, field:${num_d}, start:-5, end:10, gap:5, other:all, facet:{ wn:'unique(${where_s})',wh:'hll(${where_s})' } }}" + ) + , "facets=={count:6, f:{buckets:[ {val:-5.0,count:1,wn:1,wh:1}, {val:0.0,count:2,wn:2,wh:2}, {val:5.0,count:0}]" + + " ,before:{count:1,wn:1,wh:1}" + + " ,after:{count:1,wn:1,wh:1} " + + " ,between:{count:3,wn:2,wh:2} " + + " } }" + ); + + if (where_s.equals("where_s") || where_s.equals("where_sd")) { // min/max only supports only single valued currently... see SOLR-11706 + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f:{type:range, field:${num_d}, start:-5, end:10, gap:5, other:all, facet:{ wmin:'min(${where_s})', wmax:'max(${where_s})' } }}" + ) + , "facets=={count:6, f:{buckets:[ {val:-5.0,count:1,wmin:NY,wmax:NY}, {val:0.0,count:2,wmin:NJ,wmax:NY}, {val:5.0,count:0}]" + + " ,before:{count:1,wmin:NJ,wmax:NJ}" + + " ,after:{count:1,wmin:NJ,wmax:NJ} " + + " ,between:{count:3,wmin:NJ,wmax:NY} " + + " } }" + ); + } + + // stats at top level client.testJQ(params(p, "q", "*:*"