mirror of https://github.com/apache/lucene.git
SOLR-11664: fix range facet issues with sub-aggregations on string fields, adds resetIterator to SlotAcc
This commit is contained in:
parent
2c14b91418
commit
e84cce8ea1
|
@ -208,6 +208,10 @@ Bug Fixes
|
|||
* SOLR-11590: Synchronize ZK connect/disconnect handling so that they are processed in linear order
|
||||
(noble, Varun Thacker)
|
||||
|
||||
* SOLR-11664: JSON Facet API: range facets containing unique, hll, min, max aggregations over string fields
|
||||
produced incorrect results since 7.0 (Volodymyr Rudniev, yonik)
|
||||
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
* SOLR-11285: Refactor autoscaling framework to avoid direct references to Zookeeper and Solr
|
||||
|
|
|
@ -199,6 +199,7 @@ public class HLLAgg extends StrAggValueSource {
|
|||
|
||||
@Override
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||
super.setNextReader(readerContext);
|
||||
values = DocValues.getNumeric(readerContext.reader(), sf.getName());
|
||||
}
|
||||
|
||||
|
@ -224,6 +225,7 @@ public class HLLAgg extends StrAggValueSource {
|
|||
|
||||
@Override
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||
super.setNextReader(readerContext);
|
||||
values = DocValues.getSortedNumeric(readerContext.reader(), sf.getName());
|
||||
}
|
||||
|
||||
|
|
|
@ -303,8 +303,8 @@ public class MinMaxAgg extends SimpleAggValueSource {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
public void resetIterators() throws IOException {
|
||||
super.resetIterators();
|
||||
topLevel = FieldUtil.getSortedDocValues(fcontext.qcontext, field, null);
|
||||
if (topLevel instanceof MultiDocValues.MultiSortedDocValues) {
|
||||
ordMap = ((MultiDocValues.MultiSortedDocValues)topLevel).mapping;
|
||||
|
@ -322,13 +322,11 @@ public class MinMaxAgg extends SimpleAggValueSource {
|
|||
|
||||
@Override
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||
if (topLevel == null) {
|
||||
reset();
|
||||
}
|
||||
super.setNextReader(readerContext);
|
||||
if (subDvs != null) {
|
||||
subDv = subDvs[readerContext.ord];
|
||||
toGlobal = ordMap.getGlobalOrds(readerContext.ord);
|
||||
assert toGlobal != null;
|
||||
} else {
|
||||
assert readerContext.ord==0 || topLevel.getValueCount() == 0;
|
||||
subDv = topLevel;
|
||||
|
|
|
@ -43,12 +43,27 @@ import org.apache.solr.search.SolrIndexSearcher;
|
|||
public abstract class SlotAcc implements Closeable {
|
||||
String key; // todo...
|
||||
protected final FacetContext fcontext;
|
||||
protected LeafReaderContext currentReaderContext;
|
||||
protected int currentDocBase;
|
||||
|
||||
public SlotAcc(FacetContext fcontext) {
|
||||
this.fcontext = fcontext;
|
||||
}
|
||||
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {}
|
||||
/**
|
||||
* NOTE: this currently detects when it is being reused and calls resetIterators by comparing reader ords
|
||||
* with previous calls to setNextReader. For this reason, current users must call setNextReader
|
||||
* in segment order. Failure to do so will cause worse performance.
|
||||
*/
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||
LeafReaderContext lastReaderContext = currentReaderContext;
|
||||
currentReaderContext = readerContext;
|
||||
currentDocBase = currentReaderContext.docBase;
|
||||
if (lastReaderContext == null || lastReaderContext.ord >= currentReaderContext.ord) {
|
||||
// if we went backwards (or reused) a reader, we need to reset iterators that can be used only once.
|
||||
resetIterators();
|
||||
}
|
||||
}
|
||||
|
||||
public abstract void collect(int doc, int slot) throws IOException;
|
||||
|
||||
|
@ -96,8 +111,12 @@ public abstract class SlotAcc implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
/** Called to reset the acc to a fresh state, ready for reuse */
|
||||
public abstract void reset() throws IOException;
|
||||
|
||||
/** Typically called from setNextReader to reset docValue iterators */
|
||||
protected void resetIterators() throws IOException {};
|
||||
|
||||
public abstract void resize(Resizer resizer);
|
||||
|
||||
@Override
|
||||
|
@ -208,6 +227,7 @@ abstract class FuncSlotAcc extends SlotAcc {
|
|||
|
||||
@Override
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||
super.setNextReader(readerContext);
|
||||
values = valueSource.getValues(fcontext.qcontext, readerContext);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -40,8 +40,7 @@ class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
public void resetIterators() throws IOException {
|
||||
topLevel = FieldUtil.getSortedSetDocValues(fcontext.qcontext, field, null);
|
||||
nTerms = (int) topLevel.getValueCount();
|
||||
if (topLevel instanceof MultiDocValues.MultiSortedSetDocValues) {
|
||||
|
@ -60,9 +59,6 @@ class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
|
|||
|
||||
@Override
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||
if (topLevel == null) {
|
||||
reset();
|
||||
}
|
||||
super.setNextReader(readerContext);
|
||||
if (subDvs != null) {
|
||||
subDv = subDvs[readerContext.ord];
|
||||
|
|
|
@ -38,13 +38,11 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
|
|||
|
||||
public UniqueSinglevaluedSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
|
||||
super(fcontext, field, numSlots, factory);
|
||||
// let setNextReader lazily call reset(), that way an extra call to reset() after creation won't matter
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
SolrIndexSearcher searcher = fcontext.qcontext.searcher();
|
||||
public void resetIterators() throws IOException {
|
||||
super.resetIterators();
|
||||
topLevel = FieldUtil.getSortedDocValues(fcontext.qcontext, field, null);
|
||||
nTerms = topLevel.getValueCount();
|
||||
if (topLevel instanceof MultiDocValues.MultiSortedDocValues) {
|
||||
|
@ -63,17 +61,16 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
|
|||
|
||||
@Override
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||
if (topLevel == null) {
|
||||
reset();
|
||||
}
|
||||
super.setNextReader(readerContext);
|
||||
if (subDvs != null) {
|
||||
subDv = subDvs[readerContext.ord];
|
||||
toGlobal = ordMap.getGlobalOrds(readerContext.ord);
|
||||
assert toGlobal != null;
|
||||
} else {
|
||||
assert readerContext.ord==0 || topLevel.getValueCount() == 0;
|
||||
subDv = topLevel;
|
||||
}
|
||||
assert subDv.docID() == -1; // make sure we haven't used this iterator before
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -33,7 +33,6 @@ abstract class UniqueSlotAcc extends SlotAcc {
|
|||
HLLAgg.HLLFactory factory;
|
||||
SchemaField field;
|
||||
FixedBitSet[] arr;
|
||||
int currentDocBase;
|
||||
int[] counts; // populated with the cardinality once
|
||||
int nTerms;
|
||||
|
||||
|
@ -53,11 +52,6 @@ abstract class UniqueSlotAcc extends SlotAcc {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||
currentDocBase = readerContext.docBase;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getValue(int slot) throws IOException {
|
||||
if (fcontext.isShard()) {
|
||||
|
|
|
@ -108,6 +108,11 @@ public class DebugAgg extends AggValueSource {
|
|||
sub.reset();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void resetIterators() throws IOException {
|
||||
sub.resetIterators();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void resize(Resizer resizer) {
|
||||
resizes.addAndGet(1);
|
||||
|
|
|
@ -591,6 +591,9 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
iclient.commit();
|
||||
client.commit();
|
||||
|
||||
|
||||
|
||||
|
||||
// test for presence of debugging info
|
||||
ModifiableSolrParams debugP = params(p);
|
||||
debugP.set("debugQuery","true");
|
||||
|
@ -729,7 +732,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
|
||||
// Same thing for dates
|
||||
// test min/max of string field
|
||||
if (date.equals("date_dt") || date.equals("date_dtd")) { // supports only single valued currently...
|
||||
if (date.equals("date_dt") || date.equals("date_dtd")) { // supports only single valued currently... see SOLR-11706
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{" +
|
||||
" f3:{${terms} type:field, field:${num_is}, facet:{a:'min(${date})'}, sort:'a desc' }" +
|
||||
|
@ -1037,6 +1040,30 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
" } }"
|
||||
);
|
||||
|
||||
// range facet with stats on string fields
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{f:{type:range, field:${num_d}, start:-5, end:10, gap:5, other:all, facet:{ wn:'unique(${where_s})',wh:'hll(${where_s})' } }}"
|
||||
)
|
||||
, "facets=={count:6, f:{buckets:[ {val:-5.0,count:1,wn:1,wh:1}, {val:0.0,count:2,wn:2,wh:2}, {val:5.0,count:0}]" +
|
||||
" ,before:{count:1,wn:1,wh:1}" +
|
||||
" ,after:{count:1,wn:1,wh:1} " +
|
||||
" ,between:{count:3,wn:2,wh:2} " +
|
||||
" } }"
|
||||
);
|
||||
|
||||
if (where_s.equals("where_s") || where_s.equals("where_sd")) { // min/max only supports only single valued currently... see SOLR-11706
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{f:{type:range, field:${num_d}, start:-5, end:10, gap:5, other:all, facet:{ wmin:'min(${where_s})', wmax:'max(${where_s})' } }}"
|
||||
)
|
||||
, "facets=={count:6, f:{buckets:[ {val:-5.0,count:1,wmin:NY,wmax:NY}, {val:0.0,count:2,wmin:NJ,wmax:NY}, {val:5.0,count:0}]" +
|
||||
" ,before:{count:1,wmin:NJ,wmax:NJ}" +
|
||||
" ,after:{count:1,wmin:NJ,wmax:NJ} " +
|
||||
" ,between:{count:3,wmin:NJ,wmax:NY} " +
|
||||
" } }"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// stats at top level
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
|
|
Loading…
Reference in New Issue