SOLR-11664: fix range facet issues with sub-aggregations on string fields, adds resetIterator to SlotAcc

This commit is contained in:
yonik 2017-12-05 20:14:57 -05:00
parent 2c14b91418
commit e84cce8ea1
9 changed files with 68 additions and 25 deletions

View File

@ -208,6 +208,10 @@ Bug Fixes
* SOLR-11590: Synchronize ZK connect/disconnect handling so that they are processed in linear order
(noble, Varun Thacker)
* SOLR-11664: JSON Facet API: range facets containing unique, hll, min, max aggregations over string fields
produced incorrect results since 7.0 (Volodymyr Rudniev, yonik)
Optimizations
----------------------
* SOLR-11285: Refactor autoscaling framework to avoid direct references to Zookeeper and Solr

View File

@ -199,6 +199,7 @@ public class HLLAgg extends StrAggValueSource {
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
super.setNextReader(readerContext);
values = DocValues.getNumeric(readerContext.reader(), sf.getName());
}
@ -224,6 +225,7 @@ public class HLLAgg extends StrAggValueSource {
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
super.setNextReader(readerContext);
values = DocValues.getSortedNumeric(readerContext.reader(), sf.getName());
}

View File

@ -303,8 +303,8 @@ public class MinMaxAgg extends SimpleAggValueSource {
}
@Override
public void reset() throws IOException {
super.reset();
public void resetIterators() throws IOException {
super.resetIterators();
topLevel = FieldUtil.getSortedDocValues(fcontext.qcontext, field, null);
if (topLevel instanceof MultiDocValues.MultiSortedDocValues) {
ordMap = ((MultiDocValues.MultiSortedDocValues)topLevel).mapping;
@ -322,13 +322,11 @@ public class MinMaxAgg extends SimpleAggValueSource {
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
if (topLevel == null) {
reset();
}
super.setNextReader(readerContext);
if (subDvs != null) {
subDv = subDvs[readerContext.ord];
toGlobal = ordMap.getGlobalOrds(readerContext.ord);
assert toGlobal != null;
} else {
assert readerContext.ord==0 || topLevel.getValueCount() == 0;
subDv = topLevel;

View File

@ -43,12 +43,27 @@ import org.apache.solr.search.SolrIndexSearcher;
public abstract class SlotAcc implements Closeable {
String key; // todo...
protected final FacetContext fcontext;
protected LeafReaderContext currentReaderContext;
protected int currentDocBase;
public SlotAcc(FacetContext fcontext) {
this.fcontext = fcontext;
}
public void setNextReader(LeafReaderContext readerContext) throws IOException {}
/**
* NOTE: this currently detects when it is being reused and calls resetIterators by comparing reader ords
* with previous calls to setNextReader. For this reason, current users must call setNextReader
* in segment order. Failure to do so will cause worse performance.
*/
public void setNextReader(LeafReaderContext readerContext) throws IOException {
LeafReaderContext lastReaderContext = currentReaderContext;
currentReaderContext = readerContext;
currentDocBase = currentReaderContext.docBase;
if (lastReaderContext == null || lastReaderContext.ord >= currentReaderContext.ord) {
// if we went backwards (or reused) a reader, we need to reset iterators that can be used only once.
resetIterators();
}
}
public abstract void collect(int doc, int slot) throws IOException;
@ -96,8 +111,12 @@ public abstract class SlotAcc implements Closeable {
}
}
/** Called to reset the acc to a fresh state, ready for reuse */
public abstract void reset() throws IOException;
/** Typically called from setNextReader to reset docValue iterators */
protected void resetIterators() throws IOException {};
public abstract void resize(Resizer resizer);
@Override
@ -208,6 +227,7 @@ abstract class FuncSlotAcc extends SlotAcc {
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
super.setNextReader(readerContext);
values = valueSource.getValues(fcontext.qcontext, readerContext);
}
}

View File

@ -40,8 +40,7 @@ class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
}
@Override
public void reset() throws IOException {
super.reset();
public void resetIterators() throws IOException {
topLevel = FieldUtil.getSortedSetDocValues(fcontext.qcontext, field, null);
nTerms = (int) topLevel.getValueCount();
if (topLevel instanceof MultiDocValues.MultiSortedSetDocValues) {
@ -60,9 +59,6 @@ class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
if (topLevel == null) {
reset();
}
super.setNextReader(readerContext);
if (subDvs != null) {
subDv = subDvs[readerContext.ord];

View File

@ -38,13 +38,11 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
public UniqueSinglevaluedSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
super(fcontext, field, numSlots, factory);
// let setNextReader lazily call reset(), that way an extra call to reset() after creation won't matter
}
@Override
public void reset() throws IOException {
super.reset();
SolrIndexSearcher searcher = fcontext.qcontext.searcher();
public void resetIterators() throws IOException {
super.resetIterators();
topLevel = FieldUtil.getSortedDocValues(fcontext.qcontext, field, null);
nTerms = topLevel.getValueCount();
if (topLevel instanceof MultiDocValues.MultiSortedDocValues) {
@ -63,17 +61,16 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
if (topLevel == null) {
reset();
}
super.setNextReader(readerContext);
if (subDvs != null) {
subDv = subDvs[readerContext.ord];
toGlobal = ordMap.getGlobalOrds(readerContext.ord);
assert toGlobal != null;
} else {
assert readerContext.ord==0 || topLevel.getValueCount() == 0;
subDv = topLevel;
}
assert subDv.docID() == -1; // make sure we haven't used this iterator before
}
@Override

View File

@ -33,7 +33,6 @@ abstract class UniqueSlotAcc extends SlotAcc {
HLLAgg.HLLFactory factory;
SchemaField field;
FixedBitSet[] arr;
int currentDocBase;
int[] counts; // populated with the cardinality once
int nTerms;
@ -53,11 +52,6 @@ abstract class UniqueSlotAcc extends SlotAcc {
}
}
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
currentDocBase = readerContext.docBase;
}
@Override
public Object getValue(int slot) throws IOException {
if (fcontext.isShard()) {

View File

@ -108,6 +108,11 @@ public class DebugAgg extends AggValueSource {
sub.reset();
}
@Override
protected void resetIterators() throws IOException {
sub.resetIterators();
}
@Override
public void resize(Resizer resizer) {
resizes.addAndGet(1);

View File

@ -591,6 +591,9 @@ public class TestJsonFacets extends SolrTestCaseHS {
iclient.commit();
client.commit();
// test for presence of debugging info
ModifiableSolrParams debugP = params(p);
debugP.set("debugQuery","true");
@ -729,7 +732,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
// Same thing for dates
// test min/max of string field
if (date.equals("date_dt") || date.equals("date_dtd")) { // supports only single valued currently...
if (date.equals("date_dt") || date.equals("date_dtd")) { // supports only single valued currently... see SOLR-11706
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{" +
" f3:{${terms} type:field, field:${num_is}, facet:{a:'min(${date})'}, sort:'a desc' }" +
@ -1037,6 +1040,30 @@ public class TestJsonFacets extends SolrTestCaseHS {
" } }"
);
// range facet with stats on string fields
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{f:{type:range, field:${num_d}, start:-5, end:10, gap:5, other:all, facet:{ wn:'unique(${where_s})',wh:'hll(${where_s})' } }}"
)
, "facets=={count:6, f:{buckets:[ {val:-5.0,count:1,wn:1,wh:1}, {val:0.0,count:2,wn:2,wh:2}, {val:5.0,count:0}]" +
" ,before:{count:1,wn:1,wh:1}" +
" ,after:{count:1,wn:1,wh:1} " +
" ,between:{count:3,wn:2,wh:2} " +
" } }"
);
if (where_s.equals("where_s") || where_s.equals("where_sd")) { // min/max only supports only single valued currently... see SOLR-11706
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{f:{type:range, field:${num_d}, start:-5, end:10, gap:5, other:all, facet:{ wmin:'min(${where_s})', wmax:'max(${where_s})' } }}"
)
, "facets=={count:6, f:{buckets:[ {val:-5.0,count:1,wmin:NY,wmax:NY}, {val:0.0,count:2,wmin:NJ,wmax:NY}, {val:5.0,count:0}]" +
" ,before:{count:1,wmin:NJ,wmax:NJ}" +
" ,after:{count:1,wmin:NJ,wmax:NJ} " +
" ,between:{count:3,wmin:NJ,wmax:NY} " +
" } }"
);
}
// stats at top level
client.testJQ(params(p, "q", "*:*"