SOLR-10596: fix unique/hll docvalue iterator reuse

This commit is contained in:
yonik 2017-05-03 23:04:33 -04:00
parent 40c8ea4b1a
commit 3a7aedcef9
9 changed files with 52 additions and 25 deletions

View File

@ -312,7 +312,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
} }
// note: only called by enum/stream prior to collect // note: only called by enum/stream prior to collect
void resetStats() { void resetStats() throws IOException {
countAcc.reset(); countAcc.reset();
for (SlotAcc acc : accs) { for (SlotAcc acc : accs) {
acc.reset(); acc.reset();

View File

@ -52,16 +52,16 @@ public class HLLAgg extends StrAggValueSource {
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(getArg()); SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(getArg());
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
if (sf.hasDocValues()) { if (sf.hasDocValues()) {
return new UniqueMultiDvSlotAcc(fcontext, getArg(), numSlots, fcontext.isShard() ? factory : null); return new UniqueMultiDvSlotAcc(fcontext, sf, numSlots, fcontext.isShard() ? factory : null);
} else { } else {
return new UniqueMultivaluedSlotAcc(fcontext, getArg(), numSlots, fcontext.isShard() ? factory : null); return new UniqueMultivaluedSlotAcc(fcontext, sf, numSlots, fcontext.isShard() ? factory : null);
} }
} else { } else {
if (sf.getType().getNumberType() != null) { if (sf.getType().getNumberType() != null) {
// always use hll here since we don't know how many values there are? // always use hll here since we don't know how many values there are?
return new NumericAcc(fcontext, getArg(), numSlots); return new NumericAcc(fcontext, getArg(), numSlots);
} else { } else {
return new UniqueSinglevaluedSlotAcc(fcontext, getArg(), numSlots, fcontext.isShard() ? factory : null); return new UniqueSinglevaluedSlotAcc(fcontext, sf, numSlots, fcontext.isShard() ? factory : null);
} }
} }
} }

View File

@ -94,7 +94,7 @@ public abstract class SlotAcc implements Closeable {
} }
} }
public abstract void reset(); public abstract void reset() throws IOException;
public abstract void resize(Resizer resizer); public abstract void resize(Resizer resizer);

View File

@ -43,15 +43,15 @@ public class UniqueAgg extends StrAggValueSource {
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(getArg()); SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(getArg());
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
if (sf.hasDocValues()) { if (sf.hasDocValues()) {
return new UniqueMultiDvSlotAcc(fcontext, getArg(), numSlots, null); return new UniqueMultiDvSlotAcc(fcontext, sf, numSlots, null);
} else { } else {
return new UniqueMultivaluedSlotAcc(fcontext, getArg(), numSlots, null); return new UniqueMultivaluedSlotAcc(fcontext, sf, numSlots, null);
} }
} else { } else {
if (sf.getType().getNumberType() != null) { if (sf.getType().getNumberType() != null) {
return new NumericAcc(fcontext, getArg(), numSlots); return new NumericAcc(fcontext, getArg(), numSlots);
} else { } else {
return new UniqueSinglevaluedSlotAcc(fcontext, getArg(), numSlots, null); return new UniqueSinglevaluedSlotAcc(fcontext, sf, numSlots, null);
} }
} }
} }

View File

@ -25,19 +25,23 @@ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongValues; import org.apache.lucene.util.LongValues;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.schema.SchemaField;
class UniqueMultiDvSlotAcc extends UniqueSlotAcc { class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
final SortedSetDocValues topLevel; SortedSetDocValues topLevel;
final SortedSetDocValues[] subDvs; SortedSetDocValues[] subDvs;
final MultiDocValues.OrdinalMap ordMap; MultiDocValues.OrdinalMap ordMap;
LongValues toGlobal; LongValues toGlobal;
SortedSetDocValues subDv; SortedSetDocValues subDv;
public UniqueMultiDvSlotAcc(FacetContext fcontext, String field, int numSlots, HLLAgg.HLLFactory factory) throws IOException { public UniqueMultiDvSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
super(fcontext, field, numSlots, factory); super(fcontext, field, numSlots, factory);
SolrIndexSearcher searcher = fcontext.qcontext.searcher(); }
topLevel = FieldUtil.getSortedSetDocValues(fcontext.qcontext, searcher.getSchema().getField(field), null);
@Override
public void reset() throws IOException {
super.reset();
topLevel = FieldUtil.getSortedSetDocValues(fcontext.qcontext, field, null);
nTerms = (int) topLevel.getValueCount(); nTerms = (int) topLevel.getValueCount();
if (topLevel instanceof MultiDocValues.MultiSortedSetDocValues) { if (topLevel instanceof MultiDocValues.MultiSortedSetDocValues) {
ordMap = ((MultiDocValues.MultiSortedSetDocValues) topLevel).mapping; ordMap = ((MultiDocValues.MultiSortedSetDocValues) topLevel).mapping;
@ -55,6 +59,9 @@ class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
@Override @Override
public void setNextReader(LeafReaderContext readerContext) throws IOException { public void setNextReader(LeafReaderContext readerContext) throws IOException {
if (topLevel == null) {
reset();
}
super.setNextReader(readerContext); super.setNextReader(readerContext);
if (subDvs != null) { if (subDvs != null) {
subDv = subDvs[readerContext.ord]; subDv = subDvs[readerContext.ord];

View File

@ -21,16 +21,17 @@ import java.io.IOException;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
class UniqueMultivaluedSlotAcc extends UniqueSlotAcc implements UnInvertedField.Callback { class UniqueMultivaluedSlotAcc extends UniqueSlotAcc implements UnInvertedField.Callback {
private UnInvertedField uif; private UnInvertedField uif;
private UnInvertedField.DocToTerm docToTerm; private UnInvertedField.DocToTerm docToTerm;
public UniqueMultivaluedSlotAcc(FacetContext fcontext, String field, int numSlots, HLLAgg.HLLFactory factory) throws IOException { public UniqueMultivaluedSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
super(fcontext, field, numSlots, factory); super(fcontext, field, numSlots, factory);
SolrIndexSearcher searcher = fcontext.qcontext.searcher(); SolrIndexSearcher searcher = fcontext.qcontext.searcher();
uif = UnInvertedField.getUnInvertedField(field, searcher); uif = UnInvertedField.getUnInvertedField(field.getName(), searcher);
docToTerm = uif.new DocToTerm(); docToTerm = uif.new DocToTerm();
fcontext.qcontext.addCloseHook(this); // TODO: find way to close accumulators instead of using close hook? fcontext.qcontext.addCloseHook(this); // TODO: find way to close accumulators instead of using close hook?
nTerms = uif.numTerms(); nTerms = uif.numTerms();

View File

@ -25,19 +25,26 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongValues; import org.apache.lucene.util.LongValues;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc { class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
final SortedDocValues topLevel; SortedDocValues topLevel;
final SortedDocValues[] subDvs; SortedDocValues[] subDvs;
final MultiDocValues.OrdinalMap ordMap; MultiDocValues.OrdinalMap ordMap;
LongValues toGlobal; LongValues toGlobal;
SortedDocValues subDv; SortedDocValues subDv;
public UniqueSinglevaluedSlotAcc(FacetContext fcontext, String field, int numSlots, HLLAgg.HLLFactory factory) throws IOException { public UniqueSinglevaluedSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
super(fcontext, field, numSlots, factory); super(fcontext, field, numSlots, factory);
// let setNextReader lazily call reset(), that way an extra call to reset() after creation won't matter
}
@Override
public void reset() throws IOException {
super.reset();
SolrIndexSearcher searcher = fcontext.qcontext.searcher(); SolrIndexSearcher searcher = fcontext.qcontext.searcher();
topLevel = FieldUtil.getSortedDocValues(fcontext.qcontext, searcher.getSchema().getField(field), null); topLevel = FieldUtil.getSortedDocValues(fcontext.qcontext, field, null);
nTerms = topLevel.getValueCount(); nTerms = topLevel.getValueCount();
if (topLevel instanceof MultiDocValues.MultiSortedDocValues) { if (topLevel instanceof MultiDocValues.MultiSortedDocValues) {
ordMap = ((MultiDocValues.MultiSortedDocValues)topLevel).mapping; ordMap = ((MultiDocValues.MultiSortedDocValues)topLevel).mapping;
@ -55,6 +62,9 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
@Override @Override
public void setNextReader(LeafReaderContext readerContext) throws IOException { public void setNextReader(LeafReaderContext readerContext) throws IOException {
if (topLevel == null) {
reset();
}
super.setNextReader(readerContext); super.setNextReader(readerContext);
if (subDvs != null) { if (subDvs != null) {
subDv = subDvs[readerContext.ord]; subDv = subDvs[readerContext.ord];

View File

@ -37,15 +37,15 @@ abstract class UniqueSlotAcc extends SlotAcc {
int[] counts; // populated with the cardinality once int[] counts; // populated with the cardinality once
int nTerms; int nTerms;
public UniqueSlotAcc(FacetContext fcontext, String field, int numSlots, HLLAgg.HLLFactory factory) throws IOException { public UniqueSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
super(fcontext); super(fcontext);
this.factory = factory; this.factory = factory;
arr = new FixedBitSet[numSlots]; arr = new FixedBitSet[numSlots];
this.field = fcontext.searcher.getSchema().getField(field); this.field = field;
} }
@Override @Override
public void reset() { public void reset() throws IOException {
counts = null; counts = null;
for (FixedBitSet bits : arr) { for (FixedBitSet bits : arr) {
if (bits == null) continue; if (bits == null) continue;

View File

@ -1360,6 +1360,15 @@ public class TestJsonFacets extends SolrTestCaseHS {
"}" "}"
); );
// test acc reuse (i.e. reset() method). This is normally used for stats that are not calculated in the first phase,
// currently non-sorting stats.
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{f1:{type:terms, field:'${cat_s}', facet:{h:'hll(${where_s})'} }}"
)
, "facets=={ 'count':6, " +
"'f1':{ buckets:[{val:B, count:3, h:2},{val:A, count:2, h:2}] } } "
);
} }
@Test @Test