mirror of https://github.com/apache/lucene.git
SOLR-10596: fix unique/hll docvalue iterator reuse
This commit is contained in:
parent
40c8ea4b1a
commit
3a7aedcef9
|
@ -312,7 +312,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// note: only called by enum/stream prior to collect
|
// note: only called by enum/stream prior to collect
|
||||||
void resetStats() {
|
void resetStats() throws IOException {
|
||||||
countAcc.reset();
|
countAcc.reset();
|
||||||
for (SlotAcc acc : accs) {
|
for (SlotAcc acc : accs) {
|
||||||
acc.reset();
|
acc.reset();
|
||||||
|
|
|
@ -52,16 +52,16 @@ public class HLLAgg extends StrAggValueSource {
|
||||||
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(getArg());
|
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(getArg());
|
||||||
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
||||||
if (sf.hasDocValues()) {
|
if (sf.hasDocValues()) {
|
||||||
return new UniqueMultiDvSlotAcc(fcontext, getArg(), numSlots, fcontext.isShard() ? factory : null);
|
return new UniqueMultiDvSlotAcc(fcontext, sf, numSlots, fcontext.isShard() ? factory : null);
|
||||||
} else {
|
} else {
|
||||||
return new UniqueMultivaluedSlotAcc(fcontext, getArg(), numSlots, fcontext.isShard() ? factory : null);
|
return new UniqueMultivaluedSlotAcc(fcontext, sf, numSlots, fcontext.isShard() ? factory : null);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (sf.getType().getNumberType() != null) {
|
if (sf.getType().getNumberType() != null) {
|
||||||
// always use hll here since we don't know how many values there are?
|
// always use hll here since we don't know how many values there are?
|
||||||
return new NumericAcc(fcontext, getArg(), numSlots);
|
return new NumericAcc(fcontext, getArg(), numSlots);
|
||||||
} else {
|
} else {
|
||||||
return new UniqueSinglevaluedSlotAcc(fcontext, getArg(), numSlots, fcontext.isShard() ? factory : null);
|
return new UniqueSinglevaluedSlotAcc(fcontext, sf, numSlots, fcontext.isShard() ? factory : null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -94,7 +94,7 @@ public abstract class SlotAcc implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract void reset();
|
public abstract void reset() throws IOException;
|
||||||
|
|
||||||
public abstract void resize(Resizer resizer);
|
public abstract void resize(Resizer resizer);
|
||||||
|
|
||||||
|
|
|
@ -43,15 +43,15 @@ public class UniqueAgg extends StrAggValueSource {
|
||||||
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(getArg());
|
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(getArg());
|
||||||
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
||||||
if (sf.hasDocValues()) {
|
if (sf.hasDocValues()) {
|
||||||
return new UniqueMultiDvSlotAcc(fcontext, getArg(), numSlots, null);
|
return new UniqueMultiDvSlotAcc(fcontext, sf, numSlots, null);
|
||||||
} else {
|
} else {
|
||||||
return new UniqueMultivaluedSlotAcc(fcontext, getArg(), numSlots, null);
|
return new UniqueMultivaluedSlotAcc(fcontext, sf, numSlots, null);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (sf.getType().getNumberType() != null) {
|
if (sf.getType().getNumberType() != null) {
|
||||||
return new NumericAcc(fcontext, getArg(), numSlots);
|
return new NumericAcc(fcontext, getArg(), numSlots);
|
||||||
} else {
|
} else {
|
||||||
return new UniqueSinglevaluedSlotAcc(fcontext, getArg(), numSlots, null);
|
return new UniqueSinglevaluedSlotAcc(fcontext, sf, numSlots, null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,19 +25,23 @@ import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
import org.apache.lucene.util.LongValues;
|
import org.apache.lucene.util.LongValues;
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
|
||||||
class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
|
class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
|
||||||
final SortedSetDocValues topLevel;
|
SortedSetDocValues topLevel;
|
||||||
final SortedSetDocValues[] subDvs;
|
SortedSetDocValues[] subDvs;
|
||||||
final MultiDocValues.OrdinalMap ordMap;
|
MultiDocValues.OrdinalMap ordMap;
|
||||||
LongValues toGlobal;
|
LongValues toGlobal;
|
||||||
SortedSetDocValues subDv;
|
SortedSetDocValues subDv;
|
||||||
|
|
||||||
public UniqueMultiDvSlotAcc(FacetContext fcontext, String field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
|
public UniqueMultiDvSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
|
||||||
super(fcontext, field, numSlots, factory);
|
super(fcontext, field, numSlots, factory);
|
||||||
SolrIndexSearcher searcher = fcontext.qcontext.searcher();
|
}
|
||||||
topLevel = FieldUtil.getSortedSetDocValues(fcontext.qcontext, searcher.getSchema().getField(field), null);
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
|
topLevel = FieldUtil.getSortedSetDocValues(fcontext.qcontext, field, null);
|
||||||
nTerms = (int) topLevel.getValueCount();
|
nTerms = (int) topLevel.getValueCount();
|
||||||
if (topLevel instanceof MultiDocValues.MultiSortedSetDocValues) {
|
if (topLevel instanceof MultiDocValues.MultiSortedSetDocValues) {
|
||||||
ordMap = ((MultiDocValues.MultiSortedSetDocValues) topLevel).mapping;
|
ordMap = ((MultiDocValues.MultiSortedSetDocValues) topLevel).mapping;
|
||||||
|
@ -55,6 +59,9 @@ class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||||
|
if (topLevel == null) {
|
||||||
|
reset();
|
||||||
|
}
|
||||||
super.setNextReader(readerContext);
|
super.setNextReader(readerContext);
|
||||||
if (subDvs != null) {
|
if (subDvs != null) {
|
||||||
subDv = subDvs[readerContext.ord];
|
subDv = subDvs[readerContext.ord];
|
||||||
|
|
|
@ -21,16 +21,17 @@ import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
|
||||||
class UniqueMultivaluedSlotAcc extends UniqueSlotAcc implements UnInvertedField.Callback {
|
class UniqueMultivaluedSlotAcc extends UniqueSlotAcc implements UnInvertedField.Callback {
|
||||||
private UnInvertedField uif;
|
private UnInvertedField uif;
|
||||||
private UnInvertedField.DocToTerm docToTerm;
|
private UnInvertedField.DocToTerm docToTerm;
|
||||||
|
|
||||||
public UniqueMultivaluedSlotAcc(FacetContext fcontext, String field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
|
public UniqueMultivaluedSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
|
||||||
super(fcontext, field, numSlots, factory);
|
super(fcontext, field, numSlots, factory);
|
||||||
SolrIndexSearcher searcher = fcontext.qcontext.searcher();
|
SolrIndexSearcher searcher = fcontext.qcontext.searcher();
|
||||||
uif = UnInvertedField.getUnInvertedField(field, searcher);
|
uif = UnInvertedField.getUnInvertedField(field.getName(), searcher);
|
||||||
docToTerm = uif.new DocToTerm();
|
docToTerm = uif.new DocToTerm();
|
||||||
fcontext.qcontext.addCloseHook(this); // TODO: find way to close accumulators instead of using close hook?
|
fcontext.qcontext.addCloseHook(this); // TODO: find way to close accumulators instead of using close hook?
|
||||||
nTerms = uif.numTerms();
|
nTerms = uif.numTerms();
|
||||||
|
|
|
@ -25,19 +25,26 @@ import org.apache.lucene.index.SortedDocValues;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
import org.apache.lucene.util.LongValues;
|
import org.apache.lucene.util.LongValues;
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
|
||||||
class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
|
class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
|
||||||
final SortedDocValues topLevel;
|
SortedDocValues topLevel;
|
||||||
final SortedDocValues[] subDvs;
|
SortedDocValues[] subDvs;
|
||||||
final MultiDocValues.OrdinalMap ordMap;
|
MultiDocValues.OrdinalMap ordMap;
|
||||||
LongValues toGlobal;
|
LongValues toGlobal;
|
||||||
SortedDocValues subDv;
|
SortedDocValues subDv;
|
||||||
|
|
||||||
public UniqueSinglevaluedSlotAcc(FacetContext fcontext, String field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
|
public UniqueSinglevaluedSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
|
||||||
super(fcontext, field, numSlots, factory);
|
super(fcontext, field, numSlots, factory);
|
||||||
|
// let setNextReader lazily call reset(), that way an extra call to reset() after creation won't matter
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
SolrIndexSearcher searcher = fcontext.qcontext.searcher();
|
SolrIndexSearcher searcher = fcontext.qcontext.searcher();
|
||||||
topLevel = FieldUtil.getSortedDocValues(fcontext.qcontext, searcher.getSchema().getField(field), null);
|
topLevel = FieldUtil.getSortedDocValues(fcontext.qcontext, field, null);
|
||||||
nTerms = topLevel.getValueCount();
|
nTerms = topLevel.getValueCount();
|
||||||
if (topLevel instanceof MultiDocValues.MultiSortedDocValues) {
|
if (topLevel instanceof MultiDocValues.MultiSortedDocValues) {
|
||||||
ordMap = ((MultiDocValues.MultiSortedDocValues)topLevel).mapping;
|
ordMap = ((MultiDocValues.MultiSortedDocValues)topLevel).mapping;
|
||||||
|
@ -55,6 +62,9 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||||
|
if (topLevel == null) {
|
||||||
|
reset();
|
||||||
|
}
|
||||||
super.setNextReader(readerContext);
|
super.setNextReader(readerContext);
|
||||||
if (subDvs != null) {
|
if (subDvs != null) {
|
||||||
subDv = subDvs[readerContext.ord];
|
subDv = subDvs[readerContext.ord];
|
||||||
|
|
|
@ -37,15 +37,15 @@ abstract class UniqueSlotAcc extends SlotAcc {
|
||||||
int[] counts; // populated with the cardinality once
|
int[] counts; // populated with the cardinality once
|
||||||
int nTerms;
|
int nTerms;
|
||||||
|
|
||||||
public UniqueSlotAcc(FacetContext fcontext, String field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
|
public UniqueSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
|
||||||
super(fcontext);
|
super(fcontext);
|
||||||
this.factory = factory;
|
this.factory = factory;
|
||||||
arr = new FixedBitSet[numSlots];
|
arr = new FixedBitSet[numSlots];
|
||||||
this.field = fcontext.searcher.getSchema().getField(field);
|
this.field = field;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void reset() {
|
public void reset() throws IOException {
|
||||||
counts = null;
|
counts = null;
|
||||||
for (FixedBitSet bits : arr) {
|
for (FixedBitSet bits : arr) {
|
||||||
if (bits == null) continue;
|
if (bits == null) continue;
|
||||||
|
|
|
@ -1360,6 +1360,15 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
||||||
"}"
|
"}"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// test acc reuse (i.e. reset() method). This is normally used for stats that are not calculated in the first phase,
|
||||||
|
// currently non-sorting stats.
|
||||||
|
client.testJQ(params(p, "q", "*:*"
|
||||||
|
, "json.facet", "{f1:{type:terms, field:'${cat_s}', facet:{h:'hll(${where_s})'} }}"
|
||||||
|
)
|
||||||
|
, "facets=={ 'count':6, " +
|
||||||
|
"'f1':{ buckets:[{val:B, count:3, h:2},{val:A, count:2, h:2}] } } "
|
||||||
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
Loading…
Reference in New Issue