mirror of https://github.com/apache/lucene.git
SOLR-10634: calc metrics in first phase if limit=-1 and no subfacets
This commit is contained in:
parent
ea9adc055b
commit
d60c72f34c
|
@ -231,6 +231,14 @@ Bug Fixes
|
|||
resulting in exceptions when using a hashing faceting method and sorting by hll(numeric_field).
|
||||
(yonik)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
* SOLR-10634: JSON Facet API: When a field/terms facet will retrieve all buckets (i.e. limit:-1)
|
||||
and there are no nested facets, aggregations are computed in the first collection phase
|
||||
so that the second phase which would normally involve calculating the domain for the bucket
|
||||
can be skipped entirely, leading to large performance improvements. (yonik)
|
||||
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -129,7 +129,36 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
|
|||
}
|
||||
sortAcc = indexOrderAcc;
|
||||
deferredAggs = freq.getFacetStats();
|
||||
} else {
|
||||
}
|
||||
|
||||
// If we are going to return all buckets and if there are no subfacets (that would need a domain), then don't defer
|
||||
// any aggregation calculations to a second phase. This way we can avoid calculating domains for each bucket, which
|
||||
// can be expensive.
|
||||
if (freq.limit == -1 && freq.subFacets.size() == 0) {
|
||||
accs = new SlotAcc[ freq.getFacetStats().size() ];
|
||||
int otherAccIdx = 0;
|
||||
for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
|
||||
AggValueSource agg = entry.getValue();
|
||||
SlotAcc acc = agg.createSlotAcc(fcontext, numDocs, numSlots);
|
||||
acc.key = entry.getKey();
|
||||
accMap.put(acc.key, acc);
|
||||
accs[otherAccIdx++] = acc;
|
||||
}
|
||||
if (accs.length == 1) {
|
||||
collectAcc = accs[0];
|
||||
} else {
|
||||
collectAcc = new MultiAcc(fcontext, accs);
|
||||
}
|
||||
|
||||
if (sortAcc == null) {
|
||||
sortAcc = accMap.get(freq.sortVariable);
|
||||
assert sortAcc != null;
|
||||
}
|
||||
|
||||
deferredAggs = null;
|
||||
}
|
||||
|
||||
if (sortAcc == null) {
|
||||
AggValueSource sortAgg = freq.getFacetStats().get(freq.sortVariable);
|
||||
if (sortAgg != null) {
|
||||
collectAcc = sortAgg.createSlotAcc(fcontext, numDocs, numSlots);
|
||||
|
@ -140,7 +169,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
|
|||
deferredAggs.remove(freq.sortVariable);
|
||||
}
|
||||
|
||||
if (deferredAggs.size() == 0) {
|
||||
if (deferredAggs == null || deferredAggs.size() == 0) {
|
||||
deferredAggs = null;
|
||||
}
|
||||
|
||||
|
@ -438,6 +467,54 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
|
|||
}
|
||||
}
|
||||
|
||||
static class MultiAcc extends SlotAcc {
|
||||
final SlotAcc[] subAccs;
|
||||
|
||||
MultiAcc(FacetContext fcontext, SlotAcc[] subAccs) {
|
||||
super(fcontext);
|
||||
this.subAccs = subAccs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc, int slot) throws IOException {
|
||||
for (SlotAcc acc : subAccs) {
|
||||
acc.collect(doc, slot);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slotA, int slotB) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getValue(int slotNum) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
for (SlotAcc acc : subAccs) {
|
||||
acc.reset();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void resize(Resizer resizer) {
|
||||
for (SlotAcc acc : subAccs) {
|
||||
acc.resize(resizer);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
|
||||
for (SlotAcc acc : subAccs) {
|
||||
acc.setValues(bucket, slotNum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static class SpecialSlotAcc extends SlotAcc {
|
||||
SlotAcc collectAcc;
|
||||
SlotAcc[] otherAccs;
|
||||
|
|
|
@ -164,4 +164,12 @@
|
|||
</lst>
|
||||
</initParams>
|
||||
|
||||
|
||||
<valueSourceParser name="nvl" class="org.apache.solr.search.function.NvlValueSourceParser">
|
||||
<float name="nvlFloatValue">0.0</float>
|
||||
</valueSourceParser>
|
||||
<valueSourceParser name="agg_debug" class="org.apache.solr.search.facet.DebugAgg$Parser">
|
||||
</valueSourceParser>
|
||||
|
||||
|
||||
</config>
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.search.facet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.FunctionQParser;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
import org.apache.solr.search.ValueSourceParser;
|
||||
|
||||
|
||||
public class DebugAgg extends AggValueSource {
|
||||
|
||||
public static class Parser extends ValueSourceParser {
|
||||
@Override
|
||||
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
||||
return new DebugAgg();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(NamedList args) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public DebugAgg() {
|
||||
super("debug");
|
||||
}
|
||||
|
||||
public DebugAgg(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
|
||||
return new Acc(fcontext, numDocs, numSlots);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return "debug()";
|
||||
}
|
||||
|
||||
public static class Acc extends SlotAcc {
|
||||
public static AtomicLong creates = new AtomicLong(0);
|
||||
public static AtomicLong resets = new AtomicLong(0);
|
||||
public static AtomicLong resizes = new AtomicLong(0);
|
||||
public static Acc last;
|
||||
|
||||
public CountSlotAcc sub;
|
||||
public int numDocs;
|
||||
public int numSlots;
|
||||
|
||||
public Acc(FacetContext fcontext, int numDocs, int numSlots) {
|
||||
super(fcontext);
|
||||
this.last = this;
|
||||
this.numDocs = numDocs;
|
||||
this.numSlots = numSlots;
|
||||
creates.addAndGet(1);
|
||||
sub = new CountSlotArrAcc(fcontext, numSlots);
|
||||
|
||||
new RuntimeException("DEBUG Acc numSlots=" + numSlots).printStackTrace();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc, int slot) throws IOException {
|
||||
sub.collect(doc, slot);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slotA, int slotB) {
|
||||
return sub.compare(slotA, slotB);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getValue(int slotNum) throws IOException {
|
||||
return sub.getValue(slotNum);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
resets.addAndGet(1);
|
||||
sub.reset();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void resize(Resizer resizer) {
|
||||
resizes.addAndGet(1);
|
||||
this.numSlots = resizer.getNewSize();
|
||||
sub.resize(resizer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||
sub.setNextReader(readerContext);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int collect(DocSet docs, int slot) throws IOException {
|
||||
return sub.collect(docs, slot);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
|
||||
sub.key = this.key; // TODO: Blech... this should be fixed
|
||||
sub.setValues(bucket, slotNum);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
sub.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetMerger createFacetMerger(Object prototype) {
|
||||
return new FacetLongMerger();
|
||||
}
|
||||
|
||||
}
|
|
@ -471,7 +471,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
doStatsTemplated(client, params(p, "facet","true", "rows","0", "noexist","noexist_ss", "cat_s","cat_ss", "where_s","where_ss", "num_d","num_f", "num_i","num_l", "num_is","num_ls", "num_fs", "num_ds", "super_s","super_ss", "val_b","val_b", "date","date_dt", "sparse_s","sparse_ss", "multi_ss","multi_ss") );
|
||||
|
||||
// multi-valued strings, method=dv for terms facets
|
||||
doStatsTemplated(client, params(p, "terms", "method:dv,", "rows", "0", "noexist", "noexist_ss", "cat_s", "cat_ss", "where_s", "where_ss", "num_d", "num_f", "num_i", "num_l", "super_s", "super_ss", "val_b", "val_b", "date", "date_dt", "sparse_s", "sparse_ss", "multi_ss", "multi_ss"));
|
||||
doStatsTemplated(client, params(p, "terms_method", "method:dv,", "rows", "0", "noexist", "noexist_ss", "cat_s", "cat_ss", "where_s", "where_ss", "num_d", "num_f", "num_i", "num_l", "super_s", "super_ss", "val_b", "val_b", "date", "date_dt", "sparse_s", "sparse_ss", "multi_ss", "multi_ss"));
|
||||
|
||||
// single valued docvalues for strings, and single valued numeric doc values for numeric fields
|
||||
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sd", "cat_s","cat_sd", "where_s","where_sd", "num_d","num_dd", "num_i","num_id", "num_is","num_lds", "num_fs","num_dds", "super_s","super_sd", "val_b","val_b", "date","date_dtd", "sparse_s","sparse_sd" ,"multi_ss","multi_sds") );
|
||||
|
@ -491,6 +491,26 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
if (p.get("num_is") == null) p.add("num_is","num_is");
|
||||
if (p.get("num_fs") == null) p.add("num_fs","num_fs");
|
||||
|
||||
String terms = p.get("terms");
|
||||
if (terms == null) terms="";
|
||||
int limit=0;
|
||||
switch (random().nextInt(4)) {
|
||||
case 0: limit=-1;
|
||||
case 1: limit=1000000;
|
||||
case 2: // fallthrough
|
||||
case 3: // fallthrough
|
||||
}
|
||||
if (limit != 0) {
|
||||
terms=terms+"limit:"+limit+",";
|
||||
}
|
||||
String terms_method = p.get("terms_method");
|
||||
if (terms_method != null) {
|
||||
terms=terms+terms_method;
|
||||
}
|
||||
p.set("terms", terms);
|
||||
// "${terms}" should be put at the beginning of generic terms facets.
|
||||
// It may specify "method=..." or "limit:-1", so should not be used if the facet explicitly specifies.
|
||||
|
||||
MacroExpander m = new MacroExpander( p.getMap() );
|
||||
|
||||
String cat_s = m.expand("${cat_s}");
|
||||
|
@ -862,7 +882,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
|
||||
// test numBuckets
|
||||
client.testJQ(params(p, "q", "*:*", "rows", "0", "facet", "true"
|
||||
, "json.facet", "{f1:{terms:{${terms} field:${cat_s}, numBuckets:true, limit:1}}}" // TODO: limit:0 produced an error
|
||||
, "json.facet", "{f1:{terms:{${terms_method} field:${cat_s}, numBuckets:true, limit:1}}}" // TODO: limit:0 produced an error
|
||||
)
|
||||
, "facets=={ 'count':6, " +
|
||||
"'f1':{ numBuckets:2, buckets:[{val:B, count:3}]} } "
|
||||
|
@ -1048,10 +1068,10 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
// also test limit:0
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{" +
|
||||
" f0:{${terms} type:terms, field:${multi_ss}, allBuckets:true, limit:0} " +
|
||||
",f1:{${terms} type:terms, field:${multi_ss}, allBuckets:true, limit:0, offset:1} " + // offset with 0 limit
|
||||
",f2:{${terms} type:terms, field:${multi_ss}, allBuckets:true, limit:0, facet:{x:'sum(${num_d})'}, sort:'x desc' } " +
|
||||
",f3:{${terms} type:terms, field:${multi_ss}, allBuckets:true, limit:0, missing:true, facet:{x:'sum(${num_d})', y:'avg(${num_d})'}, sort:'x desc' } " +
|
||||
" f0:{${terms_method} type:terms, field:${multi_ss}, allBuckets:true, limit:0} " +
|
||||
",f1:{${terms_method} type:terms, field:${multi_ss}, allBuckets:true, limit:0, offset:1} " + // offset with 0 limit
|
||||
",f2:{${terms_method} type:terms, field:${multi_ss}, allBuckets:true, limit:0, facet:{x:'sum(${num_d})'}, sort:'x desc' } " +
|
||||
",f3:{${terms_method} type:terms, field:${multi_ss}, allBuckets:true, limit:0, missing:true, facet:{x:'sum(${num_d})', y:'avg(${num_d})'}, sort:'x desc' } " +
|
||||
"}"
|
||||
)
|
||||
, "facets=={ 'count':6, " +
|
||||
|
@ -1066,9 +1086,9 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
// also test limit:0
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{" +
|
||||
" f0:{${terms} type:terms, field:${num_i}, allBuckets:true, limit:0} " +
|
||||
",f1:{${terms} type:terms, field:${num_i}, allBuckets:true, limit:0, offset:1} " + // offset with 0 limit
|
||||
",f2:{${terms} type:terms, field:${num_i}, allBuckets:true, limit:0, facet:{x:'sum(${num_d})'}, sort:'x desc' } " +
|
||||
" f0:{${terms_method} type:terms, field:${num_i}, allBuckets:true, limit:0} " +
|
||||
",f1:{${terms_method} type:terms, field:${num_i}, allBuckets:true, limit:0, offset:1} " + // offset with 0 limit
|
||||
",f2:{${terms_method} type:terms, field:${num_i}, allBuckets:true, limit:0, facet:{x:'sum(${num_d})'}, sort:'x desc' } " +
|
||||
"}"
|
||||
)
|
||||
, "facets=={ 'count':6, " +
|
||||
|
@ -1398,6 +1418,102 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// test which phase stats are calculated in
|
||||
////////////////////////////////////////////////////////////////
|
||||
if (client.local()) {
|
||||
long creates, resets;
|
||||
// NOTE: these test the current implementation and may need to be adjusted to match future optimizations (such as calculating N buckets in parallel in the second phase)
|
||||
|
||||
creates = DebugAgg.Acc.creates.get();
|
||||
resets = DebugAgg.Acc.resets.get();
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{f1:{terms:{${terms_method} field:${super_s}, limit:1, facet:{x:'debug()'} }}}" // x should be deferred to 2nd phase
|
||||
)
|
||||
, "facets=={ 'count':6, " +
|
||||
"f1:{ buckets:[{ val:batman, count:1, x:1}]} } "
|
||||
);
|
||||
|
||||
assertEquals(1, DebugAgg.Acc.creates.get() - creates);
|
||||
assertTrue( DebugAgg.Acc.resets.get() - resets <= 1);
|
||||
assertTrue( DebugAgg.Acc.last.numSlots <= 2 ); // probably "1", but may be special slot for something. As long as it's not cardinality of the field
|
||||
|
||||
|
||||
creates = DebugAgg.Acc.creates.get();
|
||||
resets = DebugAgg.Acc.resets.get();
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{f1:{terms:{${terms_method} field:${super_s}, limit:1, facet:{ x:'debug()'} , sort:'x asc' }}}" // sorting by x... must be done all at once in first phase
|
||||
)
|
||||
, "facets=={ 'count':6, " +
|
||||
"f1:{ buckets:[{ val:batman, count:1, x:1}]}" +
|
||||
" } "
|
||||
);
|
||||
|
||||
assertEquals(1, DebugAgg.Acc.creates.get() - creates);
|
||||
assertTrue( DebugAgg.Acc.resets.get() - resets == 0);
|
||||
assertTrue( DebugAgg.Acc.last.numSlots >= 5 ); // all slots should be done in a single shot. there may be more than 5 due to special slots or hashing.
|
||||
|
||||
|
||||
// When limit:-1, we should do most stats in first phase (SOLR-10634)
|
||||
creates = DebugAgg.Acc.creates.get();
|
||||
resets = DebugAgg.Acc.resets.get();
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{f1:{terms:{${terms_method} field:${super_s}, limit:-1, facet:{x:'debug()'} }}}"
|
||||
)
|
||||
, "facets=="
|
||||
);
|
||||
|
||||
assertEquals(1, DebugAgg.Acc.creates.get() - creates);
|
||||
assertTrue( DebugAgg.Acc.resets.get() - resets == 0);
|
||||
assertTrue( DebugAgg.Acc.last.numSlots >= 5 ); // all slots should be done in a single shot. there may be more than 5 due to special slots or hashing.
|
||||
|
||||
// Now for a numeric field
|
||||
// When limit:-1, we should do most stats in first phase (SOLR-10634)
|
||||
creates = DebugAgg.Acc.creates.get();
|
||||
resets = DebugAgg.Acc.resets.get();
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{f1:{terms:{${terms_method} field:${num_d}, limit:-1, facet:{x:'debug()'} }}}"
|
||||
)
|
||||
, "facets=="
|
||||
);
|
||||
|
||||
assertEquals(1, DebugAgg.Acc.creates.get() - creates);
|
||||
assertTrue( DebugAgg.Acc.resets.get() - resets == 0);
|
||||
assertTrue( DebugAgg.Acc.last.numSlots >= 5 ); // all slots should be done in a single shot. there may be more than 5 due to special slots or hashing.
|
||||
|
||||
|
||||
// But if we need to calculate domains anyway, it probably makes sense to calculate most stats in the 2nd phase (along with sub-facets)
|
||||
creates = DebugAgg.Acc.creates.get();
|
||||
resets = DebugAgg.Acc.resets.get();
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{f1:{terms:{${terms_method} field:${super_s}, limit:-1, facet:{ x:'debug()' , y:{terms:${where_s}} } }}}"
|
||||
)
|
||||
, "facets=="
|
||||
);
|
||||
|
||||
assertEquals(1, DebugAgg.Acc.creates.get() - creates);
|
||||
assertTrue( DebugAgg.Acc.resets.get() - resets >=4);
|
||||
assertTrue( DebugAgg.Acc.last.numSlots <= 2 ); // probably 1, but could be higher
|
||||
|
||||
// Now with a numeric field
|
||||
// But if we need to calculate domains anyway, it probably makes sense to calculate most stats in the 2nd phase (along with sub-facets)
|
||||
creates = DebugAgg.Acc.creates.get();
|
||||
resets = DebugAgg.Acc.resets.get();
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{f1:{terms:{${terms_method} field:${num_d}, limit:-1, facet:{ x:'debug()' , y:{terms:${where_s}} } }}}"
|
||||
)
|
||||
, "facets=="
|
||||
);
|
||||
|
||||
assertEquals(1, DebugAgg.Acc.creates.get() - creates);
|
||||
assertTrue( DebugAgg.Acc.resets.get() - resets >=4);
|
||||
assertTrue( DebugAgg.Acc.last.numSlots <= 2 ); // probably 1, but could be higher
|
||||
}
|
||||
//////////////////////////////////////////////////////////////// end phase testing
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue