Search: Optimize (perf) execution of global facets, closes #889.

This commit is contained in:
kimchy 2011-04-28 17:20:31 +03:00
parent 0ab8d1f414
commit 2c0bb9199a
8 changed files with 325 additions and 19 deletions

View File

@ -0,0 +1,153 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.benchmark.search.facet;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.client.action.bulk.BulkRequestBuilder;
import org.elasticsearch.common.StopWatch;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.SizeValue;
import org.elasticsearch.common.util.concurrent.jsr166y.ThreadLocalRandom;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.node.Node;
import org.elasticsearch.search.facet.FacetBuilders;
import static org.elasticsearch.client.Requests.*;
import static org.elasticsearch.cluster.metadata.IndexMetaData.*;
import static org.elasticsearch.common.settings.ImmutableSettings.*;
import static org.elasticsearch.common.xcontent.XContentFactory.*;
import static org.elasticsearch.index.query.xcontent.QueryBuilders.*;
import static org.elasticsearch.node.NodeBuilder.*;
/**
 * Stand-alone benchmark that compares the average search time of the same term query on
 * {@code l_value} executed three ways: as a plain query, with an additional query facet, and with
 * an additional <em>global</em> query facet.
 *
 * <p>It starts two nodes plus a client node, bulk indexes {@link #COUNT} documents into the
 * {@code test} index (or, if that phase throws, assumes the index already exists and waits for
 * green cluster health), runs each scenario {@link #QUERY_COUNT} times and prints the average
 * {@code tookInMillis} per scenario. All nodes are closed before {@code main} returns.
 */
public class QueryFilterFacetSearchBenchmark {

    // number of documents to index; replaced in main() by the actual document count of the index
    static long COUNT = SizeValue.parseSizeValue("1m").singles();
    // number of documents per bulk request
    static int BATCH = 100;
    // number of searches executed per measured scenario
    static int QUERY_COUNT = 200;
    // number of distinct random l_value terms spread over the indexed documents
    static int NUMBER_OF_TERMS = 200;

    static Client client;

    /**
     * Runs the benchmark.
     *
     * @param args ignored
     * @throws Exception if node startup, the refresh/count calls or any search fails
     */
    public static void main(String[] args) throws Exception {
        Settings settings = settingsBuilder()
                .put("index.refresh_interval", "-1")
                .put("gateway.type", "local")
                .put(SETTING_NUMBER_OF_SHARDS, 2)
                .put(SETTING_NUMBER_OF_REPLICAS, 0)
                .build();

        Node node1 = nodeBuilder().settings(settingsBuilder().put(settings).put("name", "node1")).node();
        Node node2 = nodeBuilder().settings(settingsBuilder().put(settings).put("name", "node2")).node();
        Node clientNode = nodeBuilder().settings(settingsBuilder().put(settings).put("name", "client")).client(true).node();

        // always shut the nodes down, even when the benchmark fails half way through, so the
        // started nodes do not outlive the benchmark
        try {
            client = clientNode.client();

            // NOTE(review): the terms are random per run, so when the index already exists from a
            // previous run the queries below use values that were most likely never indexed.
            long[] lValues = new long[NUMBER_OF_TERMS];
            for (int i = 0; i < NUMBER_OF_TERMS; i++) {
                lValues[i] = ThreadLocalRandom.current().nextLong();
            }

            // NOTE(review): presumably gives the nodes time to discover each other - confirm
            Thread.sleep(10000);
            try {
                client.admin().indices().create(createIndexRequest("test")).actionGet();

                StopWatch stopWatch = new StopWatch().start();

                System.out.println("--> Indexing [" + COUNT + "] ...");
                long ITERS = COUNT / BATCH;
                int counter = 0;
                for (long i = 1; i <= ITERS; i++) {
                    BulkRequestBuilder request = client.prepareBulk();
                    for (int j = 0; j < BATCH; j++) {
                        counter++;

                        XContentBuilder builder = jsonBuilder().startObject();
                        builder.field("id", Integer.toString(counter));
                        // cycle through the pre-generated terms so each one is shared by many docs
                        builder.field("l_value", lValues[counter % lValues.length]);
                        builder.endObject();

                        request.add(Requests.indexRequest("test").type("type1").id(Integer.toString(counter))
                                .source(builder));
                    }
                    BulkResponse response = request.execute().actionGet();
                    if (response.hasFailures()) {
                        System.err.println("--> failures...");
                    }
                    // progress report (and a fresh stop watch task) every 10000 documents
                    if (((i * BATCH) % 10000) == 0) {
                        System.out.println("--> Indexed " + (i * BATCH) + " took " + stopWatch.stop().lastTaskTime());
                        stopWatch.start();
                    }
                }
                System.out.println("--> Indexing took " + stopWatch.totalTime() + ", TPS " + (((double) (COUNT)) / stopWatch.totalTime().secondsFrac()));
            } catch (Exception e) {
                // any failure of the create/index phase is treated as "index already exists"
                System.out.println("--> Index already exists, ignoring indexing phase, waiting for green");
                ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("10m").execute().actionGet();
                if (clusterHealthResponse.timedOut()) {
                    System.err.println("--> Timed out waiting for cluster health");
                }
            }
            // refresh_interval is -1, so an explicit refresh is needed to make the docs searchable
            client.admin().indices().prepareRefresh().execute().actionGet();
            COUNT = client.prepareCount().setQuery(matchAllQuery()).execute().actionGet().count();
            System.out.println("--> Number of docs in index: " + COUNT);

            // scenario 1: plain term query
            long totalQueryTime = 0;
            for (int j = 0; j < QUERY_COUNT; j++) {
                SearchResponse searchResponse = client.prepareSearch()
                        .setSearchType(SearchType.COUNT)
                        .setQuery(termQuery("l_value", lValues[0]))
                        .execute().actionGet();
                totalQueryTime += searchResponse.tookInMillis();
            }
            System.out.println("--> Simple Query on first l_value " + (totalQueryTime / QUERY_COUNT) + "ms");

            // scenario 2: same query plus a (main scope) query facet
            totalQueryTime = 0;
            for (int j = 0; j < QUERY_COUNT; j++) {
                SearchResponse searchResponse = client.prepareSearch()
                        .setSearchType(SearchType.COUNT)
                        .setQuery(termQuery("l_value", lValues[0]))
                        .addFacet(FacetBuilders.queryFacet("query").query(termQuery("l_value", lValues[0])))
                        .execute().actionGet();
                totalQueryTime += searchResponse.tookInMillis();
            }
            System.out.println("--> Query facet first l_value " + (totalQueryTime / QUERY_COUNT) + "ms");

            // scenario 3: same query plus a global query facet
            totalQueryTime = 0;
            for (int j = 0; j < QUERY_COUNT; j++) {
                SearchResponse searchResponse = client.prepareSearch()
                        .setSearchType(SearchType.COUNT)
                        .setQuery(termQuery("l_value", lValues[0]))
                        .addFacet(FacetBuilders.queryFacet("query").query(termQuery("l_value", lValues[0])).global(true))
                        .execute().actionGet();
                totalQueryTime += searchResponse.tookInMillis();
            }
            System.out.println("--> Query facet first l_value (global) " + (totalQueryTime / QUERY_COUNT) + "ms");
        } finally {
            clientNode.close();
            node1.close();
            node2.close();
        }
    }
}

View File

@ -44,6 +44,10 @@ public abstract class AbstractFacetCollector extends FacetCollector {
this.facetName = facetName;
}
/**
 * Returns the filter held by this collector, or <tt>null</tt> when none has been set.
 */
public Filter getFilter() {
    return filter;
}
@Override public void setFilter(Filter filter) {
if (this.filter == null) {
this.filter = filter;

View File

@ -19,13 +19,13 @@
package org.elasticsearch.search.facet;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.*;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.collect.ImmutableList;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.collect.Maps;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.NoopCollector;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.SearchPhase;
@ -67,22 +67,51 @@ public class FacetPhase implements SearchPhase {
return;
}
// run global facets ...
if (context.searcher().hasCollectors(ContextIndexSearcher.Scopes.GLOBAL)) {
Query query = Queries.MATCH_ALL_QUERY;
if (context.types().length > 0) {
query = new FilteredQuery(query, context.filterCache().cache(context.mapperService().typesFilter(context.types())));
}
// optimize global facet execution: collectors that implement OptimizeGlobalFacetCollector compute
// their result themselves; the rest are grouped by filter so we don't iterate over all docs
List<Collector> collectors = context.searcher().removeCollectors(ContextIndexSearcher.Scopes.GLOBAL);
context.searcher().processingScope(ContextIndexSearcher.Scopes.GLOBAL);
try {
context.searcher().search(query, NoopCollector.NOOP_COLLECTOR);
} catch (IOException e) {
throw new QueryPhaseExecutionException(context, "Failed to execute global facets", e);
} finally {
context.searcher().processedScope();
if (collectors != null && !collectors.isEmpty()) {
Map<Filter, List<Collector>> filtersByCollector = Maps.newHashMap();
for (Collector collector : collectors) {
if (collector instanceof OptimizeGlobalFacetCollector) {
try {
((OptimizeGlobalFacetCollector) collector).optimizedGlobalExecution(context);
} catch (IOException e) {
throw new QueryPhaseExecutionException(context, "Failed to execute global facets", e);
}
} else {
Filter filter = Queries.MATCH_ALL_FILTER;
if (collector instanceof AbstractFacetCollector) {
AbstractFacetCollector facetCollector = (AbstractFacetCollector) collector;
if (facetCollector.getFilter() != null) {
filter = facetCollector.getFilter();
}
}
List<Collector> list = filtersByCollector.get(filter);
if (list == null) {
    // must be a mutable list: every further collector sharing this filter is appended to it,
    // and an ImmutableList would throw UnsupportedOperationException on that add
    list = Lists.newArrayList();
    filtersByCollector.put(filter, list);
}
list.add(collector);
}
}
// now, go and execute the filters->collector ones
for (Map.Entry<Filter, List<Collector>> entry : filtersByCollector.entrySet()) {
Filter filter = entry.getKey();
Query query = new DeletionAwareConstantScoreQuery(filter);
if (context.types().length > 0) {
query = new FilteredQuery(query, context.filterCache().cache(context.mapperService().typesFilter(context.types())));
}
try {
context.searcher().search(query, MultiCollector.wrap(entry.getValue().toArray(new Collector[entry.getValue().size()])));
} catch (IOException e) {
throw new QueryPhaseExecutionException(context, "Failed to execute global facets", e);
}
}
}
SearchContextFacets contextFacets = context.facets();
List<Facet> facets = Lists.newArrayListWithCapacity(2);

View File

@ -0,0 +1,29 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
/**
 * Implemented by facet collectors that are able to compute their result for the global scope
 * on their own, instead of being fed documents by a shared global search.
 */
public interface OptimizeGlobalFacetCollector {

    /**
     * Computes this collector's global scope result using the given search context.
     *
     * @param searchContext the context of the search being executed
     * @throws IOException if the underlying execution fails
     */
    void optimizedGlobalExecution(SearchContext searchContext) throws IOException;
}

View File

@ -20,19 +20,21 @@
package org.elasticsearch.search.facet.filter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.*;
import org.elasticsearch.common.lucene.docset.DocSet;
import org.elasticsearch.common.lucene.docset.DocSets;
import org.elasticsearch.index.cache.filter.FilterCache;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.OptimizeGlobalFacetCollector;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
/**
* @author kimchy (shay.banon)
*/
public class FilterFacetCollector extends AbstractFacetCollector {
public class FilterFacetCollector extends AbstractFacetCollector implements OptimizeGlobalFacetCollector {
private final Filter filter;
@ -45,6 +47,19 @@ public class FilterFacetCollector extends AbstractFacetCollector {
this.filter = filter;
}
/**
 * Computes the facet count for the global scope with a single hit-counting search: the facet's
 * own filter, narrowed by the facet filter of the base collector (when set) and by the types of
 * the search context (when any are given).
 */
@Override public void optimizedGlobalExecution(SearchContext searchContext) throws IOException {
    // the facet's own filter, wrapped as a query
    Query countQuery = new DeletionAwareConstantScoreQuery(filter);

    // narrow by the facet filter kept on the base collector, when there is one
    if (super.filter != null) {
        countQuery = new FilteredQuery(countQuery, super.filter);
    }

    // narrow by the (cached) types filter when the search is restricted to specific types
    final String[] types = searchContext.types();
    if (types.length > 0) {
        countQuery = new FilteredQuery(
                countQuery,
                searchContext.filterCache().cache(searchContext.mapperService().typesFilter(searchContext.types())));
    }

    // only the number of matching docs is needed
    final TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    searchContext.searcher().search(countQuery, hitCounter);
    count = hitCounter.getTotalHits();
}
/**
 * Resolves the facet filter against the given reader and keeps the resulting doc set
 * in {@code docSet}.
 */
@Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException {
    this.docSet = DocSets.convert(
            reader,
            this.filter.getDocIdSet(reader));
}

View File

@ -27,13 +27,17 @@ import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.index.cache.filter.FilterCache;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.OptimizeGlobalFacetCollector;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
/**
* @author kimchy (shay.banon)
*/
public class QueryFacetCollector extends AbstractFacetCollector {
public class QueryFacetCollector extends AbstractFacetCollector implements OptimizeGlobalFacetCollector {
private final Query query;
private final Filter filter;
@ -43,6 +47,7 @@ public class QueryFacetCollector extends AbstractFacetCollector {
public QueryFacetCollector(String facetName, Query query, FilterCache filterCache) {
super(facetName);
this.query = query;
Filter possibleFilter = extractFilterIfApplicable(query);
if (possibleFilter != null) {
this.filter = possibleFilter;
@ -61,6 +66,19 @@ public class QueryFacetCollector extends AbstractFacetCollector {
}
}
/**
 * Computes the facet count for the global scope with a single hit-counting search: the facet's
 * query, narrowed by the facet filter of the base collector (when set) and by the types of the
 * search context (when any are given).
 */
@Override public void optimizedGlobalExecution(SearchContext searchContext) throws IOException {
    // start from the facet's own query
    Query countQuery = this.query;

    // narrow by the facet filter kept on the base collector, when there is one
    if (super.filter != null) {
        countQuery = new FilteredQuery(countQuery, super.filter);
    }

    // narrow by the (cached) types filter when the search is restricted to specific types
    final String[] types = searchContext.types();
    if (types.length > 0) {
        countQuery = new FilteredQuery(
                countQuery,
                searchContext.filterCache().cache(searchContext.mapperService().typesFilter(searchContext.types())));
    }

    // only the number of matching docs is needed
    final TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    searchContext.searcher().search(countQuery, hitCounter);
    count = hitCounter.getTotalHits();
}
/**
 * Builds the query facet result from this collector's facet name and the collected count.
 */
@Override public Facet facet() {
    final Facet result = new InternalQueryFacet(this.facetName, this.count);
    return result;
}

View File

@ -77,6 +77,10 @@ public class ContextIndexSearcher extends ExtendedIndexSearcher {
collectors.add(collector);
}
/**
 * Removes and returns the collectors registered under the given scope.
 *
 * @param scope the scope whose collectors should be removed
 * @return the removed collectors, or <tt>null</tt> when nothing is registered under that scope
 *         (including when no scoped collectors have been registered at all)
 */
public List<Collector> removeCollectors(String scope) {
    if (scopeCollectors == null) {
        // no scoped collector was registered (hasCollectors guards the same case),
        // so there is nothing to remove
        return null;
    }
    return scopeCollectors.remove(scope);
}
public boolean hasCollectors(String scope) {
if (scopeCollectors == null) {
return false;

View File

@ -32,6 +32,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.search.facet.datehistogram.DateHistogramFacet;
import org.elasticsearch.search.facet.filter.FilterFacet;
import org.elasticsearch.search.facet.histogram.HistogramFacet;
import org.elasticsearch.search.facet.query.QueryFacet;
import org.elasticsearch.search.facet.range.RangeFacet;
import org.elasticsearch.search.facet.statistical.StatisticalFacet;
import org.elasticsearch.search.facet.terms.TermsFacet;
@ -566,6 +567,18 @@ public class SimpleFacetsTests extends AbstractNodesTests {
assertThat(facet.entries().get(0).term(), equalTo("111"));
assertThat(facet.entries().get(0).count(), equalTo(1));
// now with global
searchResponse = client.prepareSearch()
.setQuery(matchAllQuery())
.addFacet(termsFacet("facet1").field("stag").size(10).facetFilter(termFilter("tag", "xxx")).global(true).executionHint(executionHint))
.execute().actionGet();
facet = searchResponse.facets().facet("facet1");
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(1));
assertThat(facet.entries().get(0).term(), equalTo("111"));
assertThat(facet.entries().get(0).count(), equalTo(1));
// Test Facet Filter (with a type)
searchResponse = client.prepareSearch()
@ -1722,6 +1735,47 @@ public class SimpleFacetsTests extends AbstractNodesTests {
}
}
/**
 * Indexes 20 docs whose <tt>num</tt> field cycles through 0..9 and verifies that a query facet on
 * <tt>num:1</tt> counts 2 docs in the main scope, in the global scope, and in the global scope
 * combined with a facet filter.
 */
@Test public void testQueryFacet() throws Exception {
    try {
        client.admin().indices().prepareDelete("test").execute().actionGet();
    } catch (Exception ignored) {
        // ignore
    }
    client.admin().indices().prepareCreate("test").execute().actionGet();
    client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet();

    // each value of num (0..9) ends up on exactly two docs
    int docId = 0;
    while (docId < 20) {
        client.prepareIndex("test", "type1", Integer.toString(docId)).setSource("num", docId % 10).execute().actionGet();
        docId++;
    }

    client.admin().indices().prepareRefresh().execute().actionGet();

    for (int run = 0; run < numberOfRuns(); run++) {
        // main scope
        SearchResponse mainScopeResponse = client.prepareSearch()
                .setQuery(matchAllQuery())
                .addFacet(queryFacet("query").query(termQuery("num", 1)))
                .execute().actionGet();
        QueryFacet mainScopeFacet = mainScopeResponse.facets().facet("query");
        assertThat(mainScopeFacet.count(), equalTo(2L));

        // global scope
        SearchResponse globalResponse = client.prepareSearch()
                .setQuery(matchAllQuery())
                .addFacet(queryFacet("query").query(termQuery("num", 1)).global(true))
                .execute().actionGet();
        QueryFacet globalFacet = globalResponse.facets().facet("query");
        assertThat(globalFacet.count(), equalTo(2L));

        // global scope with a facet filter: the query matches num 1 and 2, the filter keeps num 1
        SearchResponse filteredGlobalResponse = client.prepareSearch()
                .setQuery(matchAllQuery())
                .addFacet(queryFacet("query").query(termsQuery("num", new long[]{1, 2})).facetFilter(termFilter("num", 1)).global(true))
                .execute().actionGet();
        QueryFacet filteredGlobalFacet = filteredGlobalResponse.facets().facet("query");
        assertThat(filteredGlobalFacet.count(), equalTo(2L));
    }
}
/**
 * Shortcut for {@code timeInMillis(time, DateTimeZone.UTC)}.
 */
private long utcTimeInMillis(String time) {
    final long millis = timeInMillis(time, DateTimeZone.UTC);
    return millis;
}