Made sure `all_terms` works consistently. In some cases the `all_terms` option was ignored: * When faceting on number-based fields. * When the `execution_type` was set to `map`. * When the `fields` option was used.

Closes #2861
This commit is contained in:
Martijn van Groningen 2013-04-05 14:25:09 +02:00
parent 831ea789aa
commit 9b5c74d43e
7 changed files with 266 additions and 136 deletions

View File

@ -145,6 +145,11 @@ public class TermsFacetParser extends AbstractComponent implements FacetParser {
return new IndexNameFacetExecutor(context.shardTarget().index(), comparatorType, size);
}
if (fieldsNames != null && fieldsNames.length == 1) {
field = fieldsNames[0];
fieldsNames = null;
}
Pattern pattern = null;
if (regex != null) {
pattern = Regex.compile(regex, regexFlags);

View File

@ -31,6 +31,7 @@ import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.fielddata.DoubleValues;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.DoubleFacetAggregatorBase;
import org.elasticsearch.search.facet.FacetExecutor;
@ -68,17 +69,42 @@ public class TermsDoubleFacetExecutor extends FacetExecutor {
this.facets = CacheRecycler.popDoubleIntMap();
// TODO: we need to support this with the new field data....
// if (allTerms) {
// try {
// for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
// DoubleFieldData fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
// fieldData.forEachValue(aggregator);
// }
// } catch (Exception e) {
// throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
// }
// }
if (allTerms) {
for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
int maxDoc = readerContext.reader().maxDoc();
DoubleValues values = indexFieldData.load(readerContext).getDoubleValues();
if (values instanceof DoubleValues.WithOrdinals) {
DoubleValues.WithOrdinals valuesWithOrds = (DoubleValues.WithOrdinals) values;
Ordinals.Docs ordinals = valuesWithOrds.ordinals();
for (int ord = 1; ord < ordinals.getMaxOrd(); ord++) {
facets.putIfAbsent(valuesWithOrds.getValueByOrd(ord), 0);
}
} else {
// Shouldn't be true, otherwise it is WithOrdinals... just to be sure...
if (values.isMultiValued()) {
for (int docId = 0; docId < maxDoc; docId++) {
if (!values.hasValue(docId)) {
continue;
}
DoubleValues.Iter iter = values.getIter(docId);
while (iter.hasNext()) {
facets.putIfAbsent(iter.next(), 0);
}
}
} else {
for (int docId = 0; docId < maxDoc; docId++) {
if (!values.hasValue(docId)) {
continue;
}
double value = values.getValue(docId);
facets.putIfAbsent(value, 0);
}
}
}
}
}
}
@Override

View File

@ -31,6 +31,7 @@ import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.LongValues;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.FacetExecutor;
import org.elasticsearch.search.facet.InternalFacet;
@ -65,20 +66,44 @@ public class TermsLongFacetExecutor extends FacetExecutor {
this.comparatorType = comparatorType;
this.script = script;
this.excluded = excluded;
this.facets = CacheRecycler.popLongIntMap();
// TODO: we need to support this with the new field data....
// if (allTerms) {
// try {
// for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
// LongFieldData fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
// fieldData.forEachValue(aggregator);
// }
// } catch (Exception e) {
// throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
// }
// }
if (allTerms) {
for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
int maxDoc = readerContext.reader().maxDoc();
LongValues values = indexFieldData.load(readerContext).getLongValues();
if (values instanceof LongValues.WithOrdinals) {
LongValues.WithOrdinals valuesWithOrds = (LongValues.WithOrdinals) values;
Ordinals.Docs ordinals = valuesWithOrds.ordinals();
for (int ord = 1; ord < ordinals.getMaxOrd(); ord++) {
facets.putIfAbsent(valuesWithOrds.getValueByOrd(ord), 0);
}
} else {
// Shouldn't be true, otherwise it is WithOrdinals... just to be sure...
if (values.isMultiValued()) {
for (int docId = 0; docId < maxDoc; docId++) {
if (!values.hasValue(docId)) {
continue;
}
LongValues.Iter iter = values.getIter(docId);
while (iter.hasNext()) {
facets.putIfAbsent(iter.next(), 0);
}
}
} else {
for (int docId = 0; docId < maxDoc; docId++) {
if (!values.hasValue(docId)) {
continue;
}
long value = values.getValue(docId);
facets.putIfAbsent(value, 0);
}
}
}
}
}
}
@Override

View File

@ -19,14 +19,10 @@
package org.elasticsearch.search.facet.terms.strings;
import java.io.IOException;
import java.util.Arrays;
import java.util.regex.Pattern;
import com.google.common.collect.ImmutableSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
@ -34,12 +30,10 @@ import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.FacetExecutor;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.InternalFacet;
import org.elasticsearch.search.facet.terms.strings.HashedAggregator.BytesRefCountIterator;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import java.io.IOException;
import java.util.regex.Pattern;
/**
*
@ -54,7 +48,6 @@ public class FieldsTermsStringFacetExecutor extends FacetExecutor {
long missing;
long total;
public FieldsTermsStringFacetExecutor(String facetName, String[] fieldsNames, int size, InternalStringTermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, Pattern pattern, SearchScript script) {
this.size = size;
@ -74,19 +67,11 @@ public class FieldsTermsStringFacetExecutor extends FacetExecutor {
aggregator = new HashedScriptAggregator(excluded, pattern, script);
}
// TODO: we need to support this flag with the new field data...
// if (allTerms) {
// try {
// for (int i = 0; i < fieldsNames.length; i++) {
// for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
// FieldData fieldData = fieldDataCache.cache(fieldsDataType[i], readerContext.reader(), indexFieldsNames[i]);
// fieldData.forEachValue(aggregator);
// }
// }
// } catch (Exception e) {
// throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
// }
// }
if (allTerms) {
for (int i = 0; i < fieldsNames.length; i++) {
TermsStringFacetExecutor.loadAllTerms(context, indexFieldDatas[i], aggregator);
}
}
}
@Override

View File

@ -18,8 +18,7 @@
*/
package org.elasticsearch.search.facet.terms.strings;
import java.util.Arrays;
import com.google.common.collect.ImmutableList;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
@ -30,7 +29,7 @@ import org.elasticsearch.search.facet.InternalFacet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import com.google.common.collect.ImmutableList;
import java.util.Arrays;
public class HashedAggregator {
private int missing;
@ -49,7 +48,7 @@ public class HashedAggregator {
public void onDoc(int docId, BytesValues values) {
if (values.hasValue(docId)) {
final Iter iter = values.getIter(docId);
while(iter.hasNext()) {
while (iter.hasNext()) {
onValue(docId, iter.next(), iter.hash(), values);
total++;
}
@ -58,6 +57,16 @@ public class HashedAggregator {
}
}
/**
 * Registers {@code value} in the term hash without counting an occurrence.
 * <p>
 * Only the backing {@code counts} array is resized so that the term's key is
 * addressable; no counter is incremented here, so a term that was already
 * present keeps whatever count it had.
 *
 * @param value    the term to register
 * @param hashCode the hash code to pass along to the term hash for {@code value}
 */
public void addValue(BytesRef value, int hashCode) {
    final int added = hash.add(value, hashCode);
    // A negative result is decoded as -(key) - 1, i.e. the key of an entry that
    // was already present (presumably the BytesRefHash.add contract -- confirm).
    final int slot = added < 0 ? (-added) - 1 : added;
    if (slot >= counts.length) {
        counts = ArrayUtil.grow(counts, slot + 1);
    }
}
/**
 * Delegates to {@code values.makeSafe(ref)} and returns its result.
 * <p>
 * NOTE(review): presumably this yields a reference that stays valid after the
 * values source reuses its buffer -- confirm against {@code BytesValues.makeSafe}.
 * Declared {@code protected}, so subclasses may override it.
 *
 * @param ref    the term reference handed out by {@code values}
 * @param values the values source that produced {@code ref}
 * @return whatever {@code values.makeSafe(ref)} returns
 */
protected BytesRef makesSafe(BytesRef ref, BytesValues values) {
    final BytesRef safeRef = values.makeSafe(ref);
    return safeRef;
}
@ -65,7 +74,7 @@ public class HashedAggregator {
protected void onValue(int docId, BytesRef value, int hashCode, BytesValues values) {
int key = hash.add(value, hashCode);
if (key < 0) {
key = ((-key)-1);
key = ((-key) - 1);
} else if (key >= counts.length) {
counts = ArrayUtil.grow(counts, key + 1);
}
@ -94,6 +103,7 @@ public class HashedAggregator {
private final int size;
private int current = 0;
private int currentCount = -1;
BytesRefCountIterator() {
this.size = hash.size();
}
@ -121,7 +131,7 @@ public class HashedAggregator {
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
BytesRefCountIterator iter = aggregator.getIter();
BytesRef next = null;
while((next = iter.next()) != null) {
while ((next = iter.next()) != null) {
ordered.insertWithOverflow(new InternalStringTermsFacet.TermEntry(BytesRef.deepCopyOf(next), iter.count()));
// maybe we can survive with a 0-copy here if we keep the bytes ref hash around?
}
@ -134,7 +144,7 @@ public class HashedAggregator {
BoundedTreeSet<InternalStringTermsFacet.TermEntry> ordered = new BoundedTreeSet<InternalStringTermsFacet.TermEntry>(comparatorType.comparator(), size);
BytesRefCountIterator iter = aggregator.getIter();
BytesRef next = null;
while((next = iter.next()) != null) {
while ((next = iter.next()) != null) {
ordered.add(new InternalStringTermsFacet.TermEntry(BytesRef.deepCopyOf(next), iter.count()));
// maybe we can survive with a 0-copy here if we keep the bytes ref hash around?
}

View File

@ -19,32 +19,21 @@
package org.elasticsearch.search.facet.terms.strings;
import gnu.trove.iterator.TObjectIntIterator;
import gnu.trove.map.hash.TObjectIntHashMap;
import java.io.IOException;
import java.util.Arrays;
import java.util.regex.Pattern;
import com.google.common.collect.ImmutableSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.lucene.HashedBytesRef;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.FacetExecutor;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.InternalFacet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.strings.HashedAggregator.BytesRefCountIterator;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import java.io.IOException;
import java.util.regex.Pattern;
/**
*
@ -76,6 +65,10 @@ public class TermsStringFacetExecutor extends FacetExecutor {
} else {
aggregator = new HashedScriptAggregator(excluded, pattern, script);
}
if (allTerms) {
loadAllTerms(context, indexFieldData, aggregator);
}
}
@Override
@ -126,4 +119,44 @@ public class TermsStringFacetExecutor extends FacetExecutor {
}
}
/**
 * Feeds every term of the given field, across all leaf readers of the search
 * context, into {@code aggregator} via {@link HashedAggregator#addValue}.
 * <p>
 * Three access paths are used, depending on what the field data offers:
 * ordinal-backed field data is walked ordinal by ordinal; otherwise every
 * document is visited, either through the per-document iterator (multi-valued)
 * or through the single hashed value lookup (single-valued).
 *
 * @param context        search context whose top-level reader leaves are visited
 * @param indexFieldData field data of the field whose terms are loaded
 * @param aggregator     receiver of each term together with its hash
 */
static void loadAllTerms(SearchContext context, IndexFieldData indexFieldData, HashedAggregator aggregator) {
    for (AtomicReaderContext leaf : context.searcher().getTopReaderContext().leaves()) {
        final int docCount = leaf.reader().maxDoc();

        if (indexFieldData instanceof IndexFieldData.WithOrdinals) {
            BytesValues.WithOrdinals ordinalValues = ((IndexFieldData.WithOrdinals) indexFieldData).load(leaf).getBytesValues();
            Ordinals.Docs docOrdinals = ordinalValues.ordinals();
            // Ordinal 0 stands for "document has no value for the field", so begin at 1.
            int ord = 1;
            while (ord < docOrdinals.getMaxOrd()) {
                BytesRef term = ordinalValues.getValueByOrd(ord);
                aggregator.addValue(term, term.hashCode());
                ord++;
            }
            continue;
        }

        BytesValues docValues = indexFieldData.load(leaf).getBytesValues();

        if (!docValues.isMultiValued()) {
            // Single-valued: one scratch reference is reused for every document.
            BytesRef scratch = new BytesRef();
            for (int doc = 0; doc < docCount; doc++) {
                if (docValues.hasValue(doc)) {
                    int valueHash = docValues.getValueHashed(doc, scratch);
                    aggregator.addValue(scratch, valueHash);
                }
            }
            continue;
        }

        // Multi-valued without ordinals is not expected (such data should be
        // WithOrdinals), but it is handled anyway to be on the safe side.
        for (int doc = 0; doc < docCount; doc++) {
            if (docValues.hasValue(doc)) {
                BytesValues.Iter termIter = docValues.getIter(doc);
                while (termIter.hasNext()) {
                    aggregator.addValue(termIter.next(), termIter.hash());
                }
            }
        }
    }
}
}

View File

@ -1061,21 +1061,67 @@ public class SimpleFacetsTests extends AbstractNodesTests {
assertThat(facet.getEntries().get(3).getTerm().string(), anyOf(equalTo("zzz"), equalTo("xxx")));
assertThat(facet.getEntries().get(3).getCount(), equalTo(1));
// TODO: support allTerms with the new field data
// searchResponse = client.prepareSearch()
// .setQuery(termQuery("xxx", "yyy")) // don't match anything
// .addFacet(termsFacet("facet1").field("tag").size(10).allTerms(true).executionHint(executionHint))
// .execute().actionGet();
//
// facet = searchResponse.facets().facet("facet1");
// assertThat(facet.getName(), equalTo("facet1"));
// assertThat(facet.getEntries().size(), equalTo(3));
// assertThat(facet.getEntries().get(0).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
// assertThat(facet.getEntries().get(0).getCount(), equalTo(0));
// assertThat(facet.getEntries().get(1).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
// assertThat(facet.getEntries().get(1).getCount(), equalTo(0));
// assertThat(facet.getEntries().get(2).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
// assertThat(facet.getEntries().get(2).getCount(), equalTo(0));
searchResponse = client.prepareSearch()
.setQuery(termQuery("xxx", "yyy")) // don't match anything
.addFacet(termsFacet("facet1").field("tag").size(10).allTerms(true).executionHint(executionHint))
.execute().actionGet();
facet = searchResponse.getFacets().facet("facet1");
assertThat(facet.getName(), equalTo("facet1"));
assertThat(facet.getEntries().size(), equalTo(3));
assertThat(facet.getEntries().get(0).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
assertThat(facet.getEntries().get(0).getCount(), equalTo(0));
assertThat(facet.getEntries().get(1).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
assertThat(facet.getEntries().get(1).getCount(), equalTo(0));
assertThat(facet.getEntries().get(2).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
assertThat(facet.getEntries().get(2).getCount(), equalTo(0));
searchResponse = client.prepareSearch()
.setQuery(termQuery("xxx", "yyy")) // don't match anything
.addFacet(termsFacet("facet1").fields("tag", "stag").size(10).allTerms(true).executionHint(executionHint))
.execute().actionGet();
facet = searchResponse.getFacets().facet("facet1");
assertThat(facet.getName(), equalTo("facet1"));
assertThat(facet.getEntries().size(), equalTo(4));
assertThat(facet.getEntries().get(0).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"), equalTo("111")));
assertThat(facet.getEntries().get(0).getCount(), equalTo(0));
assertThat(facet.getEntries().get(1).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"), equalTo("111")));
assertThat(facet.getEntries().get(1).getCount(), equalTo(0));
assertThat(facet.getEntries().get(2).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"), equalTo("111")));
assertThat(facet.getEntries().get(2).getCount(), equalTo(0));
assertThat(facet.getEntries().get(3).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"), equalTo("111")));
assertThat(facet.getEntries().get(3).getCount(), equalTo(0));
searchResponse = client.prepareSearch()
.setQuery(termQuery("xxx", "yyy")) // don't match anything
.addFacet(termsFacet("facet1").field("ltag").size(10).allTerms(true).executionHint(executionHint))
.execute().actionGet();
facet = searchResponse.getFacets().facet("facet1");
assertThat(facet.getName(), equalTo("facet1"));
assertThat(facet.getEntries().size(), equalTo(3));
assertThat(facet.getEntries().get(0).getTermAsNumber().intValue(), anyOf(equalTo(1000), equalTo(2000), equalTo(3000)));
assertThat(facet.getEntries().get(0).getCount(), equalTo(0));
assertThat(facet.getEntries().get(1).getTermAsNumber().intValue(), anyOf(equalTo(1000), equalTo(2000), equalTo(3000)));
assertThat(facet.getEntries().get(1).getCount(), equalTo(0));
assertThat(facet.getEntries().get(2).getTermAsNumber().intValue(), anyOf(equalTo(1000), equalTo(2000), equalTo(3000)));
assertThat(facet.getEntries().get(2).getCount(), equalTo(0));
searchResponse = client.prepareSearch()
.setQuery(termQuery("xxx", "yyy")) // don't match anything
.addFacet(termsFacet("facet1").field("dtag").size(10).allTerms(true).executionHint(executionHint))
.execute().actionGet();
facet = searchResponse.getFacets().facet("facet1");
assertThat(facet.getName(), equalTo("facet1"));
assertThat(facet.getEntries().size(), equalTo(3));
assertThat(facet.getEntries().get(0).getTermAsNumber().doubleValue(), anyOf(equalTo(1000.1), equalTo(2000.1), equalTo(3000.1)));
assertThat(facet.getEntries().get(0).getCount(), equalTo(0));
assertThat(facet.getEntries().get(1).getTermAsNumber().doubleValue(), anyOf(equalTo(1000.1), equalTo(2000.1), equalTo(3000.1)));
assertThat(facet.getEntries().get(1).getCount(), equalTo(0));
assertThat(facet.getEntries().get(2).getTermAsNumber().doubleValue(), anyOf(equalTo(1000.1), equalTo(2000.1), equalTo(3000.1)));
assertThat(facet.getEntries().get(2).getCount(), equalTo(0));
// Script Field