Ensure the `all_terms` option works consistently. Previously, `all_terms` was ignored in several cases: * Faceting on number-based fields. * When the `execution_type` was set to `map`. * When the `fields` option was used.

Closes #2861
This commit is contained in:
Martijn van Groningen 2013-04-05 14:25:09 +02:00
parent 831ea789aa
commit 9b5c74d43e
7 changed files with 266 additions and 136 deletions

View File

@ -145,6 +145,11 @@ public class TermsFacetParser extends AbstractComponent implements FacetParser {
return new IndexNameFacetExecutor(context.shardTarget().index(), comparatorType, size); return new IndexNameFacetExecutor(context.shardTarget().index(), comparatorType, size);
} }
if (fieldsNames != null && fieldsNames.length == 1) {
field = fieldsNames[0];
fieldsNames = null;
}
Pattern pattern = null; Pattern pattern = null;
if (regex != null) { if (regex != null) {
pattern = Regex.compile(regex, regexFlags); pattern = Regex.compile(regex, regexFlags);

View File

@ -31,6 +31,7 @@ import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.fielddata.DoubleValues; import org.elasticsearch.index.fielddata.DoubleValues;
import org.elasticsearch.index.fielddata.IndexNumericFieldData; import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.script.SearchScript; import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.DoubleFacetAggregatorBase; import org.elasticsearch.search.facet.DoubleFacetAggregatorBase;
import org.elasticsearch.search.facet.FacetExecutor; import org.elasticsearch.search.facet.FacetExecutor;
@ -68,17 +69,42 @@ public class TermsDoubleFacetExecutor extends FacetExecutor {
this.facets = CacheRecycler.popDoubleIntMap(); this.facets = CacheRecycler.popDoubleIntMap();
// TODO: we need to support this with the new field data.... if (allTerms) {
// if (allTerms) { for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
// try { int maxDoc = readerContext.reader().maxDoc();
// for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) { DoubleValues values = indexFieldData.load(readerContext).getDoubleValues();
// DoubleFieldData fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName); if (values instanceof DoubleValues.WithOrdinals) {
// fieldData.forEachValue(aggregator); DoubleValues.WithOrdinals valuesWithOrds = (DoubleValues.WithOrdinals) values;
// } Ordinals.Docs ordinals = valuesWithOrds.ordinals();
// } catch (Exception e) { for (int ord = 1; ord < ordinals.getMaxOrd(); ord++) {
// throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e); facets.putIfAbsent(valuesWithOrds.getValueByOrd(ord), 0);
// } }
// } } else {
// Shouldn't be true, otherwise it is WithOrdinals... just to be sure...
if (values.isMultiValued()) {
for (int docId = 0; docId < maxDoc; docId++) {
if (!values.hasValue(docId)) {
continue;
}
DoubleValues.Iter iter = values.getIter(docId);
while (iter.hasNext()) {
facets.putIfAbsent(iter.next(), 0);
}
}
} else {
for (int docId = 0; docId < maxDoc; docId++) {
if (!values.hasValue(docId)) {
continue;
}
double value = values.getValue(docId);
facets.putIfAbsent(value, 0);
}
}
}
}
}
} }
@Override @Override

View File

@ -31,6 +31,7 @@ import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.fielddata.IndexNumericFieldData; import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.LongValues; import org.elasticsearch.index.fielddata.LongValues;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.script.SearchScript; import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.FacetExecutor; import org.elasticsearch.search.facet.FacetExecutor;
import org.elasticsearch.search.facet.InternalFacet; import org.elasticsearch.search.facet.InternalFacet;
@ -65,20 +66,44 @@ public class TermsLongFacetExecutor extends FacetExecutor {
this.comparatorType = comparatorType; this.comparatorType = comparatorType;
this.script = script; this.script = script;
this.excluded = excluded; this.excluded = excluded;
this.facets = CacheRecycler.popLongIntMap(); this.facets = CacheRecycler.popLongIntMap();
// TODO: we need to support this with the new field data.... if (allTerms) {
// if (allTerms) { for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
// try { int maxDoc = readerContext.reader().maxDoc();
// for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) { LongValues values = indexFieldData.load(readerContext).getLongValues();
// LongFieldData fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName); if (values instanceof LongValues.WithOrdinals) {
// fieldData.forEachValue(aggregator); LongValues.WithOrdinals valuesWithOrds = (LongValues.WithOrdinals) values;
// } Ordinals.Docs ordinals = valuesWithOrds.ordinals();
// } catch (Exception e) { for (int ord = 1; ord < ordinals.getMaxOrd(); ord++) {
// throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e); facets.putIfAbsent(valuesWithOrds.getValueByOrd(ord), 0);
// } }
// } } else {
// Shouldn't be true, otherwise it is WithOrdinals... just to be sure...
if (values.isMultiValued()) {
for (int docId = 0; docId < maxDoc; docId++) {
if (!values.hasValue(docId)) {
continue;
}
LongValues.Iter iter = values.getIter(docId);
while (iter.hasNext()) {
facets.putIfAbsent(iter.next(), 0);
}
}
} else {
for (int docId = 0; docId < maxDoc; docId++) {
if (!values.hasValue(docId)) {
continue;
}
long value = values.getValue(docId);
facets.putIfAbsent(value, 0);
}
}
}
}
}
} }
@Override @Override

View File

@ -19,14 +19,10 @@
package org.elasticsearch.search.facet.terms.strings; package org.elasticsearch.search.facet.terms.strings;
import java.io.IOException; import com.google.common.collect.ImmutableSet;
import java.util.Arrays;
import java.util.regex.Pattern;
import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.fielddata.BytesValues; import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.FieldMapper;
@ -34,12 +30,10 @@ import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.FacetExecutor; import org.elasticsearch.search.facet.FacetExecutor;
import org.elasticsearch.search.facet.FacetPhaseExecutionException; import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.InternalFacet; import org.elasticsearch.search.facet.InternalFacet;
import org.elasticsearch.search.facet.terms.strings.HashedAggregator.BytesRefCountIterator;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.SearchContext;
import com.google.common.collect.ImmutableList; import java.io.IOException;
import com.google.common.collect.ImmutableSet; import java.util.regex.Pattern;
/** /**
* *
@ -54,7 +48,6 @@ public class FieldsTermsStringFacetExecutor extends FacetExecutor {
long missing; long missing;
long total; long total;
public FieldsTermsStringFacetExecutor(String facetName, String[] fieldsNames, int size, InternalStringTermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, public FieldsTermsStringFacetExecutor(String facetName, String[] fieldsNames, int size, InternalStringTermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, Pattern pattern, SearchScript script) { ImmutableSet<BytesRef> excluded, Pattern pattern, SearchScript script) {
this.size = size; this.size = size;
@ -74,19 +67,11 @@ public class FieldsTermsStringFacetExecutor extends FacetExecutor {
aggregator = new HashedScriptAggregator(excluded, pattern, script); aggregator = new HashedScriptAggregator(excluded, pattern, script);
} }
// TODO: we need to support this flag with the new field data... if (allTerms) {
// if (allTerms) { for (int i = 0; i < fieldsNames.length; i++) {
// try { TermsStringFacetExecutor.loadAllTerms(context, indexFieldDatas[i], aggregator);
// for (int i = 0; i < fieldsNames.length; i++) { }
// for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) { }
// FieldData fieldData = fieldDataCache.cache(fieldsDataType[i], readerContext.reader(), indexFieldsNames[i]);
// fieldData.forEachValue(aggregator);
// }
// }
// } catch (Exception e) {
// throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
// }
// }
} }
@Override @Override

View File

@ -18,8 +18,7 @@
*/ */
package org.elasticsearch.search.facet.terms.strings; package org.elasticsearch.search.facet.terms.strings;
import java.util.Arrays; import com.google.common.collect.ImmutableList;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.BytesRefHash;
@ -30,7 +29,7 @@ import org.elasticsearch.search.facet.InternalFacet;
import org.elasticsearch.search.facet.terms.TermsFacet; import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import com.google.common.collect.ImmutableList; import java.util.Arrays;
public class HashedAggregator { public class HashedAggregator {
private int missing; private int missing;
@ -58,6 +57,16 @@ public class HashedAggregator {
} }
} }
/**
 * Registers {@code value} in the hash without bumping its count, so the term
 * shows up in the results even when no document matches it (all_terms support).
 */
public void addValue(BytesRef value, int hashCode) {
    int slot = hash.add(value, hashCode);
    // A negative return means the value was already present; decode its slot.
    if (slot < 0) {
        slot = -slot - 1;
    }
    // Keep the counts array large enough to index by slot; the count itself
    // stays untouched (0 for a newly registered term).
    if (slot >= counts.length) {
        counts = ArrayUtil.grow(counts, slot + 1);
    }
}
protected BytesRef makesSafe(BytesRef ref, BytesValues values) { protected BytesRef makesSafe(BytesRef ref, BytesValues values) {
return values.makeSafe(ref); return values.makeSafe(ref);
} }
@ -94,6 +103,7 @@ public class HashedAggregator {
private final int size; private final int size;
private int current = 0; private int current = 0;
private int currentCount = -1; private int currentCount = -1;
BytesRefCountIterator() { BytesRefCountIterator() {
this.size = hash.size(); this.size = hash.size();
} }

View File

@ -19,32 +19,21 @@
package org.elasticsearch.search.facet.terms.strings; package org.elasticsearch.search.facet.terms.strings;
import gnu.trove.iterator.TObjectIntIterator; import com.google.common.collect.ImmutableSet;
import gnu.trove.map.hash.TObjectIntHashMap;
import java.io.IOException;
import java.util.Arrays;
import java.util.regex.Pattern;
import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.lucene.HashedBytesRef;
import org.elasticsearch.index.fielddata.BytesValues; import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.script.SearchScript; import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.FacetExecutor; import org.elasticsearch.search.facet.FacetExecutor;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.InternalFacet; import org.elasticsearch.search.facet.InternalFacet;
import org.elasticsearch.search.facet.terms.TermsFacet; import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.strings.HashedAggregator.BytesRefCountIterator;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.SearchContext;
import com.google.common.collect.ImmutableList; import java.io.IOException;
import com.google.common.collect.ImmutableSet; import java.util.regex.Pattern;
/** /**
* *
@ -76,6 +65,10 @@ public class TermsStringFacetExecutor extends FacetExecutor {
} else { } else {
aggregator = new HashedScriptAggregator(excluded, pattern, script); aggregator = new HashedScriptAggregator(excluded, pattern, script);
} }
if (allTerms) {
loadAllTerms(context, indexFieldData, aggregator);
}
} }
@Override @Override
@ -126,4 +119,44 @@ public class TermsStringFacetExecutor extends FacetExecutor {
} }
} }
/**
 * Seeds the aggregator with every distinct term of the field across all segments,
 * so that terms matched by zero documents still appear (with a count of 0) when
 * the all_terms option is enabled.
 */
static void loadAllTerms(SearchContext context, IndexFieldData indexFieldData, HashedAggregator aggregator) {
for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
int maxDoc = readerContext.reader().maxDoc();
if (indexFieldData instanceof IndexFieldData.WithOrdinals) {
// Fast path: walk the ordinal space directly instead of iterating documents.
BytesValues.WithOrdinals values = ((IndexFieldData.WithOrdinals) indexFieldData).load(readerContext).getBytesValues();
Ordinals.Docs ordinals = values.ordinals();
// 0 = docs with no value for field, so start from 1 instead
for (int ord = 1; ord < ordinals.getMaxOrd(); ord++) {
BytesRef value = values.getValueByOrd(ord);
aggregator.addValue(value, value.hashCode());
}
} else {
// Slow path: no ordinals available, scan every document in the segment.
BytesValues values = indexFieldData.load(readerContext).getBytesValues();
// Shouldn't be true, otherwise it is WithOrdinals... just to be sure...
if (values.isMultiValued()) {
for (int docId = 0; docId < maxDoc; docId++) {
if (!values.hasValue(docId)) {
continue;
}
// NOTE(review): assumes iter.hash() returns the hash of the value just
// produced by iter.next() — confirm against BytesValues.Iter contract.
BytesValues.Iter iter = values.getIter(docId);
while (iter.hasNext()) {
aggregator.addValue(iter.next(), iter.hash());
}
}
} else {
// Reuse one spare BytesRef to avoid allocating per document.
BytesRef spare = new BytesRef();
for (int docId = 0; docId < maxDoc; docId++) {
if (!values.hasValue(docId)) {
continue;
}
int hash = values.getValueHashed(docId, spare);
aggregator.addValue(spare, hash);
}
}
}
}
}
} }

View File

@ -1061,21 +1061,67 @@ public class SimpleFacetsTests extends AbstractNodesTests {
assertThat(facet.getEntries().get(3).getTerm().string(), anyOf(equalTo("zzz"), equalTo("xxx"))); assertThat(facet.getEntries().get(3).getTerm().string(), anyOf(equalTo("zzz"), equalTo("xxx")));
assertThat(facet.getEntries().get(3).getCount(), equalTo(1)); assertThat(facet.getEntries().get(3).getCount(), equalTo(1));
// TODO: support allTerms with the new field data searchResponse = client.prepareSearch()
// searchResponse = client.prepareSearch() .setQuery(termQuery("xxx", "yyy")) // don't match anything
// .setQuery(termQuery("xxx", "yyy")) // don't match anything .addFacet(termsFacet("facet1").field("tag").size(10).allTerms(true).executionHint(executionHint))
// .addFacet(termsFacet("facet1").field("tag").size(10).allTerms(true).executionHint(executionHint)) .execute().actionGet();
// .execute().actionGet();
// facet = searchResponse.getFacets().facet("facet1");
// facet = searchResponse.facets().facet("facet1"); assertThat(facet.getName(), equalTo("facet1"));
// assertThat(facet.getName(), equalTo("facet1")); assertThat(facet.getEntries().size(), equalTo(3));
// assertThat(facet.getEntries().size(), equalTo(3)); assertThat(facet.getEntries().get(0).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
// assertThat(facet.getEntries().get(0).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"))); assertThat(facet.getEntries().get(0).getCount(), equalTo(0));
// assertThat(facet.getEntries().get(0).getCount(), equalTo(0)); assertThat(facet.getEntries().get(1).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
// assertThat(facet.getEntries().get(1).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"))); assertThat(facet.getEntries().get(1).getCount(), equalTo(0));
// assertThat(facet.getEntries().get(1).getCount(), equalTo(0)); assertThat(facet.getEntries().get(2).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
// assertThat(facet.getEntries().get(2).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"))); assertThat(facet.getEntries().get(2).getCount(), equalTo(0));
// assertThat(facet.getEntries().get(2).getCount(), equalTo(0));
searchResponse = client.prepareSearch()
.setQuery(termQuery("xxx", "yyy")) // don't match anything
.addFacet(termsFacet("facet1").fields("tag", "stag").size(10).allTerms(true).executionHint(executionHint))
.execute().actionGet();
facet = searchResponse.getFacets().facet("facet1");
assertThat(facet.getName(), equalTo("facet1"));
assertThat(facet.getEntries().size(), equalTo(4));
assertThat(facet.getEntries().get(0).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"), equalTo("111")));
assertThat(facet.getEntries().get(0).getCount(), equalTo(0));
assertThat(facet.getEntries().get(1).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"), equalTo("111")));
assertThat(facet.getEntries().get(1).getCount(), equalTo(0));
assertThat(facet.getEntries().get(2).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"), equalTo("111")));
assertThat(facet.getEntries().get(2).getCount(), equalTo(0));
assertThat(facet.getEntries().get(3).getTerm().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"), equalTo("111")));
assertThat(facet.getEntries().get(3).getCount(), equalTo(0));
searchResponse = client.prepareSearch()
.setQuery(termQuery("xxx", "yyy")) // don't match anything
.addFacet(termsFacet("facet1").field("ltag").size(10).allTerms(true).executionHint(executionHint))
.execute().actionGet();
facet = searchResponse.getFacets().facet("facet1");
assertThat(facet.getName(), equalTo("facet1"));
assertThat(facet.getEntries().size(), equalTo(3));
assertThat(facet.getEntries().get(0).getTermAsNumber().intValue(), anyOf(equalTo(1000), equalTo(2000), equalTo(3000)));
assertThat(facet.getEntries().get(0).getCount(), equalTo(0));
assertThat(facet.getEntries().get(1).getTermAsNumber().intValue(), anyOf(equalTo(1000), equalTo(2000), equalTo(3000)));
assertThat(facet.getEntries().get(1).getCount(), equalTo(0));
assertThat(facet.getEntries().get(2).getTermAsNumber().intValue(), anyOf(equalTo(1000), equalTo(2000), equalTo(3000)));
assertThat(facet.getEntries().get(2).getCount(), equalTo(0));
searchResponse = client.prepareSearch()
.setQuery(termQuery("xxx", "yyy")) // don't match anything
.addFacet(termsFacet("facet1").field("dtag").size(10).allTerms(true).executionHint(executionHint))
.execute().actionGet();
facet = searchResponse.getFacets().facet("facet1");
assertThat(facet.getName(), equalTo("facet1"));
assertThat(facet.getEntries().size(), equalTo(3));
assertThat(facet.getEntries().get(0).getTermAsNumber().doubleValue(), anyOf(equalTo(1000.1), equalTo(2000.1), equalTo(3000.1)));
assertThat(facet.getEntries().get(0).getCount(), equalTo(0));
assertThat(facet.getEntries().get(1).getTermAsNumber().doubleValue(), anyOf(equalTo(1000.1), equalTo(2000.1), equalTo(3000.1)));
assertThat(facet.getEntries().get(1).getCount(), equalTo(0));
assertThat(facet.getEntries().get(2).getTermAsNumber().doubleValue(), anyOf(equalTo(1000.1), equalTo(2000.1), equalTo(3000.1)));
assertThat(facet.getEntries().get(2).getCount(), equalTo(0));
// Script Field // Script Field