refactor terms facet into string terms facets, paving the way for optimized ones for numeric values

This commit is contained in:
kimchy 2010-12-29 18:34:08 +02:00
parent e764f41341
commit c7fca8f0e0
9 changed files with 315 additions and 318 deletions

View File

@ -26,7 +26,7 @@ import org.elasticsearch.search.facet.histogram.InternalHistogramFacet;
import org.elasticsearch.search.facet.query.InternalQueryFacet; import org.elasticsearch.search.facet.query.InternalQueryFacet;
import org.elasticsearch.search.facet.range.InternalRangeFacet; import org.elasticsearch.search.facet.range.InternalRangeFacet;
import org.elasticsearch.search.facet.statistical.InternalStatisticalFacet; import org.elasticsearch.search.facet.statistical.InternalStatisticalFacet;
import org.elasticsearch.search.facet.terms.InternalTermsFacet; import org.elasticsearch.search.facet.terms.strings.InternalStringTermsFacet;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
@ -40,6 +40,6 @@ public class TransportFacetModule extends AbstractModule {
InternalFacet.Streams.registerStream(InternalHistogramFacet.STREAM, InternalHistogramFacet.TYPE); InternalFacet.Streams.registerStream(InternalHistogramFacet.STREAM, InternalHistogramFacet.TYPE);
InternalFacet.Streams.registerStream(InternalRangeFacet.STREAM, InternalRangeFacet.TYPE); InternalFacet.Streams.registerStream(InternalRangeFacet.STREAM, InternalRangeFacet.TYPE);
InternalFacet.Streams.registerStream(InternalStatisticalFacet.STREAM, InternalStatisticalFacet.TYPE); InternalFacet.Streams.registerStream(InternalStatisticalFacet.STREAM, InternalStatisticalFacet.TYPE);
InternalFacet.Streams.registerStream(InternalTermsFacet.STREAM, InternalTermsFacet.TYPE); InternalFacet.Streams.registerStream(InternalStringTermsFacet.STREAM, InternalStringTermsFacet.TYPE);
} }
} }

View File

@ -7,7 +7,7 @@
* "License"); you may not use this file except in compliance * "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at * with the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, * Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an * software distributed under the License is distributed on an
@ -19,152 +19,14 @@
package org.elasticsearch.search.facet.terms; package org.elasticsearch.search.facet.terms;
import org.elasticsearch.common.collect.ImmutableList;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString;
import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.InternalFacet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List; import java.util.List;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class InternalTermsFacet implements InternalFacet, TermsFacet { public interface InternalTermsFacet extends TermsFacet {
public static Stream STREAM = new Stream() { Facet reduce(String name, List<Facet> facets);
@Override public Facet readFacet(String type, StreamInput in) throws IOException {
return readTermsFacet(in);
}
};
private String name;
private String fieldName;
int requiredSize;
Collection<Entry> entries = ImmutableList.of();
private ComparatorType comparatorType;
private InternalTermsFacet() {
}
public InternalTermsFacet(String name, String fieldName, ComparatorType comparatorType, int requiredSize, Collection<Entry> entries) {
this.name = name;
this.fieldName = fieldName;
this.comparatorType = comparatorType;
this.requiredSize = requiredSize;
this.entries = entries;
}
@Override public String name() {
return this.name;
}
@Override public String getName() {
return this.name;
}
@Override public String fieldName() {
return this.fieldName;
}
@Override public String getFieldName() {
return fieldName();
}
@Override public String type() {
return TYPE;
}
@Override public String getType() {
return type();
}
@Override public ComparatorType comparatorType() {
return comparatorType;
}
@Override public ComparatorType getComparatorType() {
return comparatorType();
}
@Override public List<Entry> entries() {
if (!(entries instanceof List)) {
entries = ImmutableList.copyOf(entries);
}
return (List<Entry>) entries;
}
@Override public List<Entry> getEntries() {
return entries();
}
@Override public Iterator<Entry> iterator() {
return entries.iterator();
}
static final class Fields {
static final XContentBuilderString _TYPE = new XContentBuilderString("_type");
static final XContentBuilderString _FIELD = new XContentBuilderString("_field");
static final XContentBuilderString TERMS = new XContentBuilderString("terms");
static final XContentBuilderString TERM = new XContentBuilderString("term");
static final XContentBuilderString COUNT = new XContentBuilderString("count");
}
@Override public void toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(name);
builder.field(Fields._TYPE, TermsFacet.TYPE);
builder.field(Fields._FIELD, fieldName);
builder.startArray(Fields.TERMS);
for (Entry entry : entries) {
builder.startObject();
builder.field(Fields.TERM, entry.term());
builder.field(Fields.COUNT, entry.count());
builder.endObject();
}
builder.endArray();
builder.endObject();
}
public static InternalTermsFacet readTermsFacet(StreamInput in) throws IOException {
InternalTermsFacet facet = new InternalTermsFacet();
facet.readFrom(in);
return facet;
}
@Override public void readFrom(StreamInput in) throws IOException {
name = in.readUTF();
fieldName = in.readUTF();
comparatorType = ComparatorType.fromId(in.readByte());
requiredSize = in.readVInt();
int size = in.readVInt();
entries = new ArrayList<Entry>(size);
for (int i = 0; i < size; i++) {
entries.add(new Entry(in.readUTF(), in.readVInt()));
}
}
@Override public void writeTo(StreamOutput out) throws IOException {
out.writeUTF(name);
out.writeUTF(fieldName);
out.writeByte(comparatorType.id());
out.writeVInt(requiredSize);
out.writeVInt(entries.size());
for (Entry entry : entries) {
out.writeUTF(entry.term());
out.writeVInt(entry.count());
}
}
} }

View File

@ -37,6 +37,17 @@ public interface TermsFacet extends Facet, Iterable<TermsFacet.Entry> {
*/ */
public static final String TYPE = "terms"; public static final String TYPE = "terms";
public interface Entry extends Comparable<Entry> {
String term();
String getTerm();
int count();
int getCount();
}
/** /**
* Controls how the terms facets are ordered. * Controls how the terms facets are ordered.
*/ */
@ -72,14 +83,7 @@ public interface TermsFacet extends Facet, Iterable<TermsFacet.Entry> {
TERM((byte) 2, new Comparator<Entry>() { TERM((byte) 2, new Comparator<Entry>() {
@Override public int compare(Entry o1, Entry o2) { @Override public int compare(Entry o1, Entry o2) {
int i = o1.term().compareTo(o2.term()); return o1.compareTo(o2);
if (i == 0) {
i = o1.count() - o2.count();
if (i == 0) {
i = System.identityHashCode(o1) - System.identityHashCode(o2);
}
}
return i;
} }
}), }),
/** /**
@ -136,33 +140,6 @@ public interface TermsFacet extends Facet, Iterable<TermsFacet.Entry> {
} }
} }
public class Entry {
private String term;
private int count;
public Entry(String term, int count) {
this.term = term;
this.count = count;
}
public String term() {
return term;
}
public String getTerm() {
return term;
}
public int count() {
return count;
}
public int getCount() {
return count();
}
}
/** /**
* The field name the terms were extracted from. * The field name the terms were extracted from.
*/ */
@ -186,10 +163,10 @@ public interface TermsFacet extends Facet, Iterable<TermsFacet.Entry> {
/** /**
* The terms and counts. * The terms and counts.
*/ */
List<Entry> entries(); List<? extends TermsFacet.Entry> entries();
/** /**
* The terms and counts. * The terms and counts.
*/ */
List<Entry> getEntries(); List<? extends TermsFacet.Entry> getEntries();
} }

View File

@ -19,21 +19,22 @@
package org.elasticsearch.search.facet.terms; package org.elasticsearch.search.facet.terms;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.collect.ImmutableSet; import org.elasticsearch.common.collect.ImmutableSet;
import org.elasticsearch.common.collect.Lists; import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.thread.ThreadLocals;
import org.elasticsearch.common.trove.TObjectIntHashMap;
import org.elasticsearch.common.trove.TObjectIntIterator;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.FacetCollector; import org.elasticsearch.search.facet.FacetCollector;
import org.elasticsearch.search.facet.FacetProcessor; import org.elasticsearch.search.facet.FacetProcessor;
import org.elasticsearch.search.facet.InternalFacet; import org.elasticsearch.search.facet.InternalFacet;
import org.elasticsearch.search.facet.terms.index.IndexNameFacetCollector;
import org.elasticsearch.search.facet.terms.strings.FieldsTermsStringFacetCollector;
import org.elasticsearch.search.facet.terms.strings.InternalStringTermsFacet;
import org.elasticsearch.search.facet.terms.strings.ScriptTermsStringFieldFacetCollector;
import org.elasticsearch.search.facet.terms.strings.TermsStringFacetCollector;
import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException; import java.io.IOException;
@ -48,7 +49,7 @@ public class TermsFacetProcessor extends AbstractComponent implements FacetProce
@Inject public TermsFacetProcessor(Settings settings) { @Inject public TermsFacetProcessor(Settings settings) {
super(settings); super(settings);
InternalFacet.Streams.registerStream(InternalTermsFacet.STREAM, InternalTermsFacet.TYPE); InternalFacet.Streams.registerStream(InternalStringTermsFacet.STREAM, InternalStringTermsFacet.TYPE);
} }
@Override public String[] types() { @Override public String[] types() {
@ -121,41 +122,16 @@ public class TermsFacetProcessor extends AbstractComponent implements FacetProce
pattern = Regex.compile(regex, regexFlags); pattern = Regex.compile(regex, regexFlags);
} }
if (fieldsNames != null) { if (fieldsNames != null) {
return new TermsFieldsFacetCollector(facetName, fieldsNames, size, comparatorType, context, excluded, pattern, scriptLang, script, params); return new FieldsTermsStringFacetCollector(facetName, fieldsNames, size, comparatorType, context, excluded, pattern, scriptLang, script, params);
} }
if (field == null && fieldsNames == null && script != null) { if (field == null && fieldsNames == null && script != null) {
return new TermsScriptFieldFacetCollector(facetName, size, comparatorType, context, excluded, pattern, scriptLang, script, params); return new ScriptTermsStringFieldFacetCollector(facetName, size, comparatorType, context, excluded, pattern, scriptLang, script, params);
} }
return new TermsFacetCollector(facetName, field, size, comparatorType, context, excluded, pattern, scriptLang, script, params); return new TermsStringFacetCollector(facetName, field, size, comparatorType, context, excluded, pattern, scriptLang, script, params);
} }
@Override public Facet reduce(String name, List<Facet> facets) { @Override public Facet reduce(String name, List<Facet> facets) {
if (facets.size() == 1) {
return facets.get(0);
}
InternalTermsFacet first = (InternalTermsFacet) facets.get(0); InternalTermsFacet first = (InternalTermsFacet) facets.get(0);
TObjectIntHashMap<String> aggregated = aggregateCache.get().get(); return first.reduce(name, facets);
aggregated.clear();
for (Facet facet : facets) {
TermsFacet mFacet = (TermsFacet) facet;
for (TermsFacet.Entry entry : mFacet) {
aggregated.adjustOrPutValue(entry.term(), entry.count(), entry.count());
}
}
BoundedTreeSet<TermsFacet.Entry> ordered = new BoundedTreeSet<TermsFacet.Entry>(first.comparatorType().comparator(), first.requiredSize);
for (TObjectIntIterator<String> it = aggregated.iterator(); it.hasNext();) {
it.advance();
ordered.add(new TermsFacet.Entry(it.key(), it.value()));
}
first.entries = ordered;
return first;
} }
private static ThreadLocal<ThreadLocals.CleanableValue<TObjectIntHashMap<String>>> aggregateCache = new ThreadLocal<ThreadLocals.CleanableValue<TObjectIntHashMap<String>>>() {
@Override protected ThreadLocals.CleanableValue<TObjectIntHashMap<String>> initialValue() {
return new ThreadLocals.CleanableValue<TObjectIntHashMap<String>>(new TObjectIntHashMap<String>());
}
};
} }

View File

@ -17,12 +17,14 @@
* under the License. * under the License.
*/ */
package org.elasticsearch.search.facet.terms; package org.elasticsearch.search.facet.terms.index;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.elasticsearch.common.collect.Sets; import org.elasticsearch.common.collect.Sets;
import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.strings.InternalStringTermsFacet;
import java.io.IOException; import java.io.IOException;
@ -33,7 +35,7 @@ public class IndexNameFacetCollector extends AbstractFacetCollector {
private final String indexName; private final String indexName;
private final InternalTermsFacet.ComparatorType comparatorType; private final InternalStringTermsFacet.ComparatorType comparatorType;
private final int size; private final int size;
@ -54,6 +56,6 @@ public class IndexNameFacetCollector extends AbstractFacetCollector {
} }
@Override public Facet facet() { @Override public Facet facet() {
return new InternalTermsFacet(facetName, "_index", comparatorType, size, Sets.newHashSet(new TermsFacet.Entry(indexName, count))); return new InternalStringTermsFacet(facetName, "_index", comparatorType, size, Sets.newHashSet(new InternalStringTermsFacet.StringEntry(indexName, count)));
} }
} }

View File

@ -17,7 +17,7 @@
* under the License. * under the License.
*/ */
package org.elasticsearch.search.facet.terms; package org.elasticsearch.search.facet.terms.strings;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.BoundedTreeSet;
@ -45,7 +45,7 @@ import static org.elasticsearch.common.Strings.*;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class TermsFieldsFacetCollector extends AbstractFacetCollector { public class FieldsTermsStringFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache; private final FieldDataCache fieldDataCache;
@ -53,7 +53,7 @@ public class TermsFieldsFacetCollector extends AbstractFacetCollector {
private final String[] indexFieldsNames; private final String[] indexFieldsNames;
private final InternalTermsFacet.ComparatorType comparatorType; private final InternalStringTermsFacet.ComparatorType comparatorType;
private final int size; private final int size;
@ -67,8 +67,8 @@ public class TermsFieldsFacetCollector extends AbstractFacetCollector {
private final SearchScript script; private final SearchScript script;
public TermsFieldsFacetCollector(String facetName, String[] fieldsNames, int size, InternalTermsFacet.ComparatorType comparatorType, SearchContext context, public FieldsTermsStringFacetCollector(String facetName, String[] fieldsNames, int size, InternalStringTermsFacet.ComparatorType comparatorType, SearchContext context,
ImmutableSet<String> excluded, Pattern pattern, String scriptLang, String script, Map<String, Object> params) { ImmutableSet<String> excluded, Pattern pattern, String scriptLang, String script, Map<String, Object> params) {
super(facetName); super(facetName);
this.fieldDataCache = context.fieldDataCache(); this.fieldDataCache = context.fieldDataCache();
this.size = size; this.size = size;
@ -100,9 +100,9 @@ public class TermsFieldsFacetCollector extends AbstractFacetCollector {
} }
if (excluded.isEmpty() && pattern == null && this.script == null) { if (excluded.isEmpty() && pattern == null && this.script == null) {
aggregator = new StaticAggregatorValueProc(TermsFacetCollector.popFacets()); aggregator = new StaticAggregatorValueProc(TermsStringFacetCollector.popFacets());
} else { } else {
aggregator = new AggregatorValueProc(TermsFacetCollector.popFacets(), excluded, pattern, this.script); aggregator = new AggregatorValueProc(TermsStringFacetCollector.popFacets(), excluded, pattern, this.script);
} }
} }
@ -124,17 +124,17 @@ public class TermsFieldsFacetCollector extends AbstractFacetCollector {
@Override public Facet facet() { @Override public Facet facet() {
TObjectIntHashMap<String> facets = aggregator.facets(); TObjectIntHashMap<String> facets = aggregator.facets();
if (facets.isEmpty()) { if (facets.isEmpty()) {
TermsFacetCollector.pushFacets(facets); TermsStringFacetCollector.pushFacets(facets);
return new InternalTermsFacet(facetName, arrayToCommaDelimitedString(fieldsNames), comparatorType, size, ImmutableList.<InternalTermsFacet.Entry>of()); return new InternalStringTermsFacet(facetName, arrayToCommaDelimitedString(fieldsNames), comparatorType, size, ImmutableList.<InternalStringTermsFacet.StringEntry>of());
} else { } else {
// we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards
BoundedTreeSet<InternalTermsFacet.Entry> ordered = new BoundedTreeSet<InternalTermsFacet.Entry>(InternalTermsFacet.ComparatorType.COUNT.comparator(), size * numberOfShards); BoundedTreeSet<InternalStringTermsFacet.StringEntry> ordered = new BoundedTreeSet<InternalStringTermsFacet.StringEntry>(InternalStringTermsFacet.ComparatorType.COUNT.comparator(), size * numberOfShards);
for (TObjectIntIterator<String> it = facets.iterator(); it.hasNext();) { for (TObjectIntIterator<String> it = facets.iterator(); it.hasNext();) {
it.advance(); it.advance();
ordered.add(new InternalTermsFacet.Entry(it.key(), it.value())); ordered.add(new InternalStringTermsFacet.StringEntry(it.key(), it.value()));
} }
TermsFacetCollector.pushFacets(facets); TermsStringFacetCollector.pushFacets(facets);
return new InternalTermsFacet(facetName, arrayToCommaDelimitedString(fieldsNames), comparatorType, size, ordered); return new InternalStringTermsFacet(facetName, arrayToCommaDelimitedString(fieldsNames), comparatorType, size, ordered);
} }
} }

View File

@ -0,0 +1,246 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.strings;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.collect.ImmutableList;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.thread.ThreadLocals;
import org.elasticsearch.common.trove.TObjectIntHashMap;
import org.elasticsearch.common.trove.TObjectIntIterator;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.InternalFacet;
import org.elasticsearch.search.facet.terms.InternalTermsFacet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
/**
* @author kimchy (shay.banon)
*/
public class InternalStringTermsFacet implements InternalFacet, InternalTermsFacet {
public static Stream STREAM = new Stream() {
@Override public Facet readFacet(String type, StreamInput in) throws IOException {
return readTermsFacet(in);
}
};
public static class StringEntry implements Entry {
private String term;
private int count;
public StringEntry(String term, int count) {
this.term = term;
this.count = count;
}
public String term() {
return term;
}
public String getTerm() {
return term;
}
public int count() {
return count;
}
public int getCount() {
return count();
}
@Override public int compareTo(Entry o) {
int i = term.compareTo(o.term());
if (i == 0) {
i = count - o.count();
if (i == 0) {
i = System.identityHashCode(this) - System.identityHashCode(o);
}
}
return i;
}
}
private String name;
private String fieldName;
int requiredSize;
Collection<StringEntry> entries = ImmutableList.of();
private ComparatorType comparatorType;
InternalStringTermsFacet() {
}
public InternalStringTermsFacet(String name, String fieldName, ComparatorType comparatorType, int requiredSize, Collection<StringEntry> entries) {
this.name = name;
this.fieldName = fieldName;
this.comparatorType = comparatorType;
this.requiredSize = requiredSize;
this.entries = entries;
}
@Override public String name() {
return this.name;
}
@Override public String getName() {
return this.name;
}
@Override public String fieldName() {
return this.fieldName;
}
@Override public String getFieldName() {
return fieldName();
}
@Override public String type() {
return TYPE;
}
@Override public String getType() {
return type();
}
@Override public ComparatorType comparatorType() {
return comparatorType;
}
@Override public ComparatorType getComparatorType() {
return comparatorType();
}
@Override public List<StringEntry> entries() {
if (!(entries instanceof List)) {
entries = ImmutableList.copyOf(entries);
}
return (List<StringEntry>) entries;
}
@Override public List<StringEntry> getEntries() {
return entries();
}
@SuppressWarnings({"unchecked"}) @Override public Iterator<Entry> iterator() {
return (Iterator) entries.iterator();
}
private static ThreadLocal<ThreadLocals.CleanableValue<TObjectIntHashMap<String>>> aggregateCache = new ThreadLocal<ThreadLocals.CleanableValue<TObjectIntHashMap<String>>>() {
@Override protected ThreadLocals.CleanableValue<TObjectIntHashMap<String>> initialValue() {
return new ThreadLocals.CleanableValue<TObjectIntHashMap<String>>(new TObjectIntHashMap<String>());
}
};
@Override public Facet reduce(String name, List<Facet> facets) {
if (facets.size() == 1) {
return facets.get(0);
}
InternalStringTermsFacet first = (InternalStringTermsFacet) facets.get(0);
TObjectIntHashMap<String> aggregated = aggregateCache.get().get();
aggregated.clear();
for (Facet facet : facets) {
InternalStringTermsFacet mFacet = (InternalStringTermsFacet) facet;
for (InternalStringTermsFacet.StringEntry entry : mFacet.entries) {
aggregated.adjustOrPutValue(entry.term(), entry.count(), entry.count());
}
}
BoundedTreeSet<StringEntry> ordered = new BoundedTreeSet<StringEntry>(first.comparatorType().comparator(), first.requiredSize);
for (TObjectIntIterator<String> it = aggregated.iterator(); it.hasNext();) {
it.advance();
ordered.add(new StringEntry(it.key(), it.value()));
}
first.entries = ordered;
return first;
}
static final class Fields {
static final XContentBuilderString _TYPE = new XContentBuilderString("_type");
static final XContentBuilderString _FIELD = new XContentBuilderString("_field");
static final XContentBuilderString TERMS = new XContentBuilderString("terms");
static final XContentBuilderString TERM = new XContentBuilderString("term");
static final XContentBuilderString COUNT = new XContentBuilderString("count");
}
@Override public void toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(name);
builder.field(Fields._TYPE, TermsFacet.TYPE);
builder.field(Fields._FIELD, fieldName);
builder.startArray(Fields.TERMS);
for (Entry entry : entries) {
builder.startObject();
builder.field(Fields.TERM, entry.term());
builder.field(Fields.COUNT, entry.count());
builder.endObject();
}
builder.endArray();
builder.endObject();
}
public static InternalStringTermsFacet readTermsFacet(StreamInput in) throws IOException {
InternalStringTermsFacet facet = new InternalStringTermsFacet();
facet.readFrom(in);
return facet;
}
@Override public void readFrom(StreamInput in) throws IOException {
name = in.readUTF();
fieldName = in.readUTF();
comparatorType = ComparatorType.fromId(in.readByte());
requiredSize = in.readVInt();
int size = in.readVInt();
entries = new ArrayList<StringEntry>(size);
for (int i = 0; i < size; i++) {
entries.add(new StringEntry(in.readUTF(), in.readVInt()));
}
}
@Override public void writeTo(StreamOutput out) throws IOException {
out.writeUTF(name);
out.writeUTF(fieldName);
out.writeByte(comparatorType.id());
out.writeVInt(requiredSize);
out.writeVInt(entries.size());
for (Entry entry : entries) {
out.writeUTF(entry.term());
out.writeVInt(entry.count());
}
}
}

View File

@ -17,16 +17,14 @@
* under the License. * under the License.
*/ */
package org.elasticsearch.search.facet.terms; package org.elasticsearch.search.facet.terms.strings;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.collect.ImmutableList;
import org.elasticsearch.common.collect.ImmutableSet; import org.elasticsearch.common.collect.ImmutableSet;
import org.elasticsearch.common.collect.Maps;
import org.elasticsearch.common.trove.TObjectIntHashMap; import org.elasticsearch.common.trove.TObjectIntHashMap;
import org.elasticsearch.common.trove.TObjectIntIterator; import org.elasticsearch.common.trove.TObjectIntIterator;
import org.elasticsearch.index.field.data.FieldData;
import org.elasticsearch.script.search.SearchScript; import org.elasticsearch.script.search.SearchScript;
import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.Facet;
@ -40,9 +38,9 @@ import java.util.regex.Pattern;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class TermsScriptFieldFacetCollector extends AbstractFacetCollector { public class ScriptTermsStringFieldFacetCollector extends AbstractFacetCollector {
private final InternalTermsFacet.ComparatorType comparatorType; private final InternalStringTermsFacet.ComparatorType comparatorType;
private final int size; private final int size;
@ -58,8 +56,8 @@ public class TermsScriptFieldFacetCollector extends AbstractFacetCollector {
private final TObjectIntHashMap<String> facets; private final TObjectIntHashMap<String> facets;
public TermsScriptFieldFacetCollector(String facetName, int size, InternalTermsFacet.ComparatorType comparatorType, SearchContext context, public ScriptTermsStringFieldFacetCollector(String facetName, int size, InternalStringTermsFacet.ComparatorType comparatorType, SearchContext context,
ImmutableSet<String> excluded, Pattern pattern, String scriptLang, String script, Map<String, Object> params) { ImmutableSet<String> excluded, Pattern pattern, String scriptLang, String script, Map<String, Object> params) {
super(facetName); super(facetName);
this.size = size; this.size = size;
this.comparatorType = comparatorType; this.comparatorType = comparatorType;
@ -70,7 +68,7 @@ public class TermsScriptFieldFacetCollector extends AbstractFacetCollector {
this.excluded = excluded; this.excluded = excluded;
this.matcher = pattern != null ? pattern.matcher("") : null; this.matcher = pattern != null ? pattern.matcher("") : null;
this.facets = TermsFacetCollector.popFacets(); this.facets = TermsStringFacetCollector.popFacets();
} }
@Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException {
@ -116,81 +114,17 @@ public class TermsScriptFieldFacetCollector extends AbstractFacetCollector {
@Override public Facet facet() { @Override public Facet facet() {
if (facets.isEmpty()) { if (facets.isEmpty()) {
TermsFacetCollector.pushFacets(facets); TermsStringFacetCollector.pushFacets(facets);
return new InternalTermsFacet(facetName, sScript, comparatorType, size, ImmutableList.<InternalTermsFacet.Entry>of()); return new InternalStringTermsFacet(facetName, sScript, comparatorType, size, ImmutableList.<InternalStringTermsFacet.StringEntry>of());
} else { } else {
// we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards
BoundedTreeSet<InternalTermsFacet.Entry> ordered = new BoundedTreeSet<InternalTermsFacet.Entry>(InternalTermsFacet.ComparatorType.COUNT.comparator(), size * numberOfShards); BoundedTreeSet<InternalStringTermsFacet.StringEntry> ordered = new BoundedTreeSet<InternalStringTermsFacet.StringEntry>(InternalStringTermsFacet.ComparatorType.COUNT.comparator(), size * numberOfShards);
for (TObjectIntIterator<String> it = facets.iterator(); it.hasNext();) { for (TObjectIntIterator<String> it = facets.iterator(); it.hasNext();) {
it.advance(); it.advance();
ordered.add(new InternalTermsFacet.Entry(it.key(), it.value())); ordered.add(new InternalStringTermsFacet.StringEntry(it.key(), it.value()));
} }
TermsFacetCollector.pushFacets(facets); TermsStringFacetCollector.pushFacets(facets);
return new InternalTermsFacet(facetName, sScript, comparatorType, size, ordered); return new InternalStringTermsFacet(facetName, sScript, comparatorType, size, ordered);
}
}
public static class AggregatorValueProc extends StaticAggregatorValueProc {
private final ImmutableSet<String> excluded;
private final Matcher matcher;
private final SearchScript script;
private final Map<String, Object> scriptParams;
public AggregatorValueProc(TObjectIntHashMap<String> facets, ImmutableSet<String> excluded, Pattern pattern, SearchScript script) {
super(facets);
this.excluded = excluded;
this.matcher = pattern != null ? pattern.matcher("") : null;
this.script = script;
if (script != null) {
scriptParams = Maps.newHashMapWithExpectedSize(4);
} else {
scriptParams = null;
}
}
@Override public void onValue(int docId, String value) {
if (excluded != null && excluded.contains(value)) {
return;
}
if (matcher != null && !matcher.reset(value).matches()) {
return;
}
if (script != null) {
scriptParams.put("term", value);
Object scriptValue = script.execute(docId, scriptParams);
if (scriptValue == null) {
return;
}
if (scriptValue instanceof Boolean) {
if (!((Boolean) scriptValue)) {
return;
}
} else {
value = scriptValue.toString();
}
}
super.onValue(docId, value);
}
}
public static class StaticAggregatorValueProc implements FieldData.StringValueInDocProc {
private final TObjectIntHashMap<String> facets;
public StaticAggregatorValueProc(TObjectIntHashMap<String> facets) {
this.facets = facets;
}
@Override public void onValue(int docId, String value) {
facets.adjustOrPutValue(value, 1, 1);
}
public final TObjectIntHashMap<String> facets() {
return facets;
} }
} }
} }

View File

@ -17,7 +17,7 @@
* under the License. * under the License.
*/ */
package org.elasticsearch.search.facet.terms; package org.elasticsearch.search.facet.terms.strings;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.BoundedTreeSet;
@ -46,7 +46,7 @@ import java.util.regex.Pattern;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class TermsFacetCollector extends AbstractFacetCollector { public class TermsStringFacetCollector extends AbstractFacetCollector {
static ThreadLocal<ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<String>>>> cache = new ThreadLocal<ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<String>>>>() { static ThreadLocal<ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<String>>>> cache = new ThreadLocal<ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<String>>>>() {
@Override protected ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<String>>> initialValue() { @Override protected ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<String>>> initialValue() {
@ -61,7 +61,7 @@ public class TermsFacetCollector extends AbstractFacetCollector {
private final String indexFieldName; private final String indexFieldName;
private final InternalTermsFacet.ComparatorType comparatorType; private final InternalStringTermsFacet.ComparatorType comparatorType;
private final int size; private final int size;
@ -75,8 +75,8 @@ public class TermsFacetCollector extends AbstractFacetCollector {
private final SearchScript script; private final SearchScript script;
public TermsFacetCollector(String facetName, String fieldName, int size, InternalTermsFacet.ComparatorType comparatorType, SearchContext context, public TermsStringFacetCollector(String facetName, String fieldName, int size, InternalStringTermsFacet.ComparatorType comparatorType, SearchContext context,
ImmutableSet<String> excluded, Pattern pattern, String scriptLang, String script, Map<String, Object> params) { ImmutableSet<String> excluded, Pattern pattern, String scriptLang, String script, Map<String, Object> params) {
super(facetName); super(facetName);
this.fieldDataCache = context.fieldDataCache(); this.fieldDataCache = context.fieldDataCache();
this.size = size; this.size = size;
@ -127,16 +127,16 @@ public class TermsFacetCollector extends AbstractFacetCollector {
TObjectIntHashMap<String> facets = aggregator.facets(); TObjectIntHashMap<String> facets = aggregator.facets();
if (facets.isEmpty()) { if (facets.isEmpty()) {
pushFacets(facets); pushFacets(facets);
return new InternalTermsFacet(facetName, fieldName, comparatorType, size, ImmutableList.<InternalTermsFacet.Entry>of()); return new InternalStringTermsFacet(facetName, fieldName, comparatorType, size, ImmutableList.<InternalStringTermsFacet.StringEntry>of());
} else { } else {
// we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards
BoundedTreeSet<InternalTermsFacet.Entry> ordered = new BoundedTreeSet<InternalTermsFacet.Entry>(InternalTermsFacet.ComparatorType.COUNT.comparator(), size * numberOfShards); BoundedTreeSet<InternalStringTermsFacet.StringEntry> ordered = new BoundedTreeSet<InternalStringTermsFacet.StringEntry>(InternalStringTermsFacet.ComparatorType.COUNT.comparator(), size * numberOfShards);
for (TObjectIntIterator<String> it = facets.iterator(); it.hasNext();) { for (TObjectIntIterator<String> it = facets.iterator(); it.hasNext();) {
it.advance(); it.advance();
ordered.add(new InternalTermsFacet.Entry(it.key(), it.value())); ordered.add(new InternalStringTermsFacet.StringEntry(it.key(), it.value()));
} }
pushFacets(facets); pushFacets(facets);
return new InternalTermsFacet(facetName, fieldName, comparatorType, size, ordered); return new InternalStringTermsFacet(facetName, fieldName, comparatorType, size, ordered);
} }
} }