move terms to use new field data

Shay Banon 2013-01-22 12:06:26 +01:00
parent e5b651321f
commit 772ee9db54
26 changed files with 224 additions and 5096 deletions
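In short: the dedicated byte/short/int/float/ip terms collectors go away, and TermsFacetProcessor now dispatches on the new IndexFieldData API. A minimal sketch of the new selection logic, distilled from the TermsFacetProcessor hunk below (all names are the Elasticsearch internals shown in this diff):

IndexFieldData indexFieldData = context.fieldData().getForField(fieldMapper);
if (indexFieldData instanceof IndexNumericFieldData) {
    IndexNumericFieldData numeric = (IndexNumericFieldData) indexFieldData;
    // floating-point fields collect as doubles, all other numerics as longs
    return numeric.getNumericType().isFloatingPoint()
            ? new TermsDoubleFacetCollector(facetName, numeric, size, comparatorType, allTerms, context, excluded, searchScript)
            : new TermsLongFacetCollector(facetName, numeric, size, comparatorType, allTerms, context, excluded, searchScript);
}
// non-numeric fields fall back to the string collectors (ordinals-based where possible)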


@@ -53,7 +53,7 @@ public class EmptyOrdinals implements Ordinals {
@Override
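// an empty segment now exposes only ordinal 0 (the "missing" ordinal), hence a single ord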
public int getNumOrds() {
return numDocs + 1;
return 1;
}
@Override


@@ -21,13 +21,8 @@ package org.elasticsearch.search.facet.terms;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.InternalFacet;
import org.elasticsearch.search.facet.terms.bytes.InternalByteTermsFacet;
import org.elasticsearch.search.facet.terms.doubles.InternalDoubleTermsFacet;
import org.elasticsearch.search.facet.terms.floats.InternalFloatTermsFacet;
import org.elasticsearch.search.facet.terms.ints.InternalIntTermsFacet;
import org.elasticsearch.search.facet.terms.ip.InternalIpTermsFacet;
import org.elasticsearch.search.facet.terms.longs.InternalLongTermsFacet;
import org.elasticsearch.search.facet.terms.shorts.InternalShortTermsFacet;
import org.elasticsearch.search.facet.terms.strings.InternalStringTermsFacet;
import java.util.List;
@@ -41,11 +36,6 @@ public abstract class InternalTermsFacet implements TermsFacet, InternalFacet {
InternalStringTermsFacet.registerStream();
InternalLongTermsFacet.registerStream();
InternalDoubleTermsFacet.registerStream();
InternalIntTermsFacet.registerStream();
InternalFloatTermsFacet.registerStream();
InternalShortTermsFacet.registerStream();
InternalByteTermsFacet.registerStream();
InternalIpTermsFacet.registerStream();
}
public abstract Facet reduce(String name, List<Facet> facets);


@@ -27,27 +27,18 @@ import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.IndexOrdinalFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.ip.IpFieldMapper;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.FacetCollector;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.FacetProcessor;
import org.elasticsearch.search.facet.terms.bytes.TermsByteFacetCollector;
import org.elasticsearch.search.facet.terms.bytes.TermsByteOrdinalsFacetCollector;
import org.elasticsearch.search.facet.terms.doubles.TermsDoubleFacetCollector;
import org.elasticsearch.search.facet.terms.doubles.TermsDoubleOrdinalsFacetCollector;
import org.elasticsearch.search.facet.terms.floats.TermsFloatFacetCollector;
import org.elasticsearch.search.facet.terms.floats.TermsFloatOrdinalsFacetCollector;
import org.elasticsearch.search.facet.terms.index.IndexNameFacetCollector;
import org.elasticsearch.search.facet.terms.ints.TermsIntFacetCollector;
import org.elasticsearch.search.facet.terms.ints.TermsIntOrdinalsFacetCollector;
import org.elasticsearch.search.facet.terms.ip.TermsIpFacetCollector;
import org.elasticsearch.search.facet.terms.ip.TermsIpOrdinalsFacetCollector;
import org.elasticsearch.search.facet.terms.longs.TermsLongFacetCollector;
import org.elasticsearch.search.facet.terms.longs.TermsLongOrdinalsFacetCollector;
import org.elasticsearch.search.facet.terms.shorts.TermsShortFacetCollector;
import org.elasticsearch.search.facet.terms.shorts.TermsShortOrdinalsFacetCollector;
import org.elasticsearch.search.facet.terms.strings.FieldsTermsStringFacetCollector;
import org.elasticsearch.search.facet.terms.strings.ScriptTermsStringFieldFacetCollector;
import org.elasticsearch.search.facet.terms.strings.TermsStringFacetCollector;
@@ -147,64 +138,41 @@ public class TermsFacetProcessor extends AbstractComponent implements FacetProce
if (regex != null) {
pattern = Regex.compile(regex, regexFlags);
}
SearchScript searchScript = null;
if (script != null) {
searchScript = context.scriptService().search(context.lookup(), scriptLang, script, params);
}
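// the script is compiled once here; collectors now receive the resolved SearchScript instead of (scriptLang, script, params)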
if (fieldsNames != null) {
return new FieldsTermsStringFacetCollector(facetName, fieldsNames, size, comparatorType, allTerms, context, excluded, pattern, scriptLang, script, params);
return new FieldsTermsStringFacetCollector(facetName, fieldsNames, size, comparatorType, allTerms, context, excluded, pattern, searchScript);
}
if (field == null && fieldsNames == null && script != null) {
return new ScriptTermsStringFieldFacetCollector(facetName, size, comparatorType, context, excluded, pattern, scriptLang, script, params);
}
FieldMapper fieldMapper = context.smartNameFieldMapper(field);
if (fieldMapper != null) {
if (fieldMapper instanceof IpFieldMapper) {
if (script != null || "map".equals(executionHint)) {
return new TermsIpFacetCollector(facetName, field, size, comparatorType, allTerms, context, scriptLang, script, params);
} else {
return new TermsIpOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, null);
}
} else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.LONG) {
if (script != null || "map".equals(executionHint)) {
return new TermsLongFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params);
} else {
return new TermsLongOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded);
}
} else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.DOUBLE) {
if (script != null) {
return new TermsDoubleFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params);
} else {
return new TermsDoubleOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded);
}
} else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.INT) {
if (script != null || "map".equals(executionHint)) {
return new TermsIntFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params);
} else {
return new TermsIntOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded);
}
} else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.FLOAT) {
if (script != null || "map".equals(executionHint)) {
return new TermsFloatFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params);
} else {
return new TermsFloatOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded);
}
} else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.SHORT) {
if (script != null || "map".equals(executionHint)) {
return new TermsShortFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params);
} else {
return new TermsShortOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded);
}
} else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.BYTE) {
if (script != null || "map".equals(executionHint)) {
return new TermsByteFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params);
} else {
return new TermsByteOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded);
}
} else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.STRING) {
if (script == null && !"map".equals(executionHint)) {
return new TermsStringOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, pattern);
}
if (fieldMapper == null) {
throw new FacetPhaseExecutionException(facetName, "failed to find mapping for [" + field + "]");
}
IndexFieldData indexFieldData = context.fieldData().getForField(fieldMapper);
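// dispatch on the resolved field data type: numeric fields collect as longs or doubles, everything else as strings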
if (indexFieldData instanceof IndexNumericFieldData) {
IndexNumericFieldData indexNumericFieldData = (IndexNumericFieldData) indexFieldData;
if (indexNumericFieldData.getNumericType().isFloatingPoint()) {
return new TermsDoubleFacetCollector(facetName, indexNumericFieldData, size, comparatorType, allTerms, context, excluded, searchScript);
} else {
return new TermsLongFacetCollector(facetName, indexNumericFieldData, size, comparatorType, allTerms, context, excluded, searchScript);
}
} else {
if (script != null || "map".equals(executionHint)) {
return new TermsStringFacetCollector(facetName, indexFieldData, size, comparatorType, allTerms, context, excluded, pattern, searchScript);
} else if (indexFieldData instanceof IndexOrdinalFieldData) {
return new TermsStringOrdinalsFacetCollector(facetName, (IndexOrdinalFieldData) indexFieldData, size, comparatorType, allTerms, context, excluded, pattern);
} else {
return new TermsStringFacetCollector(facetName, indexFieldData, size, comparatorType, allTerms, context, excluded, pattern, searchScript);
}
}
return new TermsStringFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, pattern, scriptLang, script, params);
}
@Override


@@ -1,312 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.bytes;
import com.google.common.collect.ImmutableList;
import gnu.trove.iterator.TByteIntIterator;
import gnu.trove.map.hash.TByteIntHashMap;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.InternalTermsFacet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
/**
*
*/
public class InternalByteTermsFacet extends InternalTermsFacet {
private static final String STREAM_TYPE = "bTerms";
public static void registerStream() {
Streams.registerStream(STREAM, STREAM_TYPE);
}
static Stream STREAM = new Stream() {
@Override
public Facet readFacet(String type, StreamInput in) throws IOException {
return readTermsFacet(in);
}
};
@Override
public String streamType() {
return STREAM_TYPE;
}
public static class ByteEntry implements Entry {
byte term;
int count;
public ByteEntry(byte term, int count) {
this.term = term;
this.count = count;
}
public Text term() {
return new StringText(Byte.toString(term));
}
public Text getTerm() {
return term();
}
@Override
public Number termAsNumber() {
return term;
}
@Override
public Number getTermAsNumber() {
return termAsNumber();
}
public int count() {
return count;
}
public int getCount() {
return count();
}
@Override
public int compareTo(Entry o) {
byte anotherVal = ((ByteEntry) o).term;
int i = term - anotherVal;
if (i == 0) {
i = count - o.count();
if (i == 0) {
i = System.identityHashCode(this) - System.identityHashCode(o);
}
}
return i;
}
}
private String name;
int requiredSize;
long missing;
long total;
Collection<ByteEntry> entries = ImmutableList.of();
ComparatorType comparatorType;
InternalByteTermsFacet() {
}
public InternalByteTermsFacet(String name, ComparatorType comparatorType, int requiredSize, Collection<ByteEntry> entries, long missing, long total) {
this.name = name;
this.comparatorType = comparatorType;
this.requiredSize = requiredSize;
this.entries = entries;
this.missing = missing;
this.total = total;
}
@Override
public String name() {
return this.name;
}
@Override
public String getName() {
return this.name;
}
@Override
public String type() {
return TYPE;
}
@Override
public String getType() {
return type();
}
@Override
public long missingCount() {
return this.missing;
}
@Override
public long getMissingCount() {
return missingCount();
}
@Override
public long totalCount() {
return this.total;
}
@Override
public long getTotalCount() {
return totalCount();
}
@Override
public long otherCount() {
long other = total;
for (Entry entry : entries) {
other -= entry.count();
}
return other;
}
@Override
public long getOtherCount() {
return otherCount();
}
@Override
public List<ByteEntry> entries() {
if (!(entries instanceof List)) {
entries = ImmutableList.copyOf(entries);
}
return (List<ByteEntry>) entries;
}
@Override
public List<ByteEntry> getEntries() {
return entries();
}
@SuppressWarnings({"unchecked"})
@Override
public Iterator<Entry> iterator() {
return (Iterator) entries.iterator();
}
@Override
public Facet reduce(String name, List<Facet> facets) {
if (facets.size() == 1) {
return facets.get(0);
}
InternalByteTermsFacet first = (InternalByteTermsFacet) facets.get(0);
TByteIntHashMap aggregated = CacheRecycler.popByteIntMap();
long missing = 0;
long total = 0;
for (Facet facet : facets) {
InternalByteTermsFacet mFacet = (InternalByteTermsFacet) facet;
missing += mFacet.missingCount();
total += mFacet.totalCount();
for (ByteEntry entry : mFacet.entries) {
aggregated.adjustOrPutValue(entry.term, entry.count(), entry.count());
}
}
BoundedTreeSet<ByteEntry> ordered = new BoundedTreeSet<ByteEntry>(first.comparatorType.comparator(), first.requiredSize);
for (TByteIntIterator it = aggregated.iterator(); it.hasNext(); ) {
it.advance();
ordered.add(new ByteEntry(it.key(), it.value()));
}
first.entries = ordered;
first.missing = missing;
first.total = total;
CacheRecycler.pushByteIntMap(aggregated);
return first;
}
static final class Fields {
static final XContentBuilderString _TYPE = new XContentBuilderString("_type");
static final XContentBuilderString MISSING = new XContentBuilderString("missing");
static final XContentBuilderString TOTAL = new XContentBuilderString("total");
static final XContentBuilderString OTHER = new XContentBuilderString("other");
static final XContentBuilderString TERMS = new XContentBuilderString("terms");
static final XContentBuilderString TERM = new XContentBuilderString("term");
static final XContentBuilderString COUNT = new XContentBuilderString("count");
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(name);
builder.field(Fields._TYPE, TermsFacet.TYPE);
builder.field(Fields.MISSING, missing);
builder.field(Fields.TOTAL, total);
builder.field(Fields.OTHER, otherCount());
builder.startArray(Fields.TERMS);
for (ByteEntry entry : entries) {
builder.startObject();
builder.field(Fields.TERM, entry.term);
builder.field(Fields.COUNT, entry.count());
builder.endObject();
}
builder.endArray();
builder.endObject();
return builder;
}
public static InternalByteTermsFacet readTermsFacet(StreamInput in) throws IOException {
InternalByteTermsFacet facet = new InternalByteTermsFacet();
facet.readFrom(in);
return facet;
}
@Override
public void readFrom(StreamInput in) throws IOException {
name = in.readString();
comparatorType = ComparatorType.fromId(in.readByte());
requiredSize = in.readVInt();
missing = in.readVLong();
total = in.readVLong();
int size = in.readVInt();
entries = new ArrayList<ByteEntry>(size);
for (int i = 0; i < size; i++) {
entries.add(new ByteEntry(in.readByte(), in.readVInt()));
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(name);
out.writeByte(comparatorType.id());
out.writeVInt(requiredSize);
out.writeVLong(missing);
out.writeVLong(total);
out.writeVInt(entries.size());
for (ByteEntry entry : entries) {
out.writeByte(entry.term);
out.writeVInt(entry.count());
}
}
}


@@ -1,255 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.bytes;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import gnu.trove.iterator.TByteIntIterator;
import gnu.trove.map.hash.TByteIntHashMap;
import gnu.trove.set.hash.TByteHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.bytes.ByteFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
/**
*
*/
public class TermsByteFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final TermsFacet.ComparatorType comparatorType;
private final int size;
private final int numberOfShards;
private final FieldDataType fieldDataType;
private ByteFieldData fieldData;
private final StaticAggregatorValueProc aggregator;
private final SearchScript script;
public TermsByteFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, String scriptLang, String script, Map<String, Object> params) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms byte facet collector on it");
}
// add type filter if there is an exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.BYTE) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of byte type, can't run terms byte facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (script != null) {
this.script = context.scriptService().search(context.lookup(), scriptLang, script, params);
} else {
this.script = null;
}
if (this.script == null && excluded.isEmpty()) {
aggregator = new StaticAggregatorValueProc(CacheRecycler.popByteIntMap());
} else {
aggregator = new AggregatorValueProc(CacheRecycler.popByteIntMap(), excluded, this.script);
}
if (allTerms) {
try {
for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
ByteFieldData fieldData = (ByteFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
fieldData.forEachValue(aggregator);
}
} catch (Exception e) {
throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
}
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
if (script != null) {
script.setScorer(scorer);
}
}
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
fieldData = (ByteFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
if (script != null) {
script.setNextReader(context);
}
}
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachValueInDoc(doc, aggregator);
}
@Override
public Facet facet() {
TByteIntHashMap facets = aggregator.facets();
if (facets.isEmpty()) {
CacheRecycler.pushByteIntMap(facets);
return new InternalByteTermsFacet(facetName, comparatorType, size, ImmutableList.<InternalByteTermsFacet.ByteEntry>of(), aggregator.missing(), aggregator.total());
} else {
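// two sizing strategies: a fixed-capacity priority queue below the limit, a bounded tree set above it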
if (size < EntryPriorityQueue.LIMIT) {
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
for (TByteIntIterator it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.insertWithOverflow(new InternalByteTermsFacet.ByteEntry(it.key(), it.value()));
}
InternalByteTermsFacet.ByteEntry[] list = new InternalByteTermsFacet.ByteEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = (InternalByteTermsFacet.ByteEntry) ordered.pop();
}
CacheRecycler.pushByteIntMap(facets);
return new InternalByteTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing(), aggregator.total());
} else {
BoundedTreeSet<InternalByteTermsFacet.ByteEntry> ordered = new BoundedTreeSet<InternalByteTermsFacet.ByteEntry>(comparatorType.comparator(), size);
for (TByteIntIterator it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.add(new InternalByteTermsFacet.ByteEntry(it.key(), it.value()));
}
CacheRecycler.pushByteIntMap(facets);
return new InternalByteTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing(), aggregator.total());
}
}
}
public static class AggregatorValueProc extends StaticAggregatorValueProc {
private final SearchScript script;
private final TByteHashSet excluded;
public AggregatorValueProc(TByteIntHashMap facets, Set<BytesRef> excluded, SearchScript script) {
super(facets);
if (excluded == null || excluded.isEmpty()) {
this.excluded = null;
} else {
this.excluded = new TByteHashSet(excluded.size());
for (BytesRef s : excluded) {
this.excluded.add(Byte.parseByte(s.utf8ToString()));
}
}
this.script = script;
}
@Override
public void onValue(int docId, byte value) {
if (excluded != null && excluded.contains(value)) {
return;
}
if (script != null) {
script.setNextDocId(docId);
script.setNextVar("term", value);
Object scriptValue = script.run();
if (scriptValue == null) {
return;
}
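// a Boolean script result filters the term; any other (Number) result replaces the value being counted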
if (scriptValue instanceof Boolean) {
if (!((Boolean) scriptValue)) {
return;
}
} else {
value = ((Number) scriptValue).byteValue();
}
}
super.onValue(docId, value);
}
}
public static class StaticAggregatorValueProc implements ByteFieldData.ValueInDocProc, ByteFieldData.ValueProc {
private final TByteIntHashMap facets;
private int missing;
private int total;
public StaticAggregatorValueProc(TByteIntHashMap facets) {
this.facets = facets;
}
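// onValue(value) is the allTerms pre-load hook: register a term with a zero count.
// onValue(docId, value) is the per-document hook: increment the term's count.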
@Override
public void onValue(byte value) {
facets.putIfAbsent(value, 0);
}
@Override
public void onValue(int docId, byte value) {
facets.adjustOrPutValue(value, 1, 1);
total++;
}
@Override
public void onMissing(int docID) {
missing++;
}
public final TByteIntHashMap facets() {
return facets;
}
public final int missing() {
return this.missing;
}
public final int total() {
return this.total;
}
}
}


@@ -1,268 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.bytes;
import com.google.common.collect.ImmutableSet;
import gnu.trove.set.hash.TByteHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldData;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.bytes.ByteFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
*
*/
public class TermsByteOrdinalsFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final TermsFacet.ComparatorType comparatorType;
private final int size;
private final int numberOfShards;
private final int minCount;
private final FieldDataType fieldDataType;
private ByteFieldData fieldData;
private final List<ReaderAggregator> aggregators;
private ReaderAggregator current;
long missing;
long total;
private final TByteHashSet excluded;
public TermsByteOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms byte facet collector on it");
}
// add type filter if there is an exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.BYTE) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of byte type, can't run terms byte facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (excluded == null || excluded.isEmpty()) {
this.excluded = null;
} else {
this.excluded = new TByteHashSet(excluded.size());
for (BytesRef s : excluded) {
this.excluded.add(Byte.parseByte(s.utf8ToString()));
}
}
// minCount is offset by -1: with allTerms it is -1 so that zero-count terms still pass the count > minCount check
if (allTerms) {
minCount = -1;
} else {
minCount = 0;
}
this.aggregators = new ArrayList<ReaderAggregator>(context.searcher().getIndexReader().leaves().size());
}
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
if (current != null) {
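// ordinal 0 holds docs without a value, so counts[0] is this reader's missing count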
missing += current.counts[0];
total += current.total - current.counts[0];
if (current.values.length > 1) {
aggregators.add(current);
}
}
fieldData = (ByteFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
current = new ReaderAggregator(fieldData);
}
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachOrdinalInDoc(doc, current);
}
@Override
public Facet facet() {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
// if we have values for this one, add it
if (current.values.length > 1) {
aggregators.add(current);
}
}
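// merge per-reader counts with a heap ordered by the current term; equal terms across readers are summed before being emitted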
AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size());
for (ReaderAggregator aggregator : aggregators) {
if (aggregator.nextPosition()) {
queue.add(aggregator);
}
}
// YACK, we repeat the same logic, but once with an optimized priority queue for smaller sizes
if (size < EntryPriorityQueue.LIMIT) {
// optimized path: a priority queue bounded to the requested size
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
byte value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalByteTermsFacet.ByteEntry entry = new InternalByteTermsFacet.ByteEntry(value, count);
ordered.insertWithOverflow(entry);
}
}
}
InternalByteTermsFacet.ByteEntry[] list = new InternalByteTermsFacet.ByteEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = (InternalByteTermsFacet.ByteEntry) ordered.pop();
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalByteTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total);
}
BoundedTreeSet<InternalByteTermsFacet.ByteEntry> ordered = new BoundedTreeSet<InternalByteTermsFacet.ByteEntry>(comparatorType.comparator(), size);
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
byte value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalByteTermsFacet.ByteEntry entry = new InternalByteTermsFacet.ByteEntry(value, count);
ordered.add(entry);
}
}
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalByteTermsFacet(facetName, comparatorType, size, ordered, missing, total);
}
public static class ReaderAggregator implements FieldData.OrdinalInDocProc {
final byte[] values;
final int[] counts;
int position = 0;
byte current;
int total;
public ReaderAggregator(ByteFieldData fieldData) {
this.values = fieldData.values();
this.counts = CacheRecycler.popIntArray(fieldData.values().length);
}
@Override
public void onOrdinal(int docId, int ordinal) {
counts[ordinal]++;
total++;
}
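// advance to this reader's next distinct value; false once the values array is exhausted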
public boolean nextPosition() {
if (++position >= values.length) {
return false;
}
current = values[position];
return true;
}
}
public static class AggregatorPriorityQueue extends PriorityQueue<ReaderAggregator> {
public AggregatorPriorityQueue(int size) {
super(size);
}
@Override
protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) {
return a.current < b.current;
}
}
}


@@ -27,24 +27,19 @@ import gnu.trove.set.hash.TDoubleHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.doubles.DoubleFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.fielddata.DoubleValues;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
/**
@@ -52,9 +47,7 @@ import java.util.Set;
*/
public class TermsDoubleFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final IndexNumericFieldData indexFieldData;
private final TermsFacet.ComparatorType comparatorType;
@@ -62,43 +55,20 @@ public class TermsDoubleFacetCollector extends AbstractFacetCollector {
private final int numberOfShards;
private final FieldDataType fieldDataType;
private DoubleFieldData fieldData;
private DoubleValues values;
private final StaticAggregatorValueProc aggregator;
private final SearchScript script;
public TermsDoubleFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, String scriptLang, String script, Map<String, Object> params) {
public TermsDoubleFacetCollector(String facetName, IndexNumericFieldData indexFieldData, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, SearchScript script) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.indexFieldData = indexFieldData;
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms double facet collector on it");
}
// add type filter if there is an exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.DOUBLE) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of double type, can't run terms double facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (script != null) {
this.script = context.scriptService().search(context.lookup(), scriptLang, script, params);
} else {
this.script = null;
}
this.script = script;
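// the compiled SearchScript now arrives from TermsFacetProcessor instead of being resolved here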
if (this.script == null && excluded.isEmpty()) {
aggregator = new StaticAggregatorValueProc(CacheRecycler.popDoubleIntMap());
@@ -106,16 +76,17 @@ public class TermsDoubleFacetCollector extends AbstractFacetCollector {
aggregator = new AggregatorValueProc(CacheRecycler.popDoubleIntMap(), excluded, this.script);
}
if (allTerms) {
try {
for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
DoubleFieldData fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
fieldData.forEachValue(aggregator);
}
} catch (Exception e) {
throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
}
}
// TODO: we need to support this with the new field data....
// if (allTerms) {
// try {
// for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
// DoubleFieldData fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
// fieldData.forEachValue(aggregator);
// }
// } catch (Exception e) {
// throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
// }
// }
}
@Override
@@ -127,7 +98,7 @@ public class TermsDoubleFacetCollector extends AbstractFacetCollector {
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
values = indexFieldData.load(context).getDoubleValues();
if (script != null) {
script.setNextReader(context);
}
@@ -135,7 +106,7 @@ public class TermsDoubleFacetCollector extends AbstractFacetCollector {
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachValueInDoc(doc, aggregator);
values.forEachValueInDoc(doc, aggregator);
}
@Override
@@ -212,7 +183,7 @@ public class TermsDoubleFacetCollector extends AbstractFacetCollector {
}
}
public static class StaticAggregatorValueProc implements DoubleFieldData.ValueInDocProc, DoubleFieldData.ValueProc {
public static class StaticAggregatorValueProc implements DoubleValues.ValueInDocProc {
private final TDoubleIntHashMap facets;
@@ -223,11 +194,6 @@ public class TermsDoubleFacetCollector extends AbstractFacetCollector {
this.facets = facets;
}
@Override
public void onValue(double value) {
facets.putIfAbsent(value, 0);
}
@Override
public void onValue(int docId, double value) {
facets.adjustOrPutValue(value, 1, 1);


@@ -1,267 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.doubles;
import com.google.common.collect.ImmutableSet;
import gnu.trove.set.hash.TDoubleHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldData;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.doubles.DoubleFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
*
*/
public class TermsDoubleOrdinalsFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final TermsFacet.ComparatorType comparatorType;
private final int size;
private final int numberOfShards;
private final int minCount;
private final FieldDataType fieldDataType;
private DoubleFieldData fieldData;
private final List<ReaderAggregator> aggregators;
private ReaderAggregator current;
long missing;
long total;
private final TDoubleHashSet excluded;
public TermsDoubleOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms double facet collector on it");
}
// add type filter if there is an exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.DOUBLE) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of double type, can't run terms double facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (excluded == null || excluded.isEmpty()) {
this.excluded = null;
} else {
this.excluded = new TDoubleHashSet(excluded.size());
for (BytesRef s : excluded) {
this.excluded.add(Double.parseDouble(s.utf8ToString()));
}
}
// minCount is offset by -1: with allTerms it is -1 so that zero-count terms still pass the count > minCount check
if (allTerms) {
minCount = -1;
} else {
minCount = 0;
}
this.aggregators = new ArrayList<ReaderAggregator>(context.searcher().getIndexReader().leaves().size());
}
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
if (current != null) {
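// ordinal 0 holds docs without a value, so counts[0] is this reader's missing count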
missing += current.counts[0];
total += current.total - current.counts[0];
if (current.values.length > 1) {
aggregators.add(current);
}
}
fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
current = new ReaderAggregator(fieldData);
}
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachOrdinalInDoc(doc, current);
}
@Override
public Facet facet() {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
// if we have values for this one, add it
if (current.values.length > 1) {
aggregators.add(current);
}
}
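// merge per-reader counts with a heap ordered by the current term, summing equal terms across readers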
AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size());
for (ReaderAggregator aggregator : aggregators) {
if (aggregator.nextPosition()) {
queue.add(aggregator);
}
}
// YACK, we repeat the same logic, but once with an optimized priority queue for smaller sizes
if (size < EntryPriorityQueue.LIMIT) {
// optimized path: a priority queue bounded to the requested size
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
double value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalDoubleTermsFacet.DoubleEntry entry = new InternalDoubleTermsFacet.DoubleEntry(value, count);
ordered.insertWithOverflow(entry);
}
}
}
InternalDoubleTermsFacet.DoubleEntry[] list = new InternalDoubleTermsFacet.DoubleEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = (InternalDoubleTermsFacet.DoubleEntry) ordered.pop();
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalDoubleTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total);
}
BoundedTreeSet<InternalDoubleTermsFacet.DoubleEntry> ordered = new BoundedTreeSet<InternalDoubleTermsFacet.DoubleEntry>(comparatorType.comparator(), size);
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
double value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalDoubleTermsFacet.DoubleEntry entry = new InternalDoubleTermsFacet.DoubleEntry(value, count);
ordered.add(entry);
}
}
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalDoubleTermsFacet(facetName, comparatorType, size, ordered, missing, total);
}
public static class ReaderAggregator implements FieldData.OrdinalInDocProc {
final double[] values;
final int[] counts;
int position = 0;
double current;
int total;
public ReaderAggregator(DoubleFieldData fieldData) {
this.values = fieldData.values();
this.counts = CacheRecycler.popIntArray(fieldData.values().length);
}
@Override
public void onOrdinal(int docId, int ordinal) {
counts[ordinal]++;
total++;
}
public boolean nextPosition() {
if (++position >= values.length) {
return false;
}
current = values[position];
return true;
}
}
public static class AggregatorPriorityQueue extends PriorityQueue<ReaderAggregator> {
public AggregatorPriorityQueue(int size) {
super(size);
}
@Override
protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) {
return a.current < b.current;
}
}
}


@@ -1,311 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.floats;
import com.google.common.collect.ImmutableList;
import gnu.trove.iterator.TFloatIntIterator;
import gnu.trove.map.hash.TFloatIntHashMap;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.InternalTermsFacet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
/**
*
*/
public class InternalFloatTermsFacet extends InternalTermsFacet {
private static final String STREAM_TYPE = "fTerms";
public static void registerStream() {
Streams.registerStream(STREAM, STREAM_TYPE);
}
static Stream STREAM = new Stream() {
@Override
public Facet readFacet(String type, StreamInput in) throws IOException {
return readTermsFacet(in);
}
};
@Override
public String streamType() {
return STREAM_TYPE;
}
public static class FloatEntry implements Entry {
float term;
int count;
public FloatEntry(float term, int count) {
this.term = term;
this.count = count;
}
public Text term() {
return new StringText(Float.toString(term));
}
public Text getTerm() {
return term();
}
@Override
public Number termAsNumber() {
return term;
}
@Override
public Number getTermAsNumber() {
return termAsNumber();
}
public int count() {
return count;
}
public int getCount() {
return count();
}
@Override
public int compareTo(Entry o) {
float anotherVal = ((FloatEntry) o).term;
if (term < anotherVal) {
return -1;
}
if (term == anotherVal) {
int i = count - o.count();
if (i == 0) {
i = System.identityHashCode(this) - System.identityHashCode(o);
}
return i;
}
return 1;
}
}
private String name;
int requiredSize;
long missing;
long total;
Collection<FloatEntry> entries = ImmutableList.of();
ComparatorType comparatorType;
InternalFloatTermsFacet() {
}
public InternalFloatTermsFacet(String name, ComparatorType comparatorType, int requiredSize, Collection<FloatEntry> entries, long missing, long total) {
this.name = name;
this.comparatorType = comparatorType;
this.requiredSize = requiredSize;
this.entries = entries;
this.missing = missing;
this.total = total;
}
@Override
public String name() {
return this.name;
}
@Override
public String getName() {
return this.name;
}
@Override
public String type() {
return TYPE;
}
@Override
public String getType() {
return type();
}
@Override
public List<FloatEntry> entries() {
if (!(entries instanceof List)) {
entries = ImmutableList.copyOf(entries);
}
return (List<FloatEntry>) entries;
}
@Override
public List<FloatEntry> getEntries() {
return entries();
}
@SuppressWarnings({"unchecked"})
@Override
public Iterator<Entry> iterator() {
return (Iterator) entries.iterator();
}
@Override
public long missingCount() {
return this.missing;
}
@Override
public long getMissingCount() {
return missingCount();
}
@Override
public long totalCount() {
return this.total;
}
@Override
public long getTotalCount() {
return totalCount();
}
@Override
public long otherCount() {
long other = total;
for (Entry entry : entries) {
other -= entry.count();
}
return other;
}
@Override
public long getOtherCount() {
return otherCount();
}
@Override
public Facet reduce(String name, List<Facet> facets) {
if (facets.size() == 1) {
return facets.get(0);
}
InternalFloatTermsFacet first = (InternalFloatTermsFacet) facets.get(0);
TFloatIntHashMap aggregated = CacheRecycler.popFloatIntMap();
long missing = 0;
long total = 0;
for (Facet facet : facets) {
InternalFloatTermsFacet mFacet = (InternalFloatTermsFacet) facet;
missing += mFacet.missingCount();
total += mFacet.totalCount();
for (FloatEntry entry : mFacet.entries) {
aggregated.adjustOrPutValue(entry.term, entry.count(), entry.count());
}
}
BoundedTreeSet<FloatEntry> ordered = new BoundedTreeSet<FloatEntry>(first.comparatorType.comparator(), first.requiredSize);
for (TFloatIntIterator it = aggregated.iterator(); it.hasNext(); ) {
it.advance();
ordered.add(new FloatEntry(it.key(), it.value()));
}
first.entries = ordered;
first.missing = missing;
first.total = total;
CacheRecycler.pushFloatIntMap(aggregated);
return first;
}
static final class Fields {
static final XContentBuilderString _TYPE = new XContentBuilderString("_type");
static final XContentBuilderString MISSING = new XContentBuilderString("missing");
static final XContentBuilderString TOTAL = new XContentBuilderString("total");
static final XContentBuilderString OTHER = new XContentBuilderString("other");
static final XContentBuilderString TERMS = new XContentBuilderString("terms");
static final XContentBuilderString TERM = new XContentBuilderString("term");
static final XContentBuilderString COUNT = new XContentBuilderString("count");
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(name);
builder.field(Fields._TYPE, TermsFacet.TYPE);
builder.field(Fields.MISSING, missing);
builder.field(Fields.TOTAL, total);
builder.field(Fields.OTHER, otherCount());
builder.startArray(Fields.TERMS);
for (FloatEntry entry : entries) {
builder.startObject();
builder.field(Fields.TERM, entry.term);
builder.field(Fields.COUNT, entry.count());
builder.endObject();
}
builder.endArray();
builder.endObject();
return builder;
}
public static InternalFloatTermsFacet readTermsFacet(StreamInput in) throws IOException {
InternalFloatTermsFacet facet = new InternalFloatTermsFacet();
facet.readFrom(in);
return facet;
}
@Override
public void readFrom(StreamInput in) throws IOException {
name = in.readString();
comparatorType = ComparatorType.fromId(in.readByte());
requiredSize = in.readVInt();
missing = in.readVLong();
total = in.readVLong();
int size = in.readVInt();
entries = new ArrayList<FloatEntry>(size);
for (int i = 0; i < size; i++) {
entries.add(new FloatEntry(in.readFloat(), in.readVInt()));
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(name);
out.writeByte(comparatorType.id());
out.writeVInt(requiredSize);
out.writeVLong(missing);
out.writeVLong(total);
out.writeVInt(entries.size());
for (FloatEntry entry : entries) {
out.writeFloat(entry.term);
out.writeVInt(entry.count());
}
}
}


@@ -1,254 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.floats;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import gnu.trove.iterator.TFloatIntIterator;
import gnu.trove.map.hash.TFloatIntHashMap;
import gnu.trove.set.hash.TFloatHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.floats.FloatFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
/**
*
*/
public class TermsFloatFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final TermsFacet.ComparatorType comparatorType;
private final int size;
private final int numberOfShards;
private final FieldDataType fieldDataType;
private FloatFieldData fieldData;
private final StaticAggregatorValueProc aggregator;
private final SearchScript script;
public TermsFloatFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, String scriptLang, String script, Map<String, Object> params) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms float facet collector on it");
}
// add type filter if there is exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.FLOAT) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of float type, can't run terms float facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (script != null) {
this.script = context.scriptService().search(context.lookup(), scriptLang, script, params);
} else {
this.script = null;
}
if (this.script == null && excluded.isEmpty()) {
aggregator = new StaticAggregatorValueProc(CacheRecycler.popFloatIntMap());
} else {
aggregator = new AggregatorValueProc(CacheRecycler.popFloatIntMap(), excluded, this.script);
}
if (allTerms) {
try {
for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
FloatFieldData fieldData = (FloatFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
fieldData.forEachValue(aggregator);
}
} catch (Exception e) {
throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
}
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
if (script != null) {
script.setScorer(scorer);
}
}
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
fieldData = (FloatFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
if (script != null) {
script.setNextReader(context);
}
}
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachValueInDoc(doc, aggregator);
}
@Override
public Facet facet() {
TFloatIntHashMap facets = aggregator.facets();
if (facets.isEmpty()) {
CacheRecycler.pushFloatIntMap(facets);
return new InternalFloatTermsFacet(facetName, comparatorType, size, ImmutableList.<InternalFloatTermsFacet.FloatEntry>of(), aggregator.missing(), aggregator.total());
} else {
if (size < EntryPriorityQueue.LIMIT) {
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
for (TFloatIntIterator it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.insertWithOverflow(new InternalFloatTermsFacet.FloatEntry(it.key(), it.value()));
}
InternalFloatTermsFacet.FloatEntry[] list = new InternalFloatTermsFacet.FloatEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = (InternalFloatTermsFacet.FloatEntry) ordered.pop();
}
CacheRecycler.pushFloatIntMap(facets);
return new InternalFloatTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing(), aggregator.total());
} else {
BoundedTreeSet<InternalFloatTermsFacet.FloatEntry> ordered = new BoundedTreeSet<InternalFloatTermsFacet.FloatEntry>(comparatorType.comparator(), size);
for (TFloatIntIterator it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.add(new InternalFloatTermsFacet.FloatEntry(it.key(), it.value()));
}
CacheRecycler.pushFloatIntMap(facets);
return new InternalFloatTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing(), aggregator.total());
}
}
}
public static class AggregatorValueProc extends StaticAggregatorValueProc {
private final SearchScript script;
private final TFloatHashSet excluded;
public AggregatorValueProc(TFloatIntHashMap facets, Set<BytesRef> excluded, SearchScript script) {
super(facets);
if (excluded == null || excluded.isEmpty()) {
this.excluded = null;
} else {
this.excluded = new TFloatHashSet(excluded.size());
for (BytesRef s : excluded) {
this.excluded.add(Float.parseFloat(s.utf8ToString()));
}
}
this.script = script;
}
@Override
public void onValue(int docId, float value) {
if (excluded != null && excluded.contains(value)) {
return;
}
if (script != null) {
script.setNextDocId(docId);
script.setNextVar("term", value);
Object scriptValue = script.run();
if (scriptValue == null) {
return;
}
if (scriptValue instanceof Boolean) {
if (!((Boolean) scriptValue)) {
return;
}
} else {
value = ((Number) scriptValue).floatValue();
}
}
super.onValue(docId, value);
}
}
public static class StaticAggregatorValueProc implements FloatFieldData.ValueInDocProc, FloatFieldData.ValueProc {
private final TFloatIntHashMap facets;
private int missing;
private int total;
public StaticAggregatorValueProc(TFloatIntHashMap facets) {
this.facets = facets;
}
@Override
public void onValue(float value) {
facets.putIfAbsent(value, 0);
}
@Override
public void onValue(int docId, float value) {
facets.adjustOrPutValue(value, 1, 1);
total++;
}
@Override
public void onMissing(int docId) {
missing++;
}
public final TFloatIntHashMap facets() {
return facets;
}
public final int missing() {
return this.missing;
}
public final int total() {
return this.total;
}
}
}
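Note: facet() above is a top-N selection over the per-value count map: below EntryPriorityQueue.LIMIT it uses a fixed-capacity heap with insertWithOverflow, otherwise a BoundedTreeSet, presumably so a very large requested size does not preallocate an equally large heap array. A self-contained JDK sketch of the heap path (HashMap and java.util.PriorityQueue standing in for the Trove map and the Lucene queue):

import java.util.*;

// Sketch: keep the N highest-count terms with a size-N min-heap, mirroring
// the EntryPriorityQueue branch of facet() above. Names are illustrative.
public class TopNSketch {
    public static void main(String[] args) {
        Map<Float, Integer> counts = new HashMap<>();
        counts.put(1.5f, 7);
        counts.put(2.5f, 3);
        counts.put(3.5f, 9);

        int size = 2;
        PriorityQueue<Map.Entry<Float, Integer>> heap =
                new PriorityQueue<>(size, (a, b) -> Integer.compare(a.getValue(), b.getValue()));
        for (Map.Entry<Float, Integer> e : counts.entrySet()) {
            heap.offer(e);
            if (heap.size() > size) {
                heap.poll(); // evict the current lowest count
            }
        }

        // Drain lowest-first, prepending so the result ends up count-descending.
        Deque<Map.Entry<Float, Integer>> top = new ArrayDeque<>();
        while (!heap.isEmpty()) {
            top.addFirst(heap.poll());
        }
        System.out.println(top); // [3.5=9, 1.5=7]
    }
}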

View File

@@ -1,267 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.floats;
import com.google.common.collect.ImmutableSet;
import gnu.trove.set.hash.TFloatHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldData;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.floats.FloatFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
*
*/
public class TermsFloatOrdinalsFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final TermsFacet.ComparatorType comparatorType;
private final int size;
private final int numberOfShards;
private final int minCount;
private final FieldDataType fieldDataType;
private FloatFieldData fieldData;
private final List<ReaderAggregator> aggregators;
private ReaderAggregator current;
long missing;
long total;
private final TFloatHashSet excluded;
public TermsFloatOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms float facet collector on it");
}
// add type filter if there is exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.FLOAT) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of float type, can't run terms float facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (excluded == null || excluded.isEmpty()) {
this.excluded = null;
} else {
this.excluded = new TFloatHashSet(excluded.size());
for (BytesRef s : excluded) {
this.excluded.add(Float.parseFloat(s.utf8ToString()));
}
}
// minCount is offset by -1
if (allTerms) {
minCount = -1;
} else {
minCount = 0;
}
this.aggregators = new ArrayList<ReaderAggregator>(context.searcher().getIndexReader().leaves().size());
}
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
if (current.values.length > 1) {
aggregators.add(current);
}
}
fieldData = (FloatFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
current = new ReaderAggregator(fieldData);
}
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachOrdinalInDoc(doc, current);
}
@Override
public Facet facet() {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
// if we have values for this one, add it
if (current.values.length > 1) {
aggregators.add(current);
}
}
AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size());
for (ReaderAggregator aggregator : aggregators) {
if (aggregator.nextPosition()) {
queue.add(aggregator);
}
}
// YACK, we repeat the same logic, but once with an optimized priority queue for smaller sizes
if (size < EntryPriorityQueue.LIMIT) {
// optimize to use priority size
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
float value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalFloatTermsFacet.FloatEntry entry = new InternalFloatTermsFacet.FloatEntry(value, count);
ordered.insertWithOverflow(entry);
}
}
}
InternalFloatTermsFacet.FloatEntry[] list = new InternalFloatTermsFacet.FloatEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = (InternalFloatTermsFacet.FloatEntry) ordered.pop();
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalFloatTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total);
}
BoundedTreeSet<InternalFloatTermsFacet.FloatEntry> ordered = new BoundedTreeSet<InternalFloatTermsFacet.FloatEntry>(comparatorType.comparator(), size);
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
float value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalFloatTermsFacet.FloatEntry entry = new InternalFloatTermsFacet.FloatEntry(value, count);
ordered.add(entry);
}
}
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalFloatTermsFacet(facetName, comparatorType, size, ordered, missing, total);
}
public static class ReaderAggregator implements FieldData.OrdinalInDocProc {
final float[] values;
final int[] counts;
int position = 0;
float current;
int total;
public ReaderAggregator(FloatFieldData fieldData) {
this.values = fieldData.values();
this.counts = CacheRecycler.popIntArray(fieldData.values().length);
}
@Override
public void onOrdinal(int docId, int ordinal) {
counts[ordinal]++;
total++;
}
public boolean nextPosition() {
if (++position >= values.length) {
return false;
}
current = values[position];
return true;
}
}
public static class AggregatorPriorityQueue extends PriorityQueue<ReaderAggregator> {
public AggregatorPriorityQueue(int size) {
super(size);
}
@Override
protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) {
return a.current < b.current;
}
}
}
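Note: facet() in the ordinals collector is a k-way merge: every ReaderAggregator walks its segment's sorted array of distinct values, and AggregatorPriorityQueue always surfaces the smallest current value so per-segment counts for the same value are summed before moving on. A minimal JDK sketch of that merge (the Cursor type here is a hypothetical stand-in for ReaderAggregator):

import java.util.PriorityQueue;

// Sketch: k-way merge over per-segment sorted (value, count) arrays, summing
// counts whenever two segments expose the same value.
public class KWayMergeSketch {
    static final class Cursor {
        final float[] values; final int[] counts; int pos = -1;
        Cursor(float[] values, int[] counts) { this.values = values; this.counts = counts; }
        boolean advance() { return ++pos < values.length; }
        float current() { return values[pos]; }
    }

    public static void main(String[] args) {
        PriorityQueue<Cursor> queue =
                new PriorityQueue<>((a, b) -> Float.compare(a.current(), b.current()));
        for (Cursor c : new Cursor[]{
                new Cursor(new float[]{1f, 3f}, new int[]{2, 1}),
                new Cursor(new float[]{1f, 2f}, new int[]{4, 5})}) {
            if (c.advance()) queue.add(c);
        }
        while (!queue.isEmpty()) {
            float value = queue.peek().current();
            int count = 0;
            while (!queue.isEmpty() && queue.peek().current() == value) {
                Cursor c = queue.poll();
                count += c.counts[c.pos];
                if (c.advance()) queue.add(c); // re-enter with its next value
            }
            System.out.println(value + " -> " + count); // 1.0 -> 6, 2.0 -> 5, 3.0 -> 1
        }
    }
}

The real collector avoids the poll/add churn with Lucene's updateTop, but the ordering logic is the same.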

View File

@@ -1,308 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.ints;
import com.google.common.collect.ImmutableList;
import gnu.trove.iterator.TIntIntIterator;
import gnu.trove.map.hash.TIntIntHashMap;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.InternalTermsFacet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
/**
*
*/
public class InternalIntTermsFacet extends InternalTermsFacet {
private static final String STREAM_TYPE = "iTerms";
public static void registerStream() {
Streams.registerStream(STREAM, STREAM_TYPE);
}
static Stream STREAM = new Stream() {
@Override
public Facet readFacet(String type, StreamInput in) throws IOException {
return readTermsFacet(in);
}
};
@Override
public String streamType() {
return STREAM_TYPE;
}
public static class IntEntry implements Entry {
int term;
int count;
public IntEntry(int term, int count) {
this.term = term;
this.count = count;
}
public Text term() {
return new StringText(Integer.toString(term));
}
public Text getTerm() {
return term();
}
@Override
public Number termAsNumber() {
return term;
}
@Override
public Number getTermAsNumber() {
return termAsNumber();
}
public int count() {
return count;
}
public int getCount() {
return count();
}
@Override
public int compareTo(Entry o) {
int anotherVal = ((IntEntry) o).term;
int i = term < anotherVal ? -1 : (term == anotherVal ? 0 : 1); // plain subtraction could overflow for int terms
if (i == 0) {
i = count - o.count();
if (i == 0) {
i = System.identityHashCode(this) - System.identityHashCode(o);
}
}
return i;
}
}
private String name;
int requiredSize;
long missing;
long total;
Collection<IntEntry> entries = ImmutableList.of();
ComparatorType comparatorType;
InternalIntTermsFacet() {
}
public InternalIntTermsFacet(String name, ComparatorType comparatorType, int requiredSize, Collection<IntEntry> entries, long missing, long total) {
this.name = name;
this.comparatorType = comparatorType;
this.requiredSize = requiredSize;
this.entries = entries;
this.missing = missing;
this.total = total;
}
@Override
public String name() {
return this.name;
}
@Override
public String getName() {
return this.name;
}
@Override
public String type() {
return TYPE;
}
@Override
public String getType() {
return type();
}
@Override
public List<IntEntry> entries() {
if (!(entries instanceof List)) {
entries = ImmutableList.copyOf(entries);
}
return (List<IntEntry>) entries;
}
@Override
public List<IntEntry> getEntries() {
return entries();
}
@SuppressWarnings({"unchecked"})
@Override
public Iterator<Entry> iterator() {
return (Iterator) entries.iterator();
}
@Override
public long missingCount() {
return this.missing;
}
@Override
public long getMissingCount() {
return missingCount();
}
@Override
public long totalCount() {
return this.total;
}
@Override
public long getTotalCount() {
return totalCount();
}
@Override
public long otherCount() {
long other = total;
for (Entry entry : entries) {
other -= entry.count();
}
return other;
}
@Override
public long getOtherCount() {
return otherCount();
}
@Override
public Facet reduce(String name, List<Facet> facets) {
if (facets.size() == 1) {
return facets.get(0);
}
InternalIntTermsFacet first = (InternalIntTermsFacet) facets.get(0);
TIntIntHashMap aggregated = CacheRecycler.popIntIntMap();
long missing = 0;
long total = 0;
for (Facet facet : facets) {
InternalIntTermsFacet mFacet = (InternalIntTermsFacet) facet;
missing += mFacet.missingCount();
total += mFacet.totalCount();
for (IntEntry entry : mFacet.entries) {
aggregated.adjustOrPutValue(entry.term, entry.count(), entry.count());
}
}
BoundedTreeSet<IntEntry> ordered = new BoundedTreeSet<IntEntry>(first.comparatorType.comparator(), first.requiredSize);
for (TIntIntIterator it = aggregated.iterator(); it.hasNext(); ) {
it.advance();
ordered.add(new IntEntry(it.key(), it.value()));
}
first.entries = ordered;
first.missing = missing;
first.total = total;
CacheRecycler.pushIntIntMap(aggregated);
return first;
}
static final class Fields {
static final XContentBuilderString _TYPE = new XContentBuilderString("_type");
static final XContentBuilderString MISSING = new XContentBuilderString("missing");
static final XContentBuilderString TOTAL = new XContentBuilderString("total");
static final XContentBuilderString OTHER = new XContentBuilderString("other");
static final XContentBuilderString TERMS = new XContentBuilderString("terms");
static final XContentBuilderString TERM = new XContentBuilderString("term");
static final XContentBuilderString COUNT = new XContentBuilderString("count");
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(name);
builder.field(Fields._TYPE, TermsFacet.TYPE);
builder.field(Fields.MISSING, missing);
builder.field(Fields.TOTAL, total);
builder.field(Fields.OTHER, otherCount());
builder.startArray(Fields.TERMS);
for (IntEntry entry : entries) {
builder.startObject();
builder.field(Fields.TERM, entry.term);
builder.field(Fields.COUNT, entry.count());
builder.endObject();
}
builder.endArray();
builder.endObject();
return builder;
}
public static InternalIntTermsFacet readTermsFacet(StreamInput in) throws IOException {
InternalIntTermsFacet facet = new InternalIntTermsFacet();
facet.readFrom(in);
return facet;
}
@Override
public void readFrom(StreamInput in) throws IOException {
name = in.readString();
comparatorType = ComparatorType.fromId(in.readByte());
requiredSize = in.readVInt();
missing = in.readVLong();
total = in.readVLong();
int size = in.readVInt();
entries = new ArrayList<IntEntry>(size);
for (int i = 0; i < size; i++) {
entries.add(new IntEntry(in.readInt(), in.readVInt()));
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(name);
out.writeByte(comparatorType.id());
out.writeVInt(requiredSize);
out.writeVLong(missing);
out.writeVLong(total);
out.writeVInt(entries.size());
for (IntEntry entry : entries) {
out.writeInt(entry.term);
out.writeVInt(entry.count());
}
}
}
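Note: reduce() above merges per-shard facets by summing counts for equal terms into one recycled map, accumulating missing and total, and re-ranking the merged entries down to requiredSize. A plain-JDK sketch of the same fold (illustrative names, HashMap instead of the Trove map):

import java.util.*;

// Sketch of the shard-reduce step: sum per-shard term counts, then keep the
// requiredSize highest-count terms, as reduce() does above.
public class ReduceSketch {
    public static void main(String[] args) {
        List<Map<Integer, Integer>> shards = Arrays.asList(
                Map.of(1, 5, 2, 1),   // shard A: term -> count
                Map.of(1, 2, 3, 4));  // shard B

        Map<Integer, Integer> merged = new HashMap<>();
        for (Map<Integer, Integer> shard : shards) {
            shard.forEach((term, count) -> merged.merge(term, count, Integer::sum));
        }

        int requiredSize = 2;
        List<Map.Entry<Integer, Integer>> top = new ArrayList<>(merged.entrySet());
        top.sort(Map.Entry.<Integer, Integer>comparingByValue().reversed());
        System.out.println(top.subList(0, Math.min(requiredSize, top.size()))); // [1=7, 3=4]
    }
}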

View File

@@ -1,254 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.ints;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import gnu.trove.iterator.TIntIntIterator;
import gnu.trove.map.hash.TIntIntHashMap;
import gnu.trove.set.hash.TIntHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.ints.IntFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
/**
*
*/
public class TermsIntFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final TermsFacet.ComparatorType comparatorType;
private final int size;
private final int numberOfShards;
private final FieldDataType fieldDataType;
private IntFieldData fieldData;
private final StaticAggregatorValueProc aggregator;
private final SearchScript script;
public TermsIntFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, String scriptLang, String script, Map<String, Object> params) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms int facet collector on it");
}
// add type filter if there is exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.INT) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of int type, can't run terms int facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (script != null) {
this.script = context.scriptService().search(context.lookup(), scriptLang, script, params);
} else {
this.script = null;
}
if (this.script == null && excluded.isEmpty()) {
aggregator = new StaticAggregatorValueProc(CacheRecycler.popIntIntMap());
} else {
aggregator = new AggregatorValueProc(CacheRecycler.popIntIntMap(), excluded, this.script);
}
if (allTerms) {
try {
for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
IntFieldData fieldData = (IntFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
fieldData.forEachValue(aggregator);
}
} catch (Exception e) {
throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
}
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
if (script != null) {
script.setScorer(scorer);
}
}
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
fieldData = (IntFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
if (script != null) {
script.setNextReader(context);
}
}
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachValueInDoc(doc, aggregator);
}
@Override
public Facet facet() {
TIntIntHashMap facets = aggregator.facets();
if (facets.isEmpty()) {
CacheRecycler.pushIntIntMap(facets);
return new InternalIntTermsFacet(facetName, comparatorType, size, ImmutableList.<InternalIntTermsFacet.IntEntry>of(), aggregator.missing(), aggregator.total());
} else {
if (size < EntryPriorityQueue.LIMIT) {
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
for (TIntIntIterator it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.insertWithOverflow(new InternalIntTermsFacet.IntEntry(it.key(), it.value()));
}
InternalIntTermsFacet.IntEntry[] list = new InternalIntTermsFacet.IntEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = (InternalIntTermsFacet.IntEntry) ordered.pop();
}
CacheRecycler.pushIntIntMap(facets);
return new InternalIntTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing(), aggregator.total());
} else {
BoundedTreeSet<InternalIntTermsFacet.IntEntry> ordered = new BoundedTreeSet<InternalIntTermsFacet.IntEntry>(comparatorType.comparator(), size);
for (TIntIntIterator it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.add(new InternalIntTermsFacet.IntEntry(it.key(), it.value()));
}
CacheRecycler.pushIntIntMap(facets);
return new InternalIntTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing(), aggregator.total());
}
}
}
public static class AggregatorValueProc extends StaticAggregatorValueProc {
private final SearchScript script;
private final TIntHashSet excluded;
public AggregatorValueProc(TIntIntHashMap facets, Set<BytesRef> excluded, SearchScript script) {
super(facets);
if (excluded == null || excluded.isEmpty()) {
this.excluded = null;
} else {
this.excluded = new TIntHashSet(excluded.size());
for (BytesRef s : excluded) {
this.excluded.add(Integer.parseInt(s.utf8ToString()));
}
}
this.script = script;
}
@Override
public void onValue(int docId, int value) {
if (excluded != null && excluded.contains(value)) {
return;
}
if (script != null) {
script.setNextDocId(docId);
script.setNextVar("term", value);
Object scriptValue = script.run();
if (scriptValue == null) {
return;
}
if (scriptValue instanceof Boolean) {
if (!((Boolean) scriptValue)) {
return;
}
} else {
value = ((Number) scriptValue).intValue();
}
}
super.onValue(docId, value);
}
}
public static class StaticAggregatorValueProc implements IntFieldData.ValueInDocProc, IntFieldData.ValueProc {
private final TIntIntHashMap facets;
private int missing;
private int total;
public StaticAggregatorValueProc(TIntIntHashMap facets) {
this.facets = facets;
}
@Override
public void onValue(int value) {
facets.putIfAbsent(value, 0);
}
@Override
public void onValue(int docId, int value) {
facets.adjustOrPutValue(value, 1, 1);
total++;
}
@Override
public void onMissing(int docId) {
missing++;
}
public final TIntIntHashMap facets() {
return facets;
}
public final int missing() {
return this.missing;
}
public final int total() {
return this.total;
}
}
}
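Note: AggregatorValueProc.onValue above gives the optional script a dual contract: returning null or Boolean.FALSE drops the term, Boolean.TRUE keeps it, and any other Number replaces the term's value before it is counted. A tiny sketch of that dispatch (the ValueScript interface here is hypothetical, standing in for SearchScript):

// Sketch of the script hook's contract: Boolean => keep/drop, Number =>
// substitute, null => drop. ValueScript is a made-up stand-in interface.
interface ValueScript {
    Object run(int value);
}

public class ScriptHookSketch {
    static Integer apply(ValueScript script, int value) {
        Object result = script.run(value);
        if (result == null) {
            return null;                              // drop the value
        }
        if (result instanceof Boolean) {
            return ((Boolean) result) ? value : null; // pure filter
        }
        return ((Number) result).intValue();          // transform the value
    }

    public static void main(String[] args) {
        ValueScript doubleEvens = v -> (v % 2 == 0) ? (Object) (v * 2) : Boolean.FALSE;
        System.out.println(apply(doubleEvens, 4)); // 8
        System.out.println(apply(doubleEvens, 3)); // null (filtered out)
    }
}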

View File

@@ -1,267 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.ints;
import com.google.common.collect.ImmutableSet;
import gnu.trove.set.hash.TIntHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldData;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.ints.IntFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
*
*/
public class TermsIntOrdinalsFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final TermsFacet.ComparatorType comparatorType;
private final int size;
private final int numberOfShards;
private final int minCount;
private final FieldDataType fieldDataType;
private IntFieldData fieldData;
private final List<ReaderAggregator> aggregators;
private ReaderAggregator current;
long missing;
long total;
private final TIntHashSet excluded;
public TermsIntOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms int facet collector on it");
}
// add type filter if there is exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.INT) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of int type, can't run terms int facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (excluded == null || excluded.isEmpty()) {
this.excluded = null;
} else {
this.excluded = new TIntHashSet(excluded.size());
for (BytesRef s : excluded) {
this.excluded.add(Integer.parseInt(s.utf8ToString()));
}
}
// minCount is offset by -1
if (allTerms) {
minCount = -1;
} else {
minCount = 0;
}
this.aggregators = new ArrayList<ReaderAggregator>(context.searcher().getIndexReader().leaves().size());
}
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
if (current.values.length > 1) {
aggregators.add(current);
}
}
fieldData = (IntFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
current = new ReaderAggregator(fieldData);
}
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachOrdinalInDoc(doc, current);
}
@Override
public Facet facet() {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
// if we have values for this one, add it
if (current.values.length > 1) {
aggregators.add(current);
}
}
AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size());
for (ReaderAggregator aggregator : aggregators) {
if (aggregator.nextPosition()) {
queue.add(aggregator);
}
}
// YACK, we repeat the same logic, but once with an optimized priority queue for smaller sizes
if (size < EntryPriorityQueue.LIMIT) {
// optimize to use priority size
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
int value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalIntTermsFacet.IntEntry entry = new InternalIntTermsFacet.IntEntry(value, count);
ordered.insertWithOverflow(entry);
}
}
}
InternalIntTermsFacet.IntEntry[] list = new InternalIntTermsFacet.IntEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = (InternalIntTermsFacet.IntEntry) ordered.pop();
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalIntTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total);
}
BoundedTreeSet<InternalIntTermsFacet.IntEntry> ordered = new BoundedTreeSet<InternalIntTermsFacet.IntEntry>(comparatorType.comparator(), size);
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
int value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalIntTermsFacet.IntEntry entry = new InternalIntTermsFacet.IntEntry(value, count);
ordered.add(entry);
}
}
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalIntTermsFacet(facetName, comparatorType, size, ordered, missing, total);
}
public static class ReaderAggregator implements FieldData.OrdinalInDocProc {
final int[] values;
final int[] counts;
int position = 0;
int current;
int total = 0;
public ReaderAggregator(IntFieldData fieldData) {
this.values = fieldData.values();
this.counts = CacheRecycler.popIntArray(fieldData.values().length);
}
@Override
public void onOrdinal(int docId, int ordinal) {
counts[ordinal]++;
total++;
}
public boolean nextPosition() {
if (++position >= values.length) {
return false;
}
current = values[position];
return true;
}
}
public static class AggregatorPriorityQueue extends PriorityQueue<ReaderAggregator> {
public AggregatorPriorityQueue(int size) {
super(size);
}
@Override
protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) {
return a.current < b.current;
}
}
}

View File

@@ -1,313 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.ip;
import com.google.common.collect.ImmutableList;
import gnu.trove.iterator.TLongIntIterator;
import gnu.trove.map.hash.TLongIntHashMap;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString;
import org.elasticsearch.index.mapper.ip.IpFieldMapper;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.InternalTermsFacet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
/**
*
*/
public class InternalIpTermsFacet extends InternalTermsFacet {
private static final String STREAM_TYPE = "ipTerms";
public static void registerStream() {
Streams.registerStream(STREAM, STREAM_TYPE);
}
static Stream STREAM = new Stream() {
@Override
public Facet readFacet(String type, StreamInput in) throws IOException {
return readTermsFacet(in);
}
};
@Override
public String streamType() {
return STREAM_TYPE;
}
public static class LongEntry implements Entry {
long term;
int count;
public LongEntry(long term, int count) {
this.term = term;
this.count = count;
}
public Text term() {
return new StringText(IpFieldMapper.longToIp(term));
}
public Text getTerm() {
return term();
}
@Override
public Number termAsNumber() {
return term;
}
@Override
public Number getTermAsNumber() {
return termAsNumber();
}
public int count() {
return count;
}
public int getCount() {
return count();
}
@Override
public int compareTo(Entry o) {
long anotherVal = ((LongEntry) o).term;
if (term < anotherVal) {
return -1;
}
if (term == anotherVal) {
int i = count - o.count();
if (i == 0) {
i = System.identityHashCode(this) - System.identityHashCode(o);
}
return i;
}
return 1;
}
}
private String name;
int requiredSize;
long missing;
long total;
Collection<LongEntry> entries = ImmutableList.of();
ComparatorType comparatorType;
InternalIpTermsFacet() {
}
public InternalIpTermsFacet(String name, ComparatorType comparatorType, int requiredSize, Collection<LongEntry> entries, long missing, long total) {
this.name = name;
this.comparatorType = comparatorType;
this.requiredSize = requiredSize;
this.entries = entries;
this.missing = missing;
this.total = total;
}
@Override
public String name() {
return this.name;
}
@Override
public String getName() {
return this.name;
}
@Override
public String type() {
return TYPE;
}
@Override
public String getType() {
return type();
}
@Override
public List<LongEntry> entries() {
if (!(entries instanceof List)) {
entries = ImmutableList.copyOf(entries);
}
return (List<LongEntry>) entries;
}
@Override
public List<LongEntry> getEntries() {
return entries();
}
@SuppressWarnings({"unchecked"})
@Override
public Iterator<Entry> iterator() {
return (Iterator) entries.iterator();
}
@Override
public long missingCount() {
return this.missing;
}
@Override
public long getMissingCount() {
return missingCount();
}
@Override
public long totalCount() {
return this.total;
}
@Override
public long getTotalCount() {
return totalCount();
}
@Override
public long otherCount() {
long other = total;
for (Entry entry : entries) {
other -= entry.count();
}
return other;
}
@Override
public long getOtherCount() {
return otherCount();
}
@Override
public Facet reduce(String name, List<Facet> facets) {
if (facets.size() == 1) {
return facets.get(0);
}
InternalIpTermsFacet first = (InternalIpTermsFacet) facets.get(0);
TLongIntHashMap aggregated = CacheRecycler.popLongIntMap();
long missing = 0;
long total = 0;
for (Facet facet : facets) {
InternalIpTermsFacet mFacet = (InternalIpTermsFacet) facet;
missing += mFacet.missingCount();
total += mFacet.totalCount();
for (LongEntry entry : mFacet.entries) {
aggregated.adjustOrPutValue(entry.term, entry.count(), entry.count());
}
}
BoundedTreeSet<LongEntry> ordered = new BoundedTreeSet<LongEntry>(first.comparatorType.comparator(), first.requiredSize);
for (TLongIntIterator it = aggregated.iterator(); it.hasNext(); ) {
it.advance();
ordered.add(new LongEntry(it.key(), it.value()));
}
first.entries = ordered;
first.missing = missing;
first.total = total;
CacheRecycler.pushLongIntMap(aggregated);
return first;
}
static final class Fields {
static final XContentBuilderString _TYPE = new XContentBuilderString("_type");
static final XContentBuilderString MISSING = new XContentBuilderString("missing");
static final XContentBuilderString TOTAL = new XContentBuilderString("total");
static final XContentBuilderString OTHER = new XContentBuilderString("other");
static final XContentBuilderString TERMS = new XContentBuilderString("terms");
static final XContentBuilderString TERM = new XContentBuilderString("term");
static final XContentBuilderString COUNT = new XContentBuilderString("count");
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(name);
builder.field(Fields._TYPE, TermsFacet.TYPE);
builder.field(Fields.MISSING, missing);
builder.field(Fields.TOTAL, total);
builder.field(Fields.OTHER, otherCount());
builder.startArray(Fields.TERMS);
for (LongEntry entry : entries) {
builder.startObject();
builder.field(Fields.TERM, entry.term()); // displayed as string
builder.field(Fields.COUNT, entry.count());
builder.endObject();
}
builder.endArray();
builder.endObject();
return builder;
}
public static InternalIpTermsFacet readTermsFacet(StreamInput in) throws IOException {
InternalIpTermsFacet facet = new InternalIpTermsFacet();
facet.readFrom(in);
return facet;
}
@Override
public void readFrom(StreamInput in) throws IOException {
name = in.readString();
comparatorType = ComparatorType.fromId(in.readByte());
requiredSize = in.readVInt();
missing = in.readVLong();
total = in.readVLong();
int size = in.readVInt();
entries = new ArrayList<LongEntry>(size);
for (int i = 0; i < size; i++) {
entries.add(new LongEntry(in.readLong(), in.readVInt()));
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(name);
out.writeByte(comparatorType.id());
out.writeVInt(requiredSize);
out.writeVLong(missing);
out.writeVLong(total);
out.writeVInt(entries.size());
for (LongEntry entry : entries) {
out.writeLong(entry.term);
out.writeVInt(entry.count());
}
}
}
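Note: LongEntry.term() above renders the packed IPv4 address through IpFieldMapper.longToIp only at display time; aggregation and serialization work on the raw longs. A standalone sketch of that packing, assuming the conventional big-endian four-octet layout (not necessarily Elasticsearch's exact implementation):

// Sketch: IPv4 <-> long packing with big-endian octets. The facet keeps the
// long form and converts to dotted-quad text only when rendering entries.
public class IpPackSketch {
    static long ipToLong(String ip) {
        long packed = 0;
        for (String octet : ip.split("\\.")) {
            packed = (packed << 8) | Integer.parseInt(octet);
        }
        return packed;
    }

    static String longToIp(long packed) {
        return ((packed >> 24) & 0xFF) + "." + ((packed >> 16) & 0xFF) + "."
                + ((packed >> 8) & 0xFF) + "." + (packed & 0xFF);
    }

    public static void main(String[] args) {
        long packed = ipToLong("192.168.0.1");
        System.out.println(packed);           // 3232235521
        System.out.println(longToIp(packed)); // 192.168.0.1
    }
}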

View File

@@ -1,237 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.ip;
import com.google.common.collect.ImmutableList;
import gnu.trove.iterator.TLongIntIterator;
import gnu.trove.map.hash.TLongIntHashMap;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Scorer;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.longs.LongFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
/**
*
*/
public class TermsIpFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final TermsFacet.ComparatorType comparatorType;
private final int size;
private final int numberOfShards;
private final FieldDataType fieldDataType;
private LongFieldData fieldData;
private final StaticAggregatorValueProc aggregator;
private final SearchScript script;
public TermsIpFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
String scriptLang, String script, Map<String, Object> params) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms long facet collector on it");
}
// add type filter if there is exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.LONG) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of long type, can't run terms long facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (script != null) {
this.script = context.scriptService().search(context.lookup(), scriptLang, script, params);
} else {
this.script = null;
}
if (this.script == null) {
aggregator = new StaticAggregatorValueProc(CacheRecycler.popLongIntMap());
} else {
aggregator = new AggregatorValueProc(CacheRecycler.popLongIntMap(), this.script);
}
if (allTerms) {
try {
for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
LongFieldData fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
fieldData.forEachValue(aggregator);
}
} catch (Exception e) {
throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
}
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
if (script != null) {
script.setScorer(scorer);
}
}
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
if (script != null) {
script.setNextReader(context);
}
}
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachValueInDoc(doc, aggregator);
}
@Override
public Facet facet() {
TLongIntHashMap facets = aggregator.facets();
if (facets.isEmpty()) {
CacheRecycler.pushLongIntMap(facets);
return new InternalIpTermsFacet(facetName, comparatorType, size, ImmutableList.<InternalIpTermsFacet.LongEntry>of(), aggregator.missing(), aggregator.total());
} else {
if (size < EntryPriorityQueue.LIMIT) {
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
for (TLongIntIterator it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.insertWithOverflow(new InternalIpTermsFacet.LongEntry(it.key(), it.value()));
}
InternalIpTermsFacet.LongEntry[] list = new InternalIpTermsFacet.LongEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = (InternalIpTermsFacet.LongEntry) ordered.pop();
}
CacheRecycler.pushLongIntMap(facets);
return new InternalIpTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing(), aggregator.total());
} else {
BoundedTreeSet<InternalIpTermsFacet.LongEntry> ordered = new BoundedTreeSet<InternalIpTermsFacet.LongEntry>(comparatorType.comparator(), size);
for (TLongIntIterator it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.add(new InternalIpTermsFacet.LongEntry(it.key(), it.value()));
}
CacheRecycler.pushLongIntMap(facets);
return new InternalIpTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing(), aggregator.total());
}
}
}
public static class AggregatorValueProc extends StaticAggregatorValueProc {
private final SearchScript script;
public AggregatorValueProc(TLongIntHashMap facets, SearchScript script) {
super(facets);
this.script = script;
}
@Override
public void onValue(int docId, long value) {
if (script != null) {
script.setNextDocId(docId);
script.setNextVar("term", value);
Object scriptValue = script.run();
if (scriptValue == null) {
return;
}
if (scriptValue instanceof Boolean) {
if (!((Boolean) scriptValue)) {
return;
}
} else {
value = ((Number) scriptValue).longValue();
}
}
super.onValue(docId, value);
}
}
public static class StaticAggregatorValueProc implements LongFieldData.ValueInDocProc, LongFieldData.ValueProc {
private final TLongIntHashMap facets;
private int missing;
private int total;
public StaticAggregatorValueProc(TLongIntHashMap facets) {
this.facets = facets;
}
@Override
public void onValue(long value) {
facets.putIfAbsent(value, 0);
}
@Override
public void onValue(int docId, long value) {
facets.adjustOrPutValue(value, 1, 1);
total++;
}
@Override
public void onMissing(int docId) {
missing++;
}
public final TLongIntHashMap facets() {
return facets;
}
public final int missing() {
return this.missing;
}
public final int total() {
return this.total;
}
}
}

View File

@@ -1,266 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.ip;
import com.google.common.collect.ImmutableSet;
import gnu.trove.set.hash.TLongHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldData;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.longs.LongFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
*
*/
public class TermsIpOrdinalsFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final TermsFacet.ComparatorType comparatorType;
private final int size;
private final int numberOfShards;
private final int minCount;
private final FieldDataType fieldDataType;
private LongFieldData fieldData;
private final List<ReaderAggregator> aggregators;
private ReaderAggregator current;
long missing;
long total;
private final TLongHashSet excluded;
public TermsIpOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<String> excluded) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms long facet collector on it");
}
// add type filter if there is exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.LONG) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of long type, can't run terms long facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (excluded == null || excluded.isEmpty()) {
this.excluded = null;
} else {
this.excluded = new TLongHashSet(excluded.size());
for (String s : excluded) {
this.excluded.add(Long.parseLong(s));
}
}
// minCount is offset by -1
if (allTerms) {
minCount = -1;
} else {
minCount = 0;
}
this.aggregators = new ArrayList<ReaderAggregator>(context.searcher().getIndexReader().leaves().size());
}
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
if (current.values.length > 1) {
aggregators.add(current);
}
}
fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
current = new ReaderAggregator(fieldData);
}
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachOrdinalInDoc(doc, current);
}
@Override
public Facet facet() {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
// if we have values for this one, add it
if (current.values.length > 1) {
aggregators.add(current);
}
}
AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size());
for (ReaderAggregator aggregator : aggregators) {
if (aggregator.nextPosition()) {
queue.add(aggregator);
}
}
// YACK, we repeat the same logic, but once with an optimized priority queue for smaller sizes
if (size < EntryPriorityQueue.LIMIT) {
// optimize to use priority size
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
long value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalIpTermsFacet.LongEntry entry = new InternalIpTermsFacet.LongEntry(value, count);
ordered.insertWithOverflow(entry);
}
}
}
InternalIpTermsFacet.LongEntry[] list = new InternalIpTermsFacet.LongEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = (InternalIpTermsFacet.LongEntry) ordered.pop();
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalIpTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total);
}
BoundedTreeSet<InternalIpTermsFacet.LongEntry> ordered = new BoundedTreeSet<InternalIpTermsFacet.LongEntry>(comparatorType.comparator(), size);
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
long value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalIpTermsFacet.LongEntry entry = new InternalIpTermsFacet.LongEntry(value, count);
ordered.add(entry);
}
}
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalIpTermsFacet(facetName, comparatorType, size, ordered, missing, total);
}
public static class ReaderAggregator implements FieldData.OrdinalInDocProc {
final long[] values;
final int[] counts;
int position = 0;
long current = Integer.MIN_VALUE;
int total;
public ReaderAggregator(LongFieldData fieldData) {
this.values = fieldData.values();
this.counts = CacheRecycler.popIntArray(fieldData.values().length);
}
@Override
public void onOrdinal(int docId, int ordinal) {
counts[ordinal]++;
total++;
}
public boolean nextPosition() {
if (++position >= values.length) {
return false;
}
current = values[position];
return true;
}
}
public static class AggregatorPriorityQueue extends PriorityQueue<ReaderAggregator> {
public AggregatorPriorityQueue(int size) {
super(size);
}
@Override
protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) {
return a.current < b.current;
}
}
}
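The facet() method above is a k-way merge: every per-segment ReaderAggregator walks a sorted array of distinct values with parallel counts, and the priority queue always surfaces the aggregator sitting on the smallest current value, so equal values from different segments are summed before the merge moves on. A minimal, self-contained sketch of that merge (the Segment class and all names below are illustrative, not part of this patch):

import java.util.PriorityQueue;

public class KWayCountMergeSketch {

    static final class Segment {
        final long[] values;  // sorted, distinct values seen in one segment
        final int[] counts;   // counts[i] = number of docs in the segment carrying values[i]
        int pos = -1;

        Segment(long[] values, int[] counts) {
            this.values = values;
            this.counts = counts;
        }

        boolean advance() { return ++pos < values.length; }
        long current() { return values[pos]; }
    }

    public static void main(String[] args) {
        PriorityQueue<Segment> queue =
                new PriorityQueue<>((a, b) -> Long.compare(a.current(), b.current()));
        Segment s1 = new Segment(new long[]{1, 3, 7}, new int[]{2, 1, 4});
        Segment s2 = new Segment(new long[]{3, 7, 9}, new int[]{5, 1, 1});
        for (Segment s : new Segment[]{s1, s2}) {
            if (s.advance()) queue.add(s);
        }
        while (!queue.isEmpty()) {
            long value = queue.peek().current();
            int count = 0;
            // drain every segment currently positioned on `value`, like the do/while loop above
            while (!queue.isEmpty() && queue.peek().current() == value) {
                Segment s = queue.poll();
                count += s.counts[s.pos];
                if (s.advance()) queue.add(s);  // re-enter with the next (larger) value
            }
            System.out.println(value + " -> " + count);  // 1 -> 2, 3 -> 6, 7 -> 5, 9 -> 1
        }
    }
}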

View File

@@ -24,46 +24,30 @@ import com.google.common.collect.ImmutableSet;
import gnu.trove.iterator.TLongIntIterator;
import gnu.trove.map.hash.TLongIntHashMap;
import gnu.trove.set.hash.TLongHashSet;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.util.concurrent.ThreadLocals;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.longs.LongFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.LongValues;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.*;
import java.util.Arrays;
import java.util.Set;
/**
*
*/
public class TermsLongFacetCollector extends AbstractFacetCollector {
static ThreadLocal<ThreadLocals.CleanableValue<Deque<TLongIntHashMap>>> cache = new ThreadLocal<ThreadLocals.CleanableValue<Deque<TLongIntHashMap>>>() {
@Override
protected ThreadLocals.CleanableValue<Deque<TLongIntHashMap>> initialValue() {
return new ThreadLocals.CleanableValue<Deque<TLongIntHashMap>>(new ArrayDeque<TLongIntHashMap>());
}
};
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final IndexNumericFieldData indexFieldData;
private final TermsFacet.ComparatorType comparatorType;
@@ -71,43 +55,20 @@ public class TermsLongFacetCollector extends AbstractFacetCollector {
private final int numberOfShards;
private final FieldDataType fieldDataType;
private LongFieldData fieldData;
private LongValues values;
private final StaticAggregatorValueProc aggregator;
private final SearchScript script;
public TermsLongFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, String scriptLang, String script, Map<String, Object> params) {
public TermsLongFacetCollector(String facetName, IndexNumericFieldData indexFieldData, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, SearchScript script) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.indexFieldData = indexFieldData;
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms long facet collector on it");
}
// add type filter if there is exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.LONG) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of long type, can't run terms long facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (script != null) {
this.script = context.scriptService().search(context.lookup(), scriptLang, script, params);
} else {
this.script = null;
}
this.script = script;
if (this.script == null && excluded.isEmpty()) {
aggregator = new StaticAggregatorValueProc(CacheRecycler.popLongIntMap());
@@ -115,16 +76,17 @@ public class TermsLongFacetCollector extends AbstractFacetCollector {
aggregator = new AggregatorValueProc(CacheRecycler.popLongIntMap(), excluded, this.script);
}
if (allTerms) {
try {
for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
LongFieldData fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
fieldData.forEachValue(aggregator);
}
} catch (Exception e) {
throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
}
}
// TODO: we need to support this with the new field data...
// if (allTerms) {
// try {
// for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
// LongFieldData fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
// fieldData.forEachValue(aggregator);
// }
// } catch (Exception e) {
// throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
// }
// }
}
@Override
@@ -136,7 +98,7 @@ public class TermsLongFacetCollector extends AbstractFacetCollector {
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
values = indexFieldData.load(context).getLongValues();
if (script != null) {
script.setNextReader(context);
}
@@ -144,7 +106,7 @@ public class TermsLongFacetCollector extends AbstractFacetCollector {
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachValueInDoc(doc, aggregator);
values.forEachValueInDoc(doc, aggregator);
}
@Override
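The two-line swap above is the heart of the port: instead of pulling a LongFieldData array from the cache and iterating it directly, the collector asks the new per-segment field data for a LongValues view and hands it a per-doc visitor. A rough sketch of that callback shape, with interface and class names modeled on (not copied from) the new API:

interface LongValueInDocProc {
    void onValue(int docId, long value);
    void onMissing(int docId);
}

final class DemoLongValues {
    private final long[][] perDoc;  // toy backing store: the long values of each doc

    DemoLongValues(long[][] perDoc) { this.perDoc = perDoc; }

    void forEachValueInDoc(int docId, LongValueInDocProc proc) {
        long[] vals = perDoc[docId];
        if (vals.length == 0) {
            proc.onMissing(docId);
            return;
        }
        for (long v : vals) {
            proc.onValue(docId, v);
        }
    }
}

public class VisitorDemo {
    public static void main(String[] args) {
        DemoLongValues values = new DemoLongValues(new long[][]{{42}, {}, {42, 7}});
        java.util.Map<Long, Integer> counts = new java.util.HashMap<>();
        int[] missing = {0};
        LongValueInDocProc aggregator = new LongValueInDocProc() {
            @Override public void onValue(int docId, long value) { counts.merge(value, 1, Integer::sum); }
            @Override public void onMissing(int docId) { missing[0]++; }
        };
        for (int doc = 0; doc < 3; doc++) {
            values.forEachValueInDoc(doc, aggregator);
        }
        System.out.println(counts + " missing=" + missing[0]);  // e.g. {7=1, 42=2} missing=1
    }
}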
@@ -221,7 +183,7 @@ public class TermsLongFacetCollector extends AbstractFacetCollector {
}
}
public static class StaticAggregatorValueProc implements LongFieldData.ValueInDocProc, LongFieldData.ValueProc {
public static class StaticAggregatorValueProc implements LongValues.ValueInDocProc {
private final TLongIntHashMap facets;
@@ -232,11 +194,6 @@ public class TermsLongFacetCollector extends AbstractFacetCollector {
this.facets = facets;
}
@Override
public void onValue(long value) {
facets.putIfAbsent(value, 0);
}
@Override
public void onValue(int docId, long value) {
facets.adjustOrPutValue(value, 1, 1);

View File

@@ -1,267 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.longs;
import com.google.common.collect.ImmutableSet;
import gnu.trove.set.hash.TLongHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldData;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.longs.LongFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
*
*/
public class TermsLongOrdinalsFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final TermsFacet.ComparatorType comparatorType;
private final int size;
private final int numberOfShards;
private final int minCount;
private final FieldDataType fieldDataType;
private LongFieldData fieldData;
private final List<ReaderAggregator> aggregators;
private ReaderAggregator current;
long missing;
long total;
private final TLongHashSet excluded;
public TermsLongOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms long facet collector on it");
}
// add type filter if there is exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.LONG) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of long type, can't run terms long facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (excluded == null || excluded.isEmpty()) {
this.excluded = null;
} else {
this.excluded = new TLongHashSet(excluded.size());
for (BytesRef s : excluded) {
this.excluded.add(Long.parseLong(s.utf8ToString()));
}
}
// minCount is offset by -1 so that, with allTerms, zero-count terms still pass the "count > minCount" check
if (allTerms) {
minCount = -1;
} else {
minCount = 0;
}
this.aggregators = new ArrayList<ReaderAggregator>(context.searcher().getIndexReader().leaves().size());
}
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
if (current.values.length > 1) {
aggregators.add(current);
}
}
fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
current = new ReaderAggregator(fieldData);
}
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachOrdinalInDoc(doc, current);
}
@Override
public Facet facet() {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
// if we have values for this one, add it
if (current.values.length > 1) {
aggregators.add(current);
}
}
AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size());
for (ReaderAggregator aggregator : aggregators) {
if (aggregator.nextPosition()) {
queue.add(aggregator);
}
}
// YUCK, we repeat the same logic, but once with an optimized priority queue for smaller sizes
if (size < EntryPriorityQueue.LIMIT) {
// optimized path: use a bounded priority queue of exactly the requested size
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
long value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalLongTermsFacet.LongEntry entry = new InternalLongTermsFacet.LongEntry(value, count);
ordered.insertWithOverflow(entry);
}
}
}
InternalLongTermsFacet.LongEntry[] list = new InternalLongTermsFacet.LongEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = (InternalLongTermsFacet.LongEntry) ordered.pop();
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalLongTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total);
}
BoundedTreeSet<InternalLongTermsFacet.LongEntry> ordered = new BoundedTreeSet<InternalLongTermsFacet.LongEntry>(comparatorType.comparator(), size);
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
long value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalLongTermsFacet.LongEntry entry = new InternalLongTermsFacet.LongEntry(value, count);
ordered.add(entry);
}
}
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalLongTermsFacet(facetName, comparatorType, size, ordered, missing, total);
}
public static class ReaderAggregator implements FieldData.OrdinalInDocProc {
final long[] values;
final int[] counts;
int position = 0;
long current = Integer.MIN_VALUE;
int total;
public ReaderAggregator(LongFieldData fieldData) {
this.values = fieldData.values();
this.counts = CacheRecycler.popIntArray(fieldData.values().length);
}
@Override
public void onOrdinal(int docId, int ordinal) {
counts[ordinal]++;
total++;
}
public boolean nextPosition() {
if (++position >= values.length) {
return false;
}
current = values[position];
return true;
}
}
public static class AggregatorPriorityQueue extends PriorityQueue<ReaderAggregator> {
public AggregatorPriorityQueue(int size) {
super(size);
}
@Override
protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) {
return a.current < b.current;
}
}
}
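Each ReaderAggregator above borrows its counts array through CacheRecycler.popIntArray, and facet() hands every array back via pushIntArray once the entries are built. A simplified sketch of that thread-local borrow/return pattern (an illustration of the idea, not ES's actual CacheRecycler):

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;

public final class IntArrayRecycler {

    private static final ThreadLocal<Deque<int[]>> POOL =
            ThreadLocal.withInitial(ArrayDeque::new);

    // borrow an array of at least `size` entries, zeroed up to `size` so counting starts clean
    public static int[] pop(int size) {
        int[] array = POOL.get().poll();
        if (array == null || array.length < size) {
            return new int[size];
        }
        Arrays.fill(array, 0, size, 0);
        return array;
    }

    // hand the array back once the facet entries have been built from it
    public static void push(int[] array) {
        POOL.get().offer(array);
    }
}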

View File

@@ -1,308 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.shorts;
import com.google.common.collect.ImmutableList;
import gnu.trove.iterator.TShortIntIterator;
import gnu.trove.map.hash.TShortIntHashMap;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.InternalTermsFacet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
/**
*
*/
public class InternalShortTermsFacet extends InternalTermsFacet {
private static final String STREAM_TYPE = "sTerms";
public static void registerStream() {
Streams.registerStream(STREAM, STREAM_TYPE);
}
static Stream STREAM = new Stream() {
@Override
public Facet readFacet(String type, StreamInput in) throws IOException {
return readTermsFacet(in);
}
};
@Override
public String streamType() {
return STREAM_TYPE;
}
public static class ShortEntry implements Entry {
short term;
int count;
public ShortEntry(short term, int count) {
this.term = term;
this.count = count;
}
public Text term() {
return new StringText(Short.toString(term));
}
public Text getTerm() {
return term();
}
@Override
public Number termAsNumber() {
return term;
}
@Override
public Number getTermAsNumber() {
return termAsNumber();
}
public int count() {
return count;
}
public int getCount() {
return count();
}
@Override
public int compareTo(Entry o) {
short anotherVal = ((ShortEntry) o).term;
int i = term - anotherVal;
if (i == 0) {
i = count - o.count();
if (i == 0) {
i = System.identityHashCode(this) - System.identityHashCode(o);
}
}
return i;
}
}
private String name;
int requiredSize;
long missing;
long total;
Collection<ShortEntry> entries = ImmutableList.of();
ComparatorType comparatorType;
InternalShortTermsFacet() {
}
public InternalShortTermsFacet(String name, ComparatorType comparatorType, int requiredSize, Collection<ShortEntry> entries, long missing, long total) {
this.name = name;
this.comparatorType = comparatorType;
this.requiredSize = requiredSize;
this.entries = entries;
this.missing = missing;
this.total = total;
}
@Override
public String name() {
return this.name;
}
@Override
public String getName() {
return this.name;
}
@Override
public String type() {
return TYPE;
}
@Override
public String getType() {
return type();
}
@Override
public List<ShortEntry> entries() {
if (!(entries instanceof List)) {
entries = ImmutableList.copyOf(entries);
}
return (List<ShortEntry>) entries;
}
@Override
public List<ShortEntry> getEntries() {
return entries();
}
@SuppressWarnings({"unchecked"})
@Override
public Iterator<Entry> iterator() {
return (Iterator) entries.iterator();
}
@Override
public long missingCount() {
return this.missing;
}
@Override
public long getMissingCount() {
return missingCount();
}
@Override
public long totalCount() {
return this.total;
}
@Override
public long getTotalCount() {
return totalCount();
}
@Override
public long otherCount() {
long other = total;
for (Entry entry : entries) {
other -= entry.count();
}
return other;
}
@Override
public long getOtherCount() {
return otherCount();
}
@Override
public Facet reduce(String name, List<Facet> facets) {
if (facets.size() == 1) {
return facets.get(0);
}
InternalShortTermsFacet first = (InternalShortTermsFacet) facets.get(0);
TShortIntHashMap aggregated = CacheRecycler.popShortIntMap();
long missing = 0;
long total = 0;
for (Facet facet : facets) {
InternalShortTermsFacet mFacet = (InternalShortTermsFacet) facet;
missing += mFacet.missingCount();
total += mFacet.totalCount();
for (ShortEntry entry : mFacet.entries) {
aggregated.adjustOrPutValue(entry.term, entry.count(), entry.count());
}
}
BoundedTreeSet<ShortEntry> ordered = new BoundedTreeSet<ShortEntry>(first.comparatorType.comparator(), first.requiredSize);
for (TShortIntIterator it = aggregated.iterator(); it.hasNext(); ) {
it.advance();
ordered.add(new ShortEntry(it.key(), it.value()));
}
first.entries = ordered;
first.missing = missing;
first.total = total;
CacheRecycler.pushShortIntMap(aggregated);
return first;
}
static final class Fields {
static final XContentBuilderString _TYPE = new XContentBuilderString("_type");
static final XContentBuilderString MISSING = new XContentBuilderString("missing");
static final XContentBuilderString TOTAL = new XContentBuilderString("total");
static final XContentBuilderString OTHER = new XContentBuilderString("other");
static final XContentBuilderString TERMS = new XContentBuilderString("terms");
static final XContentBuilderString TERM = new XContentBuilderString("term");
static final XContentBuilderString COUNT = new XContentBuilderString("count");
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(name);
builder.field(Fields._TYPE, TermsFacet.TYPE);
builder.field(Fields.MISSING, missing);
builder.field(Fields.TOTAL, total);
builder.field(Fields.OTHER, otherCount());
builder.startArray(Fields.TERMS);
for (ShortEntry entry : entries) {
builder.startObject();
builder.field(Fields.TERM, entry.term);
builder.field(Fields.COUNT, entry.count());
builder.endObject();
}
builder.endArray();
builder.endObject();
return builder;
}
public static InternalShortTermsFacet readTermsFacet(StreamInput in) throws IOException {
InternalShortTermsFacet facet = new InternalShortTermsFacet();
facet.readFrom(in);
return facet;
}
@Override
public void readFrom(StreamInput in) throws IOException {
name = in.readString();
comparatorType = ComparatorType.fromId(in.readByte());
requiredSize = in.readVInt();
missing = in.readVLong();
total = in.readVLong();
int size = in.readVInt();
entries = new ArrayList<ShortEntry>(size);
for (int i = 0; i < size; i++) {
entries.add(new ShortEntry(in.readShort(), in.readVInt()));
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(name);
out.writeByte(comparatorType.id());
out.writeVInt(requiredSize);
out.writeVLong(missing);
out.writeVLong(total);
out.writeVInt(entries.size());
for (ShortEntry entry : entries) {
out.writeShort(entry.term);
out.writeVInt(entry.count());
}
}
}
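The reduce() method above is the shard-merge step: every shard ships its own top list, the counts are summed per term in one recycled map, and the merged entries are re-ranked into a bounded set of requiredSize. The same merge with plain java.util collections standing in for the trove map and BoundedTreeSet (a sketch, not the ES implementation):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class FacetReduceSketch {

    // each shard ships its (term -> count) entries; the reduce sums them, then re-ranks
    static List<Map.Entry<Short, Integer>> reduce(List<Map<Short, Integer>> shards, int requiredSize) {
        Map<Short, Integer> aggregated = new HashMap<>();  // stands in for the recycled TShortIntHashMap
        for (Map<Short, Integer> shard : shards) {
            for (Map.Entry<Short, Integer> e : shard.entrySet()) {
                aggregated.merge(e.getKey(), e.getValue(), Integer::sum);
            }
        }
        List<Map.Entry<Short, Integer>> ordered = new ArrayList<>(aggregated.entrySet());
        // count-descending, in the spirit of the COUNT comparator; BoundedTreeSet caps the size as it inserts
        ordered.sort((a, b) -> Integer.compare(b.getValue(), a.getValue()));
        return ordered.subList(0, Math.min(requiredSize, ordered.size()));
    }

    public static void main(String[] args) {
        Map<Short, Integer> shard1 = new HashMap<>();
        shard1.put((short) 1, 3);
        shard1.put((short) 2, 1);
        Map<Short, Integer> shard2 = new HashMap<>();
        shard2.put((short) 1, 2);
        shard2.put((short) 3, 5);
        System.out.println(reduce(Arrays.asList(shard1, shard2), 2));  // terms 1 and 3 both end up at count 5
    }
}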

View File

@@ -1,255 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.shorts;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import gnu.trove.iterator.TShortIntIterator;
import gnu.trove.map.hash.TShortIntHashMap;
import gnu.trove.set.hash.TShortHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.shorts.ShortFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
/**
*
*/
public class TermsShortFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final TermsFacet.ComparatorType comparatorType;
private final int size;
private final int numberOfShards;
private final FieldDataType fieldDataType;
private ShortFieldData fieldData;
private final StaticAggregatorValueProc aggregator;
private final SearchScript script;
public TermsShortFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, String scriptLang, String script, Map<String, Object> params) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms short facet collector on it");
}
// add type filter if there is exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.SHORT) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of short type, can't run terms short facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (script != null) {
this.script = context.scriptService().search(context.lookup(), scriptLang, script, params);
} else {
this.script = null;
}
if (this.script == null && excluded.isEmpty()) {
aggregator = new StaticAggregatorValueProc(CacheRecycler.popShortIntMap());
} else {
aggregator = new AggregatorValueProc(CacheRecycler.popShortIntMap(), excluded, this.script);
}
if (allTerms) {
try {
for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
ShortFieldData fieldData = (ShortFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
fieldData.forEachValue(aggregator);
}
} catch (Exception e) {
throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
}
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
if (script != null) {
script.setScorer(scorer);
}
}
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
fieldData = (ShortFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
if (script != null) {
script.setNextReader(context);
}
}
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachValueInDoc(doc, aggregator);
}
@Override
public Facet facet() {
TShortIntHashMap facets = aggregator.facets();
if (facets.isEmpty()) {
CacheRecycler.pushShortIntMap(facets);
return new InternalShortTermsFacet(facetName, comparatorType, size, ImmutableList.<InternalShortTermsFacet.ShortEntry>of(), aggregator.missing(), aggregator.total());
} else {
if (size < EntryPriorityQueue.LIMIT) {
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
for (TShortIntIterator it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.insertWithOverflow(new InternalShortTermsFacet.ShortEntry(it.key(), it.value()));
}
InternalShortTermsFacet.ShortEntry[] list = new InternalShortTermsFacet.ShortEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = (InternalShortTermsFacet.ShortEntry) ordered.pop();
}
CacheRecycler.pushShortIntMap(facets);
return new InternalShortTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing(), aggregator.total());
} else {
BoundedTreeSet<InternalShortTermsFacet.ShortEntry> ordered = new BoundedTreeSet<InternalShortTermsFacet.ShortEntry>(comparatorType.comparator(), size);
for (TShortIntIterator it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.add(new InternalShortTermsFacet.ShortEntry(it.key(), it.value()));
}
CacheRecycler.pushShortIntMap(facets);
return new InternalShortTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing(), aggregator.total());
}
}
}
public static class AggregatorValueProc extends StaticAggregatorValueProc {
private final SearchScript script;
private final TShortHashSet excluded;
public AggregatorValueProc(TShortIntHashMap facets, Set<BytesRef> excluded, SearchScript script) {
super(facets);
if (excluded == null || excluded.isEmpty()) {
this.excluded = null;
} else {
this.excluded = new TShortHashSet(excluded.size());
for (BytesRef s : excluded) {
this.excluded.add(Short.parseShort(s.utf8ToString()));
}
}
this.script = script;
}
@Override
public void onValue(int docId, short value) {
if (excluded != null && excluded.contains(value)) {
return;
}
if (script != null) {
script.setNextDocId(docId);
script.setNextVar("term", value);
Object scriptValue = script.run();
if (scriptValue == null) {
return;
}
if (scriptValue instanceof Boolean) {
if (!((Boolean) scriptValue)) {
return;
}
} else {
value = ((Number) scriptValue).shortValue();
}
}
super.onValue(docId, value);
}
}
public static class StaticAggregatorValueProc implements ShortFieldData.ValueInDocProc, ShortFieldData.ValueProc {
private final TShortIntHashMap facets;
private int missing;
private int total;
public StaticAggregatorValueProc(TShortIntHashMap facets) {
this.facets = facets;
}
@Override
public void onValue(short value) {
facets.putIfAbsent(value, 0);
}
@Override
public void onValue(int docId, short value) {
facets.adjustOrPutValue(value, 1, 1);
total++;
}
@Override
public void onMissing(int docId) {
missing++;
}
public final TShortIntHashMap facets() {
return facets;
}
public final int missing() {
return this.missing;
}
public final int total() {
return this.total;
}
}
}
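facet() above selects its top entries in one of two ways: a Lucene-style bounded heap when size is under EntryPriorityQueue.LIMIT, or a BoundedTreeSet otherwise. The heap path relies on insertWithOverflow semantics, which amount to evicting the current weakest survivor; roughly the following, sketched with java.util.PriorityQueue rather than the Lucene class:

import java.util.Arrays;
import java.util.PriorityQueue;

public class TopNSketch {

    // keep the `n` largest counts; the heap root is always the weakest survivor
    static int[] topN(int[] counts, int n) {
        PriorityQueue<Integer> heap = new PriorityQueue<>();  // min-heap
        for (int c : counts) {
            if (heap.size() < n) {
                heap.add(c);
            } else if (c > heap.peek()) {
                heap.poll();   // evict the current weakest, i.e. insertWithOverflow
                heap.add(c);
            }
        }
        // pop ascending and fill from the back, the same trick as the reverse loop in facet()
        int[] out = new int[heap.size()];
        for (int i = out.length - 1; i >= 0; i--) {
            out[i] = heap.poll();
        }
        return out;
    }

    public static void main(String[] args) {
        System.out.println(Arrays.toString(topN(new int[]{5, 1, 9, 3, 7}, 3)));  // [9, 7, 5]
    }
}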

View File

@@ -1,267 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facet.terms.shorts;
import com.google.common.collect.ImmutableSet;
import gnu.trove.set.hash.TShortHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldData;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.shorts.ShortFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
*
*/
public class TermsShortOrdinalsFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final TermsFacet.ComparatorType comparatorType;
private final int size;
private final int numberOfShards;
private final int minCount;
private final FieldDataType fieldDataType;
private ShortFieldData fieldData;
private final List<ReaderAggregator> aggregators;
private ReaderAggregator current;
long missing;
long total;
private final TShortHashSet excluded;
public TermsShortOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms short facet collector on it");
}
// add type filter if there is exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.SHORT) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of short type, can't run terms short facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (excluded == null || excluded.isEmpty()) {
this.excluded = null;
} else {
this.excluded = new TShortHashSet(excluded.size());
for (BytesRef s : excluded) {
this.excluded.add(Short.parseShort(s.utf8ToString()));
}
}
// minCount is offset by -1 so that, with allTerms, zero-count terms still pass the "count > minCount" check
if (allTerms) {
minCount = -1;
} else {
minCount = 0;
}
this.aggregators = new ArrayList<ReaderAggregator>(context.searcher().getIndexReader().leaves().size());
}
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
if (current.values.length > 1) {
aggregators.add(current);
}
}
fieldData = (ShortFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
current = new ReaderAggregator(fieldData);
}
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachOrdinalInDoc(doc, current);
}
@Override
public Facet facet() {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
// if we have values for this one, add it
if (current.values.length > 1) {
aggregators.add(current);
}
}
AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size());
for (ReaderAggregator aggregator : aggregators) {
if (aggregator.nextPosition()) {
queue.add(aggregator);
}
}
// YUCK, we repeat the same logic, but once with an optimized priority queue for smaller sizes
if (size < EntryPriorityQueue.LIMIT) {
// optimized path: use a bounded priority queue of exactly the requested size
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
short value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalShortTermsFacet.ShortEntry entry = new InternalShortTermsFacet.ShortEntry(value, count);
ordered.insertWithOverflow(entry);
}
}
}
InternalShortTermsFacet.ShortEntry[] list = new InternalShortTermsFacet.ShortEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = (InternalShortTermsFacet.ShortEntry) ordered.pop();
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalShortTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total);
}
BoundedTreeSet<InternalShortTermsFacet.ShortEntry> ordered = new BoundedTreeSet<InternalShortTermsFacet.ShortEntry>(comparatorType.comparator(), size);
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
short value = agg.current;
int count = 0;
do {
count += agg.counts[agg.position];
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
// we are done with this reader
queue.pop();
agg = queue.top();
}
} while (agg != null && value == agg.current);
if (count > minCount) {
if (excluded == null || !excluded.contains(value)) {
InternalShortTermsFacet.ShortEntry entry = new InternalShortTermsFacet.ShortEntry(value, count);
ordered.add(entry);
}
}
}
for (ReaderAggregator aggregator : aggregators) {
CacheRecycler.pushIntArray(aggregator.counts);
}
return new InternalShortTermsFacet(facetName, comparatorType, size, ordered, missing, total);
}
public static class ReaderAggregator implements FieldData.OrdinalInDocProc {
final short[] values;
final int[] counts;
int position = 0;
short current;
int total;
public ReaderAggregator(ShortFieldData fieldData) {
this.values = fieldData.values();
this.counts = CacheRecycler.popIntArray(fieldData.values().length);
}
@Override
public void onOrdinal(int docId, int ordinal) {
counts[ordinal]++;
total++;
}
public boolean nextPosition() {
if (++position >= values.length) {
return false;
}
current = values[position];
return true;
}
}
public static class AggregatorPriorityQueue extends PriorityQueue<ReaderAggregator> {
public AggregatorPriorityQueue(int size) {
super(size);
}
@Override
protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) {
return a.current < b.current;
}
}
}
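All of the ordinal collectors in this commit reserve ordinal 0 for documents with no value: counts[0] becomes the missing tally and is subtracted from total when a reader is rolled over. A toy walk-through of that accounting (one ordinal per doc for simplicity; the variable names are illustrative):

public class OrdinalMissingSketch {
    public static void main(String[] args) {
        // ordinal 0 means "no value for this field"; ordinals 1..n map to real terms
        int[] docOrdinals = {1, 0, 2, 1, 0};
        int[] counts = new int[3];
        int total = 0;
        for (int ordinal : docOrdinals) {
            counts[ordinal]++;  // what ReaderAggregator.onOrdinal does
            total++;
        }
        long missing = counts[0];             // docs that only hit the reserved ordinal
        long valueTotal = total - counts[0];  // mirrors `current.total - current.counts[0]`
        System.out.println("missing=" + missing + " total=" + valueTotal);  // missing=2 total=3
    }
}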

View File

@@ -28,10 +28,10 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldData;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.common.lucene.HashedBytesRef;
import org.elasticsearch.index.fielddata.HashedBytesValues;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
@@ -41,7 +41,6 @@ import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -50,72 +49,59 @@ import java.util.regex.Pattern;
*/
public class FieldsTermsStringFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String[] indexFieldsNames;
private final InternalStringTermsFacet.ComparatorType comparatorType;
private final int size;
private final int numberOfShards;
private final FieldDataType[] fieldsDataType;
private final IndexFieldData[] indexFieldDatas;
private HashedBytesValues[] values;
private FieldData[] fieldsData;
private final StaticAggregatorValueProc aggregator;
private final StaticAggregatorValueProc[] aggregators;
private final SearchScript script;
public FieldsTermsStringFacetCollector(String facetName, String[] fieldsNames, int size, InternalStringTermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, Pattern pattern, String scriptLang, String script, Map<String, Object> params) {
ImmutableSet<BytesRef> excluded, Pattern pattern, SearchScript script) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
this.script = script;
fieldsDataType = new FieldDataType[fieldsNames.length];
fieldsData = new FieldData[fieldsNames.length];
indexFieldsNames = new String[fieldsNames.length];
indexFieldDatas = new IndexFieldData[fieldsNames.length];
values = new HashedBytesValues[fieldsNames.length];
aggregators = new StaticAggregatorValueProc[fieldsNames.length];
TObjectIntHashMap<HashedBytesRef> map = CacheRecycler.popObjectIntMap();
for (int i = 0; i < fieldsNames.length; i++) {
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldsNames[i]);
if (smartMappers == null || !smartMappers.hasMapper()) {
this.indexFieldsNames[i] = fieldsNames[i];
this.fieldsDataType[i] = FieldDataType.DefaultTypes.STRING;
FieldMapper mapper = context.smartNameFieldMapper(fieldsNames[i]);
if (mapper == null) {
throw new FacetPhaseExecutionException(facetName, "failed to find mapping for [" + fieldsNames[i] + "]");
}
indexFieldDatas[i] = context.fieldData().getForField(mapper);
if (excluded.isEmpty() && pattern == null && this.script == null) {
aggregators[i] = new StaticAggregatorValueProc(map);
} else {
this.indexFieldsNames[i] = smartMappers.mapper().names().indexName();
this.fieldsDataType[i] = smartMappers.mapper().fieldDataType();
}
}
if (script != null) {
this.script = context.scriptService().search(context.lookup(), scriptLang, script, params);
} else {
this.script = null;
}
if (excluded.isEmpty() && pattern == null && this.script == null) {
aggregator = new StaticAggregatorValueProc(CacheRecycler.<BytesRef>popObjectIntMap());
} else {
aggregator = new AggregatorValueProc(CacheRecycler.<BytesRef>popObjectIntMap(), excluded, pattern, this.script);
}
if (allTerms) {
try {
for (int i = 0; i < fieldsNames.length; i++) {
for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
FieldData fieldData = fieldDataCache.cache(fieldsDataType[i], readerContext.reader(), indexFieldsNames[i]);
fieldData.forEachValue(aggregator);
}
}
} catch (Exception e) {
throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
aggregators[i] = new AggregatorValueProc(map, excluded, pattern, this.script);
}
}
// TODO: we need to support this flag with the new field data...
// if (allTerms) {
// try {
// for (int i = 0; i < fieldsNames.length; i++) {
// for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
// FieldData fieldData = fieldDataCache.cache(fieldsDataType[i], readerContext.reader(), indexFieldsNames[i]);
// fieldData.forEachValue(aggregator);
// }
// }
// } catch (Exception e) {
// throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
// }
// }
}
@Override
@@ -127,8 +113,9 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector {
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
for (int i = 0; i < indexFieldsNames.length; i++) {
fieldsData[i] = fieldDataCache.cache(fieldsDataType[i], context.reader(), indexFieldsNames[i]);
for (int i = 0; i < indexFieldDatas.length; i++) {
values[i] = indexFieldDatas[i].load(context).getHashedBytesValues();
aggregators[i].values = values[i];
}
if (script != null) {
script.setNextReader(context);
@@ -137,38 +124,44 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector {
@Override
protected void doCollect(int doc) throws IOException {
for (FieldData fieldData : fieldsData) {
fieldData.forEachValueInDoc(doc, aggregator);
for (int i = 0; i < values.length; i++) {
values[i].forEachValueInDoc(doc, aggregators[i]);
}
}
@Override
public Facet facet() {
TObjectIntHashMap<BytesRef> facets = aggregator.facets();
TObjectIntHashMap<HashedBytesRef> facets = aggregators[0].facets(); // we share the map between all aggregators
long totalMissing = 0;
long total = 0;
for (StaticAggregatorValueProc aggregator : aggregators) {
totalMissing += aggregator.missing();
total += aggregator.total();
}
if (facets.isEmpty()) {
CacheRecycler.pushObjectIntMap(facets);
return new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.<InternalStringTermsFacet.TermEntry>of(), aggregator.missing(), aggregator.total());
return new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.<InternalStringTermsFacet.TermEntry>of(), totalMissing, total);
} else {
if (size < EntryPriorityQueue.LIMIT) {
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
for (TObjectIntIterator<BytesRef> it = facets.iterator(); it.hasNext(); ) {
for (TObjectIntIterator<HashedBytesRef> it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.insertWithOverflow(new InternalStringTermsFacet.TermEntry(it.key(), it.value()));
ordered.insertWithOverflow(new InternalStringTermsFacet.TermEntry(it.key().bytes, it.value()));
}
InternalStringTermsFacet.TermEntry[] list = new InternalStringTermsFacet.TermEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = ((InternalStringTermsFacet.TermEntry) ordered.pop());
}
CacheRecycler.pushObjectIntMap(facets);
return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing(), aggregator.total());
return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), totalMissing, total);
} else {
BoundedTreeSet<InternalStringTermsFacet.TermEntry> ordered = new BoundedTreeSet<InternalStringTermsFacet.TermEntry>(comparatorType.comparator(), size);
for (TObjectIntIterator<BytesRef> it = facets.iterator(); it.hasNext(); ) {
for (TObjectIntIterator<HashedBytesRef> it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.add(new InternalStringTermsFacet.TermEntry(it.key(), it.value()));
ordered.add(new InternalStringTermsFacet.TermEntry(it.key().bytes, it.value()));
}
CacheRecycler.pushObjectIntMap(facets);
return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing(), aggregator.total());
return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, totalMissing, total);
}
}
}
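Note the arrangement facet() relies on here: the multi-field collector gives every field its own value proc, but all procs write into one shared counts map, while missing and total stay per-proc and are summed afterwards. A compact sketch of that arrangement (names are illustrative):

import java.util.HashMap;
import java.util.Map;

public class SharedMapSketch {

    static final class FieldCounter {
        final Map<String, Integer> shared;  // one counts map shared across all fields
        int missing;
        int total;

        FieldCounter(Map<String, Integer> shared) { this.shared = shared; }

        void onValue(String term) {
            shared.merge(term, 1, Integer::sum);
            total++;
        }

        void onMissing() { missing++; }
    }

    public static void main(String[] args) {
        Map<String, Integer> shared = new HashMap<>();
        FieldCounter field1 = new FieldCounter(shared);
        FieldCounter field2 = new FieldCounter(shared);
        field1.onValue("a");
        field1.onMissing();
        field2.onValue("a");
        field2.onValue("b");
        long totalMissing = field1.missing + field2.missing;  // summed exactly as facet() does
        long total = field1.total + field2.total;
        System.out.println(shared + " missing=" + totalMissing + " total=" + total);
        // {a=2, b=1} missing=1 total=3
    }
}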
@@ -181,7 +174,7 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector {
private final SearchScript script;
public AggregatorValueProc(TObjectIntHashMap<BytesRef> facets, ImmutableSet<BytesRef> excluded, Pattern pattern, SearchScript script) {
public AggregatorValueProc(TObjectIntHashMap<HashedBytesRef> facets, ImmutableSet<BytesRef> excluded, Pattern pattern, SearchScript script) {
super(facets);
this.excluded = excluded;
this.matcher = pattern != null ? pattern.matcher("") : null;
@@ -189,19 +182,19 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector {
}
@Override
public void onValue(int docId, BytesRef value) {
if (excluded != null && excluded.contains(value)) {
public void onValue(int docId, HashedBytesRef value) {
if (excluded != null && excluded.contains(value.bytes)) {
return;
}
// LUCENE 4 UPGRADE: use Lucene's RegexCapabilities
if (matcher != null && !matcher.reset(value.utf8ToString()).matches()) {
if (matcher != null && !matcher.reset(value.bytes.utf8ToString()).matches()) {
return;
}
if (script != null) {
script.setNextDocId(docId);
// LUCENE 4 UPGRADE: needs optimization
script.setNextVar("term", value.utf8ToString());
script.setNextVar("term", value.bytes.utf8ToString());
Object scriptValue = script.run();
if (scriptValue == null) {
return;
@@ -212,33 +205,29 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector {
}
} else {
// LUCENE 4 UPGRADE: make script return BR?
value = new BytesRef(scriptValue.toString());
value = new HashedBytesRef(scriptValue.toString());
}
}
super.onValue(docId, value);
}
}
public static class StaticAggregatorValueProc implements FieldData.StringValueInDocProc, FieldData.StringValueProc {
public static class StaticAggregatorValueProc implements HashedBytesValues.ValueInDocProc {
// LUCENE 4 UPGRADE: check whether computing the hash code here is too expensive
private final TObjectIntHashMap<BytesRef> facets;
private final TObjectIntHashMap<HashedBytesRef> facets;
HashedBytesValues values;
private int missing;
private int total;
public StaticAggregatorValueProc(TObjectIntHashMap<BytesRef> facets) {
public StaticAggregatorValueProc(TObjectIntHashMap<HashedBytesRef> facets) {
this.facets = facets;
}
@Override
public void onValue(BytesRef value) {
facets.putIfAbsent(value, 0);
}
@Override
public void onValue(int docId, BytesRef value) {
facets.adjustOrPutValue(value, 1, 1);
public void onValue(int docId, HashedBytesRef value) {
facets.adjustOrPutValue(values.makeSafe(value), 1, 1);
total++;
}
@@ -247,7 +236,7 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector {
missing++;
}
public final TObjectIntHashMap<BytesRef> facets() {
public final TObjectIntHashMap<HashedBytesRef> facets() {
return facets;
}
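The move from BytesRef to HashedBytesRef map keys above is about computing the hash of a term's bytes once and reusing it on every map probe; values.makeSafe(value) additionally deep-copies values whose backing buffer the enumeration may recycle before they are stored as keys. A minimal wrapper in the same spirit (HashedBytes below is illustrative, not the ES class):

import java.util.Arrays;

public final class HashedBytes {

    final byte[] bytes;
    final int hash;  // computed once at construction, reused by every hashCode() call

    HashedBytes(byte[] bytes) {
        this.bytes = bytes;
        this.hash = Arrays.hashCode(bytes);
    }

    // deep copy for when `bytes` points into a buffer the producer will overwrite (cf. makeSafe)
    HashedBytes deepCopy() {
        return new HashedBytes(Arrays.copyOf(bytes, bytes.length));
    }

    @Override
    public int hashCode() {
        return hash;
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof HashedBytes)) {
            return false;
        }
        HashedBytes other = (HashedBytes) o;
        return hash == other.hash && Arrays.equals(bytes, other.bytes);
    }
}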

View File

@@ -28,24 +28,18 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.util.concurrent.ThreadLocals;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldData;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.common.lucene.HashedBytesRef;
import org.elasticsearch.index.fielddata.HashedBytesValues;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -54,17 +48,7 @@ import java.util.regex.Pattern;
*/
public class TermsStringFacetCollector extends AbstractFacetCollector {
static ThreadLocal<ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<String>>>> cache = new ThreadLocal<ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<String>>>>() {
@Override
protected ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<String>>> initialValue() {
return new ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<java.lang.String>>>(new ArrayDeque<TObjectIntHashMap<String>>());
}
};
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final IndexFieldData indexFieldData;
private final TermsFacet.ComparatorType comparatorType;
@@ -72,57 +56,37 @@ public class TermsStringFacetCollector extends AbstractFacetCollector {
private final int numberOfShards;
private final FieldDataType fieldDataType;
private FieldData fieldData;
private HashedBytesValues values;
private final StaticAggregatorValueProc aggregator;
private final SearchScript script;
public TermsStringFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, Pattern pattern, String scriptLang, String script, Map<String, Object> params) {
public TermsStringFacetCollector(String facetName, IndexFieldData indexFieldData, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, Pattern pattern, SearchScript script) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.indexFieldData = indexFieldData;
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
this.indexFieldName = fieldName;
this.fieldDataType = FieldDataType.DefaultTypes.STRING;
} else {
// add type filter if there is exact doc mapper associated with it
if (smartMappers.hasDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
}
if (script != null) {
this.script = context.scriptService().search(context.lookup(), scriptLang, script, params);
} else {
this.script = null;
}
this.script = script;
if (excluded.isEmpty() && pattern == null && this.script == null) {
aggregator = new StaticAggregatorValueProc(CacheRecycler.<BytesRef>popObjectIntMap());
aggregator = new StaticAggregatorValueProc(CacheRecycler.<HashedBytesRef>popObjectIntMap());
} else {
aggregator = new AggregatorValueProc(CacheRecycler.<BytesRef>popObjectIntMap(), excluded, pattern, this.script);
aggregator = new AggregatorValueProc(CacheRecycler.<HashedBytesRef>popObjectIntMap(), excluded, pattern, this.script);
}
if (allTerms) {
try {
for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
FieldData fieldData = fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
fieldData.forEachValue(aggregator);
}
} catch (Exception e) {
throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
}
// TODO: we need to bring this back with the new field data!
// try {
// for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
// FieldData fieldData = fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
// fieldData.forEachValue(aggregator);
// }
// } catch (Exception e) {
// throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
// }
}
}
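The AggregatorValueProc chosen in this constructor stacks three optional gates in front of the count: an exclusion set, a regex matcher, and a script that may veto the term (by returning a Boolean) or rewrite it (by returning anything else). The same chain sketched with a java.util.function hook standing in for SearchScript:

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class TermGateSketch {

    static void onValue(String term, Set<String> excluded, Matcher matcher,
                        Function<String, Object> script, Map<String, Integer> facets) {
        if (excluded != null && excluded.contains(term)) return;        // gate 1: exclusion set
        if (matcher != null && !matcher.reset(term).matches()) return;  // gate 2: regex
        if (script != null) {                                           // gate 3: script hook
            Object result = script.apply(term);
            if (result == null) return;
            if (result instanceof Boolean) {
                if (!((Boolean) result)) return;                        // Boolean false vetoes the term
            } else {
                term = result.toString();                               // anything else rewrites it
            }
        }
        facets.merge(term, 1, Integer::sum);
    }

    public static void main(String[] args) {
        Map<String, Integer> facets = new HashMap<>();
        Matcher matcher = Pattern.compile("[a-z]+").matcher("");
        Function<String, Object> script = t -> t.equals("skip") ? Boolean.FALSE : t.toUpperCase();
        onValue("foo", Collections.singleton("bar"), matcher, script, facets);
        onValue("bar", Collections.singleton("bar"), matcher, script, facets);   // excluded
        onValue("skip", Collections.singleton("bar"), matcher, script, facets);  // vetoed by the script
        System.out.println(facets);  // {FOO=1}
    }
}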
@@ -135,7 +99,8 @@ public class TermsStringFacetCollector extends AbstractFacetCollector {
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
fieldData = fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
values = indexFieldData.load(context).getHashedBytesValues();
aggregator.values = values;
if (script != null) {
script.setNextReader(context);
}
@@ -143,21 +108,21 @@ public class TermsStringFacetCollector extends AbstractFacetCollector {
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachValueInDoc(doc, aggregator);
values.forEachValueInDoc(doc, aggregator);
}
@Override
public Facet facet() {
TObjectIntHashMap<BytesRef> facets = aggregator.facets();
TObjectIntHashMap<HashedBytesRef> facets = aggregator.facets();
if (facets.isEmpty()) {
CacheRecycler.pushObjectIntMap(facets);
return new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.<InternalStringTermsFacet.TermEntry>of(), aggregator.missing(), aggregator.total());
} else {
if (size < EntryPriorityQueue.LIMIT) {
EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
for (TObjectIntIterator<BytesRef> it = facets.iterator(); it.hasNext(); ) {
for (TObjectIntIterator<HashedBytesRef> it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.insertWithOverflow(new InternalStringTermsFacet.TermEntry(it.key(), it.value()));
ordered.insertWithOverflow(new InternalStringTermsFacet.TermEntry(it.key().bytes, it.value()));
}
InternalStringTermsFacet.TermEntry[] list = new InternalStringTermsFacet.TermEntry[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
@ -167,9 +132,9 @@ public class TermsStringFacetCollector extends AbstractFacetCollector {
return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing(), aggregator.total());
} else {
BoundedTreeSet<InternalStringTermsFacet.TermEntry> ordered = new BoundedTreeSet<InternalStringTermsFacet.TermEntry>(comparatorType.comparator(), size);
for (TObjectIntIterator<BytesRef> it = facets.iterator(); it.hasNext(); ) {
for (TObjectIntIterator<HashedBytesRef> it = facets.iterator(); it.hasNext(); ) {
it.advance();
ordered.add(new InternalStringTermsFacet.TermEntry(it.key(), it.value()));
ordered.add(new InternalStringTermsFacet.TermEntry(it.key().bytes, it.value()));
}
CacheRecycler.pushObjectIntMap(facets);
return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing(), aggregator.total());
@ -185,7 +150,7 @@ public class TermsStringFacetCollector extends AbstractFacetCollector {
private final SearchScript script;
public AggregatorValueProc(TObjectIntHashMap<BytesRef> facets, ImmutableSet<BytesRef> excluded, Pattern pattern, SearchScript script) {
public AggregatorValueProc(TObjectIntHashMap<HashedBytesRef> facets, ImmutableSet<BytesRef> excluded, Pattern pattern, SearchScript script) {
super(facets);
this.excluded = excluded;
this.matcher = pattern != null ? pattern.matcher("") : null;
@ -193,18 +158,18 @@ public class TermsStringFacetCollector extends AbstractFacetCollector {
}
@Override
public void onValue(int docId, BytesRef value) {
if (excluded != null && excluded.contains(value)) {
public void onValue(int docId, HashedBytesRef value) {
if (excluded != null && excluded.contains(value.bytes)) {
return;
}
// LUCENE 4 UPGRADE: use Lucene's RegexCapabilities
if (matcher != null && !matcher.reset(value.utf8ToString()).matches()) {
if (matcher != null && !matcher.reset(value.bytes.utf8ToString()).matches()) {
return;
}
if (script != null) {
script.setNextDocId(docId);
// LUCENE 4 UPGRADE: needs optimization
script.setNextVar("term", value.utf8ToString());
script.setNextVar("term", value.bytes.utf8ToString());
Object scriptValue = script.run();
if (scriptValue == null) {
return;
@ -215,33 +180,30 @@ public class TermsStringFacetCollector extends AbstractFacetCollector {
}
} else {
// LUCENE 4 UPGRADE: should be possible to convert directly to BR
value = new BytesRef(scriptValue.toString());
value = new HashedBytesRef(scriptValue.toString());
}
}
super.onValue(docId, value);
}
}
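The script branch in onValue gives facet scripts a small contract: the candidate term is exposed as the variable "term", a null result drops the term, and a non-boolean result replaces it before counting (the Boolean case sits in the hunk elided above; the sketch assumes it keeps or drops the term as-is). A standalone restatement, with a hypothetical Function stand-in for the SearchScript API:

import java.util.function.Function;

final class TermScriptSketch {
    // Returns null when the term should be excluded from the facet,
    // otherwise the (possibly rewritten) term to count.
    static String apply(Function<String, Object> script, String term) {
        Object result = script.apply(term);
        if (result == null) {
            return null; // script vetoed the term
        }
        if (result instanceof Boolean) {
            return ((Boolean) result) ? term : null; // assumed keep/drop semantics
        }
        return result.toString(); // script rewrote the term
    }
}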
public static class StaticAggregatorValueProc implements FieldData.StringValueInDocProc, FieldData.StringValueProc {
public static class StaticAggregatorValueProc implements HashedBytesValues.ValueInDocProc {
// LUCENE 4 UPGRADE: check that computing the hash code is not too expensive
private final TObjectIntHashMap<BytesRef> facets;
private final TObjectIntHashMap<HashedBytesRef> facets;
HashedBytesValues values;
private int missing = 0;
private int total = 0;
public StaticAggregatorValueProc(TObjectIntHashMap<BytesRef> facets) {
public StaticAggregatorValueProc(TObjectIntHashMap<HashedBytesRef> facets) {
this.facets = facets;
}
@Override
public void onValue(BytesRef value) {
facets.putIfAbsent(value, 0);
}
@Override
public void onValue(int docId, BytesRef value) {
facets.adjustOrPutValue(value, 1, 1);
public void onValue(int docId, HashedBytesRef value) {
// we have to "makeSafe" the value even if the key already exists in the map, since it might not; need to find a way to optimize this
facets.adjustOrPutValue(values.makeSafe(value), 1, 1);
total++;
}
@ -250,7 +212,7 @@ public class TermsStringFacetCollector extends AbstractFacetCollector {
missing++;
}
public final TObjectIntHashMap<BytesRef> facets() {
public final TObjectIntHashMap<HashedBytesRef> facets() {
return facets;
}
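The makeSafe call in onValue above exists because the values iterator may hand back a BytesRef whose underlying byte[] is a shared, reused buffer; any key stored in the facets map beyond the callback must own its bytes. A minimal sketch of that copy, assuming Lucene 4's BytesRef.deepCopyOf as the copying primitive:

import org.apache.lucene.util.BytesRef;

final class MakeSafeSketch {
    // Deep-copy a possibly shared BytesRef so the caller owns the bytes.
    static BytesRef makeSafe(BytesRef maybeShared) {
        return BytesRef.deepCopyOf(maybeShared);
    }
}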
View File
@ -23,14 +23,11 @@ import com.google.common.collect.ImmutableSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldData;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.strings.StringFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.fielddata.IndexOrdinalFieldData;
import org.elasticsearch.index.fielddata.OrdinalsBytesValues;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.TermsFacet;
@ -49,9 +46,7 @@ import java.util.regex.Pattern;
*/
public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector {
private final FieldDataCache fieldDataCache;
private final String indexFieldName;
private final IndexOrdinalFieldData indexFieldData;
private final TermsFacet.ComparatorType comparatorType;
@ -61,9 +56,7 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector {
private final int minCount;
private final FieldDataType fieldDataType;
private StringFieldData fieldData;
private OrdinalsBytesValues values;
private final List<ReaderAggregator> aggregators;
@ -76,30 +69,14 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector {
private final Matcher matcher;
public TermsStringOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
public TermsStringOrdinalsFacetCollector(String facetName, IndexOrdinalFieldData indexFieldData, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
ImmutableSet<BytesRef> excluded, Pattern pattern) {
super(facetName);
this.fieldDataCache = context.fieldDataCache();
this.indexFieldData = indexFieldData;
this.size = size;
this.comparatorType = comparatorType;
this.numberOfShards = context.numberOfShards();
MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
if (smartMappers == null || !smartMappers.hasMapper()) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms long facet collector on it");
}
// add type filter if there is exact doc mapper associated with it
if (smartMappers.explicitTypeInNameWithDocMapper()) {
setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
}
if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.STRING) {
throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of string type, can't run terms string facet collector on it");
}
this.indexFieldName = smartMappers.mapper().names().indexName();
this.fieldDataType = smartMappers.mapper().fieldDataType();
if (excluded == null || excluded.isEmpty()) {
this.excluded = null;
} else {
@ -122,17 +99,17 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
if (current.values.length > 1) {
if (current.values.ordinals().getNumOrds() > 1) {
aggregators.add(current);
}
}
fieldData = (StringFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
current = new ReaderAggregator(fieldData);
values = indexFieldData.load(context).getBytesValues();
current = new ReaderAggregator(values);
}
@Override
protected void doCollect(int doc) throws IOException {
fieldData.forEachOrdinalInDoc(doc, current);
values.ordinals().forEachOrdinalInDoc(doc, current);
}
@Override
@ -141,7 +118,7 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector {
missing += current.counts[0];
total += current.total - current.counts[0];
// if we have values for this one, add it
if (current.values.length > 1) {
if (current.values.ordinals().getNumOrds() > 1) {
aggregators.add(current);
}
}
@ -161,7 +138,7 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector {
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
BytesRef value = agg.current;
BytesRef value = agg.values.makeSafe(agg.current); // we need to makeSafe it, since we end up pushing it into the queue (can we get around this?)
int count = 0;
do {
count += agg.counts[agg.position];
@ -202,7 +179,7 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector {
while (queue.size() > 0) {
ReaderAggregator agg = queue.top();
BytesRef value = agg.current;
BytesRef value = agg.values.makeSafe(agg.current); // we need to makeSafe it, since we end up adding it to the bounded set (can we work around that?)
int count = 0;
do {
count += agg.counts[agg.position];
@ -236,18 +213,18 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector {
return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, missing, total);
}
public static class ReaderAggregator implements FieldData.OrdinalInDocProc {
public static class ReaderAggregator implements Ordinals.Docs.OrdinalInDocProc {
final BytesRef[] values;
final OrdinalsBytesValues values;
final int[] counts;
int position = 0;
BytesRef current;
int total;
public ReaderAggregator(StringFieldData fieldData) {
this.values = fieldData.values();
this.counts = CacheRecycler.popIntArray(fieldData.values().length);
public ReaderAggregator(OrdinalsBytesValues values) {
this.values = values;
this.counts = CacheRecycler.popIntArray(values.ordinals().getNumOrds());
}
@Override
@ -257,10 +234,10 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector {
}
public boolean nextPosition() {
if (++position >= values.length) {
if (++position >= values.ordinals().getNumOrds()) {
return false;
}
current = values[position];
current = values.getValueByOrd(position);
return true;
}
}
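ReaderAggregator above counts per segment by ordinal rather than by term bytes: one int slot per ordinal (slot 0 meaning "no value" and feeding the missing count), incremented on each hit, with the actual terms materialized only while merging segments in facet(). A standalone sketch of the counting half, with hypothetical names:

final class OrdinalCounterSketch {
    final int[] counts; // one slot per ordinal; slot 0 counts docs with no value

    OrdinalCounterSketch(int numOrds) {
        this.counts = new int[numOrds];
    }

    void onOrdinal(int docId, int ordinal) {
        counts[ordinal]++; // O(1) per value; no hashing or byte comparison
    }
}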
View File
@ -34,11 +34,8 @@ import org.elasticsearch.search.facet.query.QueryFacet;
import org.elasticsearch.search.facet.range.RangeFacet;
import org.elasticsearch.search.facet.statistical.StatisticalFacet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.bytes.InternalByteTermsFacet;
import org.elasticsearch.search.facet.terms.doubles.InternalDoubleTermsFacet;
import org.elasticsearch.search.facet.terms.ints.InternalIntTermsFacet;
import org.elasticsearch.search.facet.terms.longs.InternalLongTermsFacet;
import org.elasticsearch.search.facet.terms.shorts.InternalShortTermsFacet;
import org.elasticsearch.search.facet.termsstats.TermsStatsFacet;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.joda.time.DateTimeZone;
@ -535,7 +532,6 @@ public class SimpleFacetsTests extends AbstractNodesTests {
.execute().actionGet();
facet = searchResponse.facets().facet("facet1");
assertThat(facet, instanceOf(InternalByteTermsFacet.class));
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(1));
assertThat(facet.entries().get(0).term().string(), equalTo("111"));
@ -547,7 +543,6 @@ public class SimpleFacetsTests extends AbstractNodesTests {
.execute().actionGet();
facet = searchResponse.facets().facet("facet1");
assertThat(facet, instanceOf(InternalIntTermsFacet.class));
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(1));
assertThat(facet.entries().get(0).term().string(), equalTo("111"));
@ -559,7 +554,6 @@ public class SimpleFacetsTests extends AbstractNodesTests {
.execute().actionGet();
facet = searchResponse.facets().facet("facet1");
assertThat(facet, instanceOf(InternalShortTermsFacet.class));
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(1));
assertThat(facet.entries().get(0).term().string(), equalTo("111"));
@ -729,20 +723,21 @@ public class SimpleFacetsTests extends AbstractNodesTests {
assertThat(facet.entries().get(3).term().string(), anyOf(equalTo("zzz"), equalTo("xxx")));
assertThat(facet.entries().get(3).count(), equalTo(1));
searchResponse = client.prepareSearch()
.setQuery(termQuery("xxx", "yyy")) // don't match anything
.addFacet(termsFacet("facet1").field("tag").size(10).allTerms(true).executionHint(executionHint))
.execute().actionGet();
facet = searchResponse.facets().facet("facet1");
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(3));
assertThat(facet.entries().get(0).term().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
assertThat(facet.entries().get(0).count(), equalTo(0));
assertThat(facet.entries().get(1).term().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
assertThat(facet.entries().get(1).count(), equalTo(0));
assertThat(facet.entries().get(2).term().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
assertThat(facet.entries().get(2).count(), equalTo(0));
// TODO: support allTerms with the new field data
// searchResponse = client.prepareSearch()
// .setQuery(termQuery("xxx", "yyy")) // don't match anything
// .addFacet(termsFacet("facet1").field("tag").size(10).allTerms(true).executionHint(executionHint))
// .execute().actionGet();
//
// facet = searchResponse.facets().facet("facet1");
// assertThat(facet.name(), equalTo("facet1"));
// assertThat(facet.entries().size(), equalTo(3));
// assertThat(facet.entries().get(0).term().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
// assertThat(facet.entries().get(0).count(), equalTo(0));
// assertThat(facet.entries().get(1).term().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
// assertThat(facet.entries().get(1).count(), equalTo(0));
// assertThat(facet.entries().get(2).term().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz")));
// assertThat(facet.entries().get(2).count(), equalTo(0));
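The assertions disabled above rely on allTerms, which under the old field data seeded every distinct term into the map with a zero count (the removed onValue(BytesRef)/putIfAbsent path earlier in this commit). A hedged sketch of how that seeding could look over the new ordinal-based values, using the types from the collector diff above; the HashedBytesRef(BytesRef) constructor is assumed:

final class AllTermsSeedingSketch {
    // Seed every distinct term with a zero count so terms that match no
    // document still show up in the facet response.
    static void seedAllTerms(OrdinalsBytesValues values,
                             TObjectIntHashMap<HashedBytesRef> facets) {
        for (int ord = 1; ord < values.ordinals().getNumOrds(); ord++) {
            BytesRef term = values.makeSafe(values.getValueByOrd(ord));
            facets.putIfAbsent(new HashedBytesRef(term), 0); // assumed ctor
        }
    }
}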
// Script Field