From e4cbdfa05bbe87f636d76aa7539e2ab3f0ec39b1 Mon Sep 17 00:00:00 2001 From: kimchy Date: Fri, 1 Apr 2011 16:30:45 +0300 Subject: [PATCH] Terms Facet: Performance improvements, closes #822. --- .idea/dictionaries/kimchy.xml | 3 + .../facet/TermsFacetSearchBenchmark.java | 121 +++++++- .../client/transport/TransportClient.java | 2 + .../elasticsearch/common/CacheRecycler.java | 291 ++++++++++++++++++ .../index/field/data/FieldData.java | 6 + .../index/field/data/bytes/ByteFieldData.java | 4 + .../data/bytes/MultiValueByteFieldData.java | 6 + .../data/bytes/SingleValueByteFieldData.java | 4 + .../field/data/doubles/DoubleFieldData.java | 4 + .../doubles/MultiValueDoubleFieldData.java | 6 + .../doubles/SingleValueDoubleFieldData.java | 4 + .../field/data/floats/FloatFieldData.java | 4 + .../data/floats/MultiValueFloatFieldData.java | 6 + .../floats/SingleValueFloatFieldData.java | 4 + .../index/field/data/ints/IntFieldData.java | 4 + .../data/ints/MultiValueIntFieldData.java | 6 + .../data/ints/SingleValueIntFieldData.java | 4 + .../index/field/data/longs/LongFieldData.java | 4 + .../data/longs/MultiValueLongFieldData.java | 6 + .../data/longs/SingleValueLongFieldData.java | 4 + .../data/shorts/MultiValueShortFieldData.java | 6 + .../field/data/shorts/ShortFieldData.java | 4 + .../shorts/SingleValueShortFieldData.java | 4 + .../strings/MultiValueStringFieldData.java | 6 + .../strings/SingleValueStringFieldData.java | 4 +- .../field/data/strings/StringFieldData.java | 4 + .../geo/MultiValueGeoPointFieldData.java | 6 + .../geo/SingleValueGeoPointFieldData.java | 4 + .../node/internal/InternalNode.java | 2 + .../search/facet/terms/TermsFacetBuilder.java | 13 + .../facet/terms/TermsFacetProcessor.java | 57 +++- .../terms/bytes/InternalByteTermsFacet.java | 14 +- .../terms/bytes/TermsByteFacetCollector.java | 63 ++-- .../TermsByteOrdinalsFacetCollector.java | 248 +++++++++++++++ .../doubles/InternalDoubleTermsFacet.java | 14 +- .../doubles/TermsDoubleFacetCollector.java | 
63 ++-- .../TermsDoubleOrdinalsFacetCollector.java | 248 +++++++++++++++ .../terms/floats/InternalFloatTermsFacet.java | 14 +- .../floats/TermsFloatFacetCollector.java | 63 ++-- .../TermsFloatOrdinalsFacetCollector.java | 248 +++++++++++++++ .../terms/ints/InternalIntTermsFacet.java | 14 +- .../terms/ints/TermsIntFacetCollector.java | 63 ++-- .../ints/TermsIntOrdinalsFacetCollector.java | 248 +++++++++++++++ .../facet/terms/ip/InternalIpTermsFacet.java | 14 +- .../facet/terms/ip/TermsIpFacetCollector.java | 64 ++-- .../ip/TermsIpOrdinalsFacetCollector.java | 248 +++++++++++++++ .../terms/longs/InternalLongTermsFacet.java | 15 +- .../terms/longs/TermsLongFacetCollector.java | 58 ++-- .../TermsLongOrdinalsFacetCollector.java | 248 +++++++++++++++ .../terms/shorts/InternalShortTermsFacet.java | 14 +- .../shorts/TermsShortFacetCollector.java | 63 ++-- .../TermsShortOrdinalsFacetCollector.java | 248 +++++++++++++++ .../FieldsTermsStringFacetCollector.java | 36 ++- .../ScriptTermsStringFieldFacetCollector.java | 34 +- .../strings/TermsStringFacetCollector.java | 54 ++-- .../TermsStringOrdinalsFacetCollector.java | 244 +++++++++++++++ .../terms/support/EntryPriorityQueue.java | 41 +++ .../search/facet/SimpleFacetsTests.java | 28 +- 58 files changed, 2906 insertions(+), 406 deletions(-) create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/common/CacheRecycler.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteOrdinalsFacetCollector.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleOrdinalsFacetCollector.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatOrdinalsFacetCollector.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntOrdinalsFacetCollector.java create mode 100644 
modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpOrdinalsFacetCollector.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongOrdinalsFacetCollector.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortOrdinalsFacetCollector.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetCollector.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/support/EntryPriorityQueue.java diff --git a/.idea/dictionaries/kimchy.xml b/.idea/dictionaries/kimchy.xml index c6d798d6b1c..09db370775e 100644 --- a/.idea/dictionaries/kimchy.xml +++ b/.idea/dictionaries/kimchy.xml @@ -2,6 +2,7 @@ addr + aggregators amazonaws apis appendable @@ -121,6 +122,7 @@ rackspace rebalance rebalancing + recycler regex reparse reparsed @@ -133,6 +135,7 @@ scriptable searchable segs + sentinal serializers sigar slurper diff --git a/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/TermsFacetSearchBenchmark.java b/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/TermsFacetSearchBenchmark.java index e49c8218284..3c319f0c2f7 100644 --- a/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/TermsFacetSearchBenchmark.java +++ b/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/TermsFacetSearchBenchmark.java @@ -61,17 +61,17 @@ public class TermsFacetSearchBenchmark { Client client = clientNode.client(); - long COUNT = SizeValue.parseSizeValue("5m").singles(); + long COUNT = SizeValue.parseSizeValue("2m").singles(); int BATCH = 100; int QUERY_WARMUP = 20; int QUERY_COUNT = 200; int NUMBER_OF_TERMS = 200; - int NUMBER_OF_MULTI_VALUE_TERMS = 5; + int NUMBER_OF_MULTI_VALUE_TERMS = 10; int STRING_TERM_SIZE = 5; long[] lValues = new 
long[NUMBER_OF_TERMS]; for (int i = 0; i < NUMBER_OF_TERMS; i++) { - lValues[i] = i; + lValues[i] = ThreadLocalRandom.current().nextLong(); } String[] sValues = new String[NUMBER_OF_TERMS]; for (int i = 0; i < NUMBER_OF_TERMS; i++) { @@ -104,6 +104,12 @@ public class TermsFacetSearchBenchmark { } builder.endArray(); + builder.startArray("lm_value"); + for (int k = 0; k < NUMBER_OF_MULTI_VALUE_TERMS; k++) { + builder.value(lValues[ThreadLocalRandom.current().nextInt(sValues.length)]); + } + builder.endArray(); + builder.endObject(); request.add(Requests.indexRequest("test").type("type1").id(Integer.toString(counter)) @@ -127,12 +133,14 @@ public class TermsFacetSearchBenchmark { } } client.admin().indices().prepareRefresh().execute().actionGet(); - System.out.println("--> Number of docs in index: " + client.prepareCount().setQuery(matchAllQuery()).execute().actionGet().count()); + COUNT = client.prepareCount().setQuery(matchAllQuery()).execute().actionGet().count(); + System.out.println("--> Number of docs in index: " + COUNT); long totalQueryTime = 0; // S_VALUE + client.admin().indices().prepareClearCache().setFieldDataCache(true).execute().actionGet(); System.out.println("--> Warmup (s_value) ..."); // run just the child query, warm up first @@ -163,6 +171,40 @@ public class TermsFacetSearchBenchmark { } System.out.println("--> Terms Facet (s_value) " + (totalQueryTime / QUERY_COUNT) + "ms"); + + // S_VALUE (Map) + client.admin().indices().prepareClearCache().setFieldDataCache(true).execute().actionGet(); + + System.out.println("--> Warmup (s_value) ..."); + // run just the child query, warm up first + for (int j = 0; j < QUERY_WARMUP; j++) { + SearchResponse searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addFacet(termsFacet("s_value").field("s_value").executionHint("map")) + .execute().actionGet(); + if (j == 0) { + System.out.println("--> Loading (s_value) took: " + searchResponse.took()); + } + if (searchResponse.hits().totalHits() != 
COUNT) { + System.err.println("--> mismatch on hits"); + } + } + System.out.println("--> Warmup (s_value) DONE"); + + totalQueryTime = 0; + for (int j = 0; j < QUERY_COUNT; j++) { + SearchResponse searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addFacet(termsFacet("s_value").field("s_value").executionHint("map")) + .execute().actionGet(); + if (searchResponse.hits().totalHits() != COUNT) { + System.err.println("--> mismatch on hits"); + } + totalQueryTime += searchResponse.tookInMillis(); + } + System.out.println("--> Terms Facet (map) (s_value) " + (totalQueryTime / QUERY_COUNT) + "ms"); + + // L VALUE client.admin().indices().prepareClearCache().setFieldDataCache(true).execute().actionGet(); System.out.println("--> Warmup (l_value) ..."); @@ -194,6 +236,8 @@ public class TermsFacetSearchBenchmark { } System.out.println("--> Terms Facet (l_value) " + (totalQueryTime / QUERY_COUNT) + "ms"); + // SM VALUE + client.admin().indices().prepareClearCache().setFieldDataCache(true).execute().actionGet(); System.out.println("--> Warmup (sm_value) ..."); @@ -226,6 +270,75 @@ public class TermsFacetSearchBenchmark { } System.out.println("--> Terms Facet (sm_value) " + (totalQueryTime / QUERY_COUNT) + "ms"); + // SM VALUE (map) + + client.admin().indices().prepareClearCache().setFieldDataCache(true).execute().actionGet(); + + System.out.println("--> Warmup (sm_value) ..."); + // run just the child query, warm up first + for (int j = 0; j < QUERY_WARMUP; j++) { + SearchResponse searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addFacet(termsFacet("sm_value").field("sm_value").executionHint("map")) + .execute().actionGet(); + if (j == 0) { + System.out.println("--> Loading (sm_value) took: " + searchResponse.took()); + } + if (searchResponse.hits().totalHits() != COUNT) { + System.err.println("--> mismatch on hits"); + } + } + System.out.println("--> Warmup (sm_value) DONE"); + + + totalQueryTime = 0; + for (int j = 0; j < 
QUERY_COUNT; j++) { + SearchResponse searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addFacet(termsFacet("sm_value").field("sm_value").executionHint("map")) + .execute().actionGet(); + if (searchResponse.hits().totalHits() != COUNT) { + System.err.println("--> mismatch on hits"); + } + totalQueryTime += searchResponse.tookInMillis(); + } + System.out.println("--> Terms Facet (map) (sm_value) " + (totalQueryTime / QUERY_COUNT) + "ms"); + + + // LM VALUE + + client.admin().indices().prepareClearCache().setFieldDataCache(true).execute().actionGet(); + + System.out.println("--> Warmup (lm_value) ..."); + // run just the child query, warm up first + for (int j = 0; j < QUERY_WARMUP; j++) { + SearchResponse searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addFacet(termsFacet("lm_value").field("lm_value")) + .execute().actionGet(); + if (j == 0) { + System.out.println("--> Loading (lm_value) took: " + searchResponse.took()); + } + if (searchResponse.hits().totalHits() != COUNT) { + System.err.println("--> mismatch on hits"); + } + } + System.out.println("--> Warmup (lm_value) DONE"); + + + totalQueryTime = 0; + for (int j = 0; j < QUERY_COUNT; j++) { + SearchResponse searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addFacet(termsFacet("lm_value").field("lm_value")) + .execute().actionGet(); + if (searchResponse.hits().totalHits() != COUNT) { + System.err.println("--> mismatch on hits"); + } + totalQueryTime += searchResponse.tookInMillis(); + } + System.out.println("--> Terms Facet (lm_value) " + (totalQueryTime / QUERY_COUNT) + "ms"); + clientNode.close(); node1.close(); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/client/transport/TransportClient.java b/modules/elasticsearch/src/main/java/org/elasticsearch/client/transport/TransportClient.java index 6cb03bdc85f..39a6f772365 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/client/transport/TransportClient.java 
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/client/transport/TransportClient.java @@ -46,6 +46,7 @@ import org.elasticsearch.client.transport.action.ClientTransportActionModule; import org.elasticsearch.client.transport.support.InternalTransportClient; import org.elasticsearch.cluster.ClusterNameModule; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.inject.Injector; @@ -229,6 +230,7 @@ public class TransportClient extends AbstractClient { // ignore } + CacheRecycler.clear(); ThreadLocals.clearReferencesThreadLocals(); } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/common/CacheRecycler.java b/modules/elasticsearch/src/main/java/org/elasticsearch/common/CacheRecycler.java new file mode 100644 index 00000000000..f1cc58bd868 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/common/CacheRecycler.java @@ -0,0 +1,291 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.common; + +import org.elasticsearch.common.trove.map.hash.*; + +import java.lang.ref.SoftReference; +import java.util.ArrayDeque; +import java.util.Arrays; +import java.util.Deque; + +public class CacheRecycler { + + public static void clear() { + intIntHashMap.remove(); + floatIntHashMap.remove(); + doubleIntHashMap.remove(); + shortIntHashMap.remove(); + longIntHashMap.remove(); + objectIntHashMap.remove(); + intArray.remove(); + } + + // ----- TIntIntHashMap ---- + + private static ThreadLocal>> intIntHashMap = new ThreadLocal>>(); + + + public static TIntIntHashMap popIntIntMap() { + SoftReference> ref = intIntHashMap.get(); + Deque deque = ref == null ? null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + intIntHashMap.set(new SoftReference>(deque)); + } + if (deque.isEmpty()) { + return new TIntIntHashMap(); + } + TIntIntHashMap map = deque.pollFirst(); + map.clear(); + return map; + } + + public static void pushIntIntMap(TIntIntHashMap map) { + SoftReference> ref = intIntHashMap.get(); + Deque deque = ref == null ? null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + intIntHashMap.set(new SoftReference>(deque)); + } + deque.add(map); + } + + + // ----- TFloatIntHashMap --- + + private static ThreadLocal>> floatIntHashMap = new ThreadLocal>>(); + + + public static TFloatIntHashMap popFloatIntMap() { + SoftReference> ref = floatIntHashMap.get(); + Deque deque = ref == null ? null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + floatIntHashMap.set(new SoftReference>(deque)); + } + if (deque.isEmpty()) { + return new TFloatIntHashMap(); + } + TFloatIntHashMap map = deque.pollFirst(); + map.clear(); + return map; + } + + public static void pushFloatIntMap(TFloatIntHashMap map) { + SoftReference> ref = floatIntHashMap.get(); + Deque deque = ref == null ? 
null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + floatIntHashMap.set(new SoftReference>(deque)); + } + deque.add(map); + } + + + // ----- TDoubleIntHashMap --- + + private static ThreadLocal>> doubleIntHashMap = new ThreadLocal>>(); + + + public static TDoubleIntHashMap popDoubleIntMap() { + SoftReference> ref = doubleIntHashMap.get(); + Deque deque = ref == null ? null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + doubleIntHashMap.set(new SoftReference>(deque)); + } + if (deque.isEmpty()) { + return new TDoubleIntHashMap(); + } + TDoubleIntHashMap map = deque.pollFirst(); + map.clear(); + return map; + } + + public static void pushDoubleIntMap(TDoubleIntHashMap map) { + SoftReference> ref = doubleIntHashMap.get(); + Deque deque = ref == null ? null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + doubleIntHashMap.set(new SoftReference>(deque)); + } + deque.add(map); + } + + + // ----- TByteIntHashMap --- + + private static ThreadLocal>> byteIntHashMap = new ThreadLocal>>(); + + + public static TByteIntHashMap popByteIntMap() { + SoftReference> ref = byteIntHashMap.get(); + Deque deque = ref == null ? null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + byteIntHashMap.set(new SoftReference>(deque)); + } + if (deque.isEmpty()) { + return new TByteIntHashMap(); + } + TByteIntHashMap map = deque.pollFirst(); + map.clear(); + return map; + } + + public static void pushByteIntMap(TByteIntHashMap map) { + SoftReference> ref = byteIntHashMap.get(); + Deque deque = ref == null ? null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + byteIntHashMap.set(new SoftReference>(deque)); + } + deque.add(map); + } + + // ----- TShortIntHashMap --- + + private static ThreadLocal>> shortIntHashMap = new ThreadLocal>>(); + + + public static TShortIntHashMap popShortIntMap() { + SoftReference> ref = shortIntHashMap.get(); + Deque deque = ref == null ? 
null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + shortIntHashMap.set(new SoftReference>(deque)); + } + if (deque.isEmpty()) { + return new TShortIntHashMap(); + } + TShortIntHashMap map = deque.pollFirst(); + map.clear(); + return map; + } + + public static void pushShortIntMap(TShortIntHashMap map) { + SoftReference> ref = shortIntHashMap.get(); + Deque deque = ref == null ? null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + shortIntHashMap.set(new SoftReference>(deque)); + } + deque.add(map); + } + + + // ----- TLongIntHashMap ---- + + private static ThreadLocal>> longIntHashMap = new ThreadLocal>>(); + + + public static TLongIntHashMap popLongIntMap() { + SoftReference> ref = longIntHashMap.get(); + Deque deque = ref == null ? null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + longIntHashMap.set(new SoftReference>(deque)); + } + if (deque.isEmpty()) { + return new TLongIntHashMap(); + } + TLongIntHashMap map = deque.pollFirst(); + map.clear(); + return map; + } + + public static void pushLongIntMap(TLongIntHashMap map) { + SoftReference> ref = longIntHashMap.get(); + Deque deque = ref == null ? null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + longIntHashMap.set(new SoftReference>(deque)); + } + deque.add(map); + } + + // ------ TObjectIntHashMap ----- + + private static ThreadLocal>> objectIntHashMap = new ThreadLocal>>(); + + + @SuppressWarnings({"unchecked"}) + public static TObjectIntHashMap popObjectIntMap() { + SoftReference> ref = objectIntHashMap.get(); + Deque deque = ref == null ? 
null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + objectIntHashMap.set(new SoftReference>(deque)); + } + if (deque.isEmpty()) { + return new TObjectIntHashMap(); + } + TObjectIntHashMap map = deque.pollFirst(); + map.clear(); + return map; + } + + public static void pushObjectIntMap(TObjectIntHashMap map) { + SoftReference> ref = objectIntHashMap.get(); + Deque deque = ref == null ? null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + objectIntHashMap.set(new SoftReference>(deque)); + } + deque.add(map); + } + + // ----- int[] ----- + + private static ThreadLocal>> intArray = new ThreadLocal>>(); + + public static int[] popIntArray(int size) { + return popIntArray(size, 0); + } + + public static int[] popIntArray(int size, int sentinal) { + SoftReference> ref = intArray.get(); + Deque deque = ref == null ? null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + intArray.set(new SoftReference>(deque)); + } + if (deque.isEmpty()) { + return new int[size]; + } + int[] ints = deque.pollFirst(); + if (ints.length < size) { + return new int[size]; + } + Arrays.fill(ints, sentinal); + return ints; + } + + public static void pushIntArray(int[] ints) { + SoftReference> ref = intArray.get(); + Deque deque = ref == null ? 
null : ref.get(); + if (deque == null) { + deque = new ArrayDeque(); + intArray.set(new SoftReference>(deque)); + } + deque.add(ints); + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/FieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/FieldData.java index 36fd7052202..082f38d6c95 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/FieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/FieldData.java @@ -95,6 +95,12 @@ public abstract class FieldData { void onMissing(int docId); } + public abstract void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc); + + public static interface OrdinalInDocProc { + void onOrdinal(int docId, int ordinal); + } + /** * The type of this field data. */ diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldData.java index 3aba9bf5563..123ac5cb808 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldData.java @@ -47,6 +47,10 @@ public abstract class ByteFieldData extends NumericFieldData { return 1 * values.length + RamUsage.NUM_BYTES_ARRAY_HEADER; } + public final byte[] values() { + return this.values; + } + abstract public byte value(int docId); abstract public byte[] values(int docId); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/MultiValueByteFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/MultiValueByteFieldData.java index 6ba79915e46..f55455cbe57 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/MultiValueByteFieldData.java +++ 
b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/MultiValueByteFieldData.java @@ -122,6 +122,12 @@ public class MultiValueByteFieldData extends ByteFieldData { } } + @Override public void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc) { + for (int[] ordinal : ordinals) { + proc.onOrdinal(docId, ordinal[docId]); + } + } + @Override public double[] doubleValues(int docId) { int length = 0; for (int[] ordinal : ordinals) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/SingleValueByteFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/SingleValueByteFieldData.java index 303dc4b551d..63add03e666 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/SingleValueByteFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/SingleValueByteFieldData.java @@ -88,6 +88,10 @@ public class SingleValueByteFieldData extends ByteFieldData { proc.onValue(docId, values[loc]); } + @Override public void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc) { + proc.onOrdinal(docId, ordinals[docId]); + } + @Override public byte value(int docId) { return values[ordinals[docId]]; } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldData.java index ad210df004d..c253e0175f1 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldData.java @@ -47,6 +47,10 @@ public abstract class DoubleFieldData extends NumericFieldData return RamUsage.NUM_BYTES_FLOAT * values.length + RamUsage.NUM_BYTES_ARRAY_HEADER; } + public final float[] values() { + return this.values; + } + abstract public float value(int 
docId); abstract public float[] values(int docId); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/floats/MultiValueFloatFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/floats/MultiValueFloatFieldData.java index aba388bed18..6fccb26ebc9 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/floats/MultiValueFloatFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/floats/MultiValueFloatFieldData.java @@ -122,6 +122,12 @@ public class MultiValueFloatFieldData extends FloatFieldData { } } + @Override public void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc) { + for (int[] ordinal : ordinals) { + proc.onOrdinal(docId, ordinal[docId]); + } + } + @Override public double[] doubleValues(int docId) { int length = 0; for (int[] ordinal : ordinals) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/floats/SingleValueFloatFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/floats/SingleValueFloatFieldData.java index 03d8a75aa65..c0603da7618 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/floats/SingleValueFloatFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/floats/SingleValueFloatFieldData.java @@ -88,6 +88,10 @@ public class SingleValueFloatFieldData extends FloatFieldData { proc.onValue(docId, values[loc]); } + @Override public void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc) { + proc.onOrdinal(docId, ordinals[docId]); + } + @Override public double[] doubleValues(int docId) { int loc = ordinals[docId]; if (loc == 0) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldData.java index be0ef7d80c0..eab29451f23 100644 --- 
a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldData.java @@ -47,6 +47,10 @@ public abstract class IntFieldData extends NumericFieldData { return RamUsage.NUM_BYTES_INT * values.length + RamUsage.NUM_BYTES_ARRAY_HEADER; } + public final int[] values() { + return this.values; + } + abstract public int value(int docId); abstract public int[] values(int docId); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/ints/MultiValueIntFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/ints/MultiValueIntFieldData.java index 5ca04dacbac..7ff800fee89 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/ints/MultiValueIntFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/ints/MultiValueIntFieldData.java @@ -122,6 +122,12 @@ public class MultiValueIntFieldData extends IntFieldData { } } + @Override public void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc) { + for (int[] ordinal : ordinals) { + proc.onOrdinal(docId, ordinal[docId]); + } + } + @Override public double[] doubleValues(int docId) { int length = 0; for (int[] ordinal : ordinals) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/ints/SingleValueIntFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/ints/SingleValueIntFieldData.java index 8e97a2c1828..91980f6963a 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/ints/SingleValueIntFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/ints/SingleValueIntFieldData.java @@ -88,6 +88,10 @@ public class SingleValueIntFieldData extends IntFieldData { proc.onValue(docId, values[loc]); } + @Override public void forEachOrdinalInDoc(int docId, 
OrdinalInDocProc proc) { + proc.onOrdinal(docId, ordinals[docId]); + } + @Override public double[] doubleValues(int docId) { int loc = ordinals[docId]; if (loc == 0) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldData.java index d73db5bbb6b..fe87d4fcf76 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldData.java @@ -57,6 +57,10 @@ public abstract class LongFieldData extends NumericFieldData { return RamUsage.NUM_BYTES_LONG * values.length + RamUsage.NUM_BYTES_ARRAY_HEADER; } + public final long[] values() { + return this.values; + } + abstract public long value(int docId); abstract public long[] values(int docId); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/longs/MultiValueLongFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/longs/MultiValueLongFieldData.java index 07b6431add3..778d6888dee 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/longs/MultiValueLongFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/longs/MultiValueLongFieldData.java @@ -124,6 +124,12 @@ public class MultiValueLongFieldData extends LongFieldData { } } + @Override public void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc) { + for (int[] ordinal : ordinals) { + proc.onOrdinal(docId, ordinal[docId]); + } + } + @Override public void forEachValueInDoc(int docId, ValueInDocProc proc) { boolean found = false; for (int[] ordinal : ordinals) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/longs/SingleValueLongFieldData.java 
b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/longs/SingleValueLongFieldData.java index 72865ea9a16..187c4824407 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/longs/SingleValueLongFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/longs/SingleValueLongFieldData.java @@ -89,6 +89,10 @@ public class SingleValueLongFieldData extends LongFieldData { proc.onValue(docId, values[loc]); } + @Override public void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc) { + proc.onOrdinal(docId, ordinals[docId]); + } + @Override public void forEachValueInDoc(int docId, ValueInDocProc proc) { int loc = ordinals[docId]; if (loc == 0) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/shorts/MultiValueShortFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/shorts/MultiValueShortFieldData.java index 7d4e7caeb01..d535f4a3472 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/shorts/MultiValueShortFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/shorts/MultiValueShortFieldData.java @@ -122,6 +122,12 @@ public class MultiValueShortFieldData extends ShortFieldData { } } + @Override public void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc) { + for (int[] ordinal : ordinals) { + proc.onOrdinal(docId, ordinal[docId]); + } + } + @Override public double[] doubleValues(int docId) { int length = 0; for (int[] ordinal : ordinals) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldData.java index 9932ca100cc..6c787308aa6 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldData.java +++ 
b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldData.java @@ -47,6 +47,10 @@ public abstract class ShortFieldData extends NumericFieldData return RamUsage.NUM_BYTES_SHORT * values.length + RamUsage.NUM_BYTES_ARRAY_HEADER; } + public final short[] values() { + return this.values; + } + abstract public short value(int docId); abstract public short[] values(int docId); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/shorts/SingleValueShortFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/shorts/SingleValueShortFieldData.java index 51a76497609..6853fadcf8f 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/shorts/SingleValueShortFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/shorts/SingleValueShortFieldData.java @@ -88,6 +88,10 @@ public class SingleValueShortFieldData extends ShortFieldData { proc.onValue(docId, values[loc]); } + @Override public void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc) { + proc.onOrdinal(docId, ordinals[docId]); + } + @Override public short value(int docId) { return values[ordinals[docId]]; } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/strings/MultiValueStringFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/strings/MultiValueStringFieldData.java index d8a690d8314..589cf6bebba 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/strings/MultiValueStringFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/strings/MultiValueStringFieldData.java @@ -84,6 +84,12 @@ public class MultiValueStringFieldData extends StringFieldData { } } + @Override public void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc) { + for (int[] ordinal : ordinals) { + proc.onOrdinal(docId, ordinal[docId]); + } + } + 
@Override public String value(int docId) { for (int[] ordinal : ordinals) { int loc = ordinal[docId]; diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/strings/SingleValueStringFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/strings/SingleValueStringFieldData.java index 771cb2b88f9..719043a8569 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/strings/SingleValueStringFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/strings/SingleValueStringFieldData.java @@ -51,8 +51,8 @@ public class SingleValueStringFieldData extends StringFieldData { return ordinals; } - String[] values() { - return this.values; + @Override public void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc) { + proc.onOrdinal(docId, ordinals[docId]); } @Override public boolean multiValued() { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/strings/StringFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/strings/StringFieldData.java index 9a091360be3..a8312313bcf 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/strings/StringFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/strings/StringFieldData.java @@ -50,6 +50,10 @@ public abstract class StringFieldData extends FieldData { return size; } + public String[] values() { + return this.values; + } + abstract public String value(int docId); abstract public String[] values(int docId); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/geo/MultiValueGeoPointFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/geo/MultiValueGeoPointFieldData.java index 3416c6addd8..73d69bdde4d 100644 --- 
a/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/geo/MultiValueGeoPointFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/geo/MultiValueGeoPointFieldData.java @@ -108,6 +108,12 @@ public class MultiValueGeoPointFieldData extends GeoPointFieldData { } } + @Override public void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc) { + for (int[] ordinal : ordinals) { + proc.onOrdinal(docId, ordinal[docId]); + } + } + @Override public GeoPoint value(int docId) { for (int[] ordinal : ordinals) { int loc = ordinal[docId]; diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/geo/SingleValueGeoPointFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/geo/SingleValueGeoPointFieldData.java index 40cd40043e6..a3e88655ab6 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/geo/SingleValueGeoPointFieldData.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/geo/SingleValueGeoPointFieldData.java @@ -80,6 +80,10 @@ public class SingleValueGeoPointFieldData extends GeoPointFieldData { proc.onValue(docId, GeoHashUtils.encode(lat[loc], lon[loc])); } + @Override public void forEachOrdinalInDoc(int docId, OrdinalInDocProc proc) { + proc.onOrdinal(docId, ordinals[docId]); + } + @Override public GeoPoint value(int docId) { int loc = ordinals[docId]; if (loc == 0) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/node/internal/InternalNode.java b/modules/elasticsearch/src/main/java/org/elasticsearch/node/internal/InternalNode.java index ddf8e87abaf..d043032dcd4 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/node/internal/InternalNode.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/node/internal/InternalNode.java @@ -30,6 +30,7 @@ import org.elasticsearch.cluster.ClusterModule; import 
org.elasticsearch.cluster.ClusterNameModule; import org.elasticsearch.cluster.ClusterService; import org.elasticsearch.cluster.routing.RoutingService; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.StopWatch; import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.component.Lifecycle; @@ -302,6 +303,7 @@ public final class InternalNode implements Node { } stopWatch.stop(); + CacheRecycler.clear(); ThreadLocals.clearReferencesThreadLocals(); if (logger.isTraceEnabled()) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetBuilder.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetBuilder.java index d4c8000ea7c..1e01537a2da 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetBuilder.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetBuilder.java @@ -46,6 +46,7 @@ public class TermsFacetBuilder extends AbstractFacetBuilder { private String script; private String lang; private Map params; + String executionHint; /** * Construct a new term facet with the provided facet name. @@ -163,6 +164,14 @@ public class TermsFacetBuilder extends AbstractFacetBuilder { return this; } + /** + * An execution hint to how the facet is computed. + */ + public TermsFacetBuilder executionHint(String executionHint) { + this.executionHint = executionHint; + return this; + } + /** * A parameter that will be passed to the script. 
* @@ -233,6 +242,10 @@ public class TermsFacetBuilder extends AbstractFacetBuilder { } } + if (executionHint != null) { + builder.field("execution_hint", executionHint); + } + builder.endObject(); addFilterFacetAndGlobal(builder, params); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetProcessor.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetProcessor.java index 5fc07dbd8d9..12cf4e7af70 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetProcessor.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetProcessor.java @@ -33,16 +33,24 @@ import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.FacetCollector; import org.elasticsearch.search.facet.FacetProcessor; import org.elasticsearch.search.facet.terms.bytes.TermsByteFacetCollector; +import org.elasticsearch.search.facet.terms.bytes.TermsByteOrdinalsFacetCollector; import org.elasticsearch.search.facet.terms.doubles.TermsDoubleFacetCollector; +import org.elasticsearch.search.facet.terms.doubles.TermsDoubleOrdinalsFacetCollector; import org.elasticsearch.search.facet.terms.floats.TermsFloatFacetCollector; +import org.elasticsearch.search.facet.terms.floats.TermsFloatOrdinalsFacetCollector; import org.elasticsearch.search.facet.terms.index.IndexNameFacetCollector; import org.elasticsearch.search.facet.terms.ints.TermsIntFacetCollector; +import org.elasticsearch.search.facet.terms.ints.TermsIntOrdinalsFacetCollector; import org.elasticsearch.search.facet.terms.ip.TermsIpFacetCollector; +import org.elasticsearch.search.facet.terms.ip.TermsIpOrdinalsFacetCollector; import org.elasticsearch.search.facet.terms.longs.TermsLongFacetCollector; +import org.elasticsearch.search.facet.terms.longs.TermsLongOrdinalsFacetCollector; import org.elasticsearch.search.facet.terms.shorts.TermsShortFacetCollector; +import 
org.elasticsearch.search.facet.terms.shorts.TermsShortOrdinalsFacetCollector; import org.elasticsearch.search.facet.terms.strings.FieldsTermsStringFacetCollector; import org.elasticsearch.search.facet.terms.strings.ScriptTermsStringFieldFacetCollector; import org.elasticsearch.search.facet.terms.strings.TermsStringFacetCollector; +import org.elasticsearch.search.facet.terms.strings.TermsStringOrdinalsFacetCollector; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -77,6 +85,7 @@ public class TermsFacetProcessor extends AbstractComponent implements FacetProce String script = null; Map params = null; boolean allTerms = false; + String executionHint = null; String currentFieldName = null; XContentParser.Token token; @@ -120,6 +129,8 @@ public class TermsFacetProcessor extends AbstractComponent implements FacetProce script = parser.text(); } else if ("lang".equals(currentFieldName)) { scriptLang = parser.text(); + } else if ("execution_hint".equals(currentFieldName) || "executionHint".equals(currentFieldName)) { + executionHint = parser.textOrNull(); } } } @@ -142,19 +153,51 @@ public class TermsFacetProcessor extends AbstractComponent implements FacetProce FieldMapper fieldMapper = context.mapperService().smartNameFieldMapper(field); if (fieldMapper != null) { if (fieldMapper instanceof IpFieldMapper) { - return new TermsIpFacetCollector(facetName, field, size, comparatorType, allTerms, context, scriptLang, script, params); + if (script != null || "map".equals(executionHint)) { + return new TermsIpFacetCollector(facetName, field, size, comparatorType, allTerms, context, scriptLang, script, params); + } else { + return new TermsIpOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, null); + } } else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.LONG) { - return new TermsLongFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params); + 
if (script != null || "map".equals(executionHint)) { + return new TermsLongFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params); + } else { + return new TermsLongOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded); + } } else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.DOUBLE) { - return new TermsDoubleFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params); + if (script != null || "map".equals(executionHint)) { + return new TermsDoubleFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params); + } else { + return new TermsDoubleOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded); + } } else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.INT) { - return new TermsIntFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params); + if (script != null || "map".equals(executionHint)) { + return new TermsIntFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params); + } else { + return new TermsIntOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded); + } } else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.FLOAT) { - return new TermsFloatFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params); + if (script != null || "map".equals(executionHint)) { + return new TermsFloatFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params); + } else { + return new TermsFloatOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded); + } } else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.SHORT) { - return new
TermsShortFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params); + if (script != null || "map".equals(executionHint)) { + return new TermsShortFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params); + } else { + return new TermsShortOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded); + } } else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.BYTE) { - return new TermsByteFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params); + if (script != null || "map".equals(executionHint)) { + return new TermsByteFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, scriptLang, script, params); + } else { + return new TermsByteOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded); + } + } else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.STRING) { + if (script == null && pattern == null && !"map".equals(executionHint)) { + return new TermsStringOrdinalsFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded); + } } } return new TermsStringFacetCollector(facetName, field, size, comparatorType, allTerms, context, excluded, pattern, scriptLang, script, params); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java index 0380ba5531a..2a9f4af560c 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java @@ -19,11 +19,11 @@ package org.elasticsearch.search.facet.terms.bytes; +import 
org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.thread.ThreadLocals; import org.elasticsearch.common.trove.iterator.TByteIntIterator; import org.elasticsearch.common.trove.map.hash.TByteIntHashMap; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -167,19 +167,12 @@ public class InternalByteTermsFacet extends InternalTermsFacet { } - private static ThreadLocal> aggregateCache = new ThreadLocal>() { - @Override protected ThreadLocals.CleanableValue initialValue() { - return new ThreadLocals.CleanableValue(new TByteIntHashMap()); - } - }; - - @Override public Facet reduce(String name, List facets) { if (facets.size() == 1) { return facets.get(0); } InternalByteTermsFacet first = (InternalByteTermsFacet) facets.get(0); - TByteIntHashMap aggregated = aggregateCache.get().get(); + TByteIntHashMap aggregated = CacheRecycler.popByteIntMap(); aggregated.clear(); long missing = 0; @@ -198,6 +191,9 @@ public class InternalByteTermsFacet extends InternalTermsFacet { } first.entries = ordered; first.missing = missing; + + CacheRecycler.pushByteIntMap(aggregated); + return first; } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java index f4f5b75834c..52793a58a00 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java @@ -22,10 +22,10 @@ package org.elasticsearch.search.facet.terms.bytes; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; 
import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.collect.ImmutableSet; -import org.elasticsearch.common.thread.ThreadLocals; import org.elasticsearch.common.trove.iterator.TByteIntIterator; import org.elasticsearch.common.trove.map.hash.TByteIntHashMap; import org.elasticsearch.common.trove.set.hash.TByteHashSet; @@ -38,11 +38,11 @@ import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.FacetPhaseExecutionException; import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; -import java.util.ArrayDeque; -import java.util.Deque; +import java.util.Arrays; import java.util.Map; import java.util.Set; @@ -51,12 +51,6 @@ import java.util.Set; */ public class TermsByteFacetCollector extends AbstractFacetCollector { - static ThreadLocal>> cache = new ThreadLocal>>() { - @Override protected ThreadLocals.CleanableValue> initialValue() { - return new ThreadLocals.CleanableValue>(new ArrayDeque()); - } - }; - private final FieldDataCache fieldDataCache; private final String indexFieldName; @@ -107,9 +101,9 @@ public class TermsByteFacetCollector extends AbstractFacetCollector { } if (this.script == null && excluded.isEmpty()) { - aggregator = new StaticAggregatorValueProc(popFacets()); + aggregator = new StaticAggregatorValueProc(CacheRecycler.popByteIntMap()); } else { - aggregator = new AggregatorValueProc(popFacets(), excluded, this.script); + aggregator = new AggregatorValueProc(CacheRecycler.popByteIntMap(), excluded, this.script); } if (allTerms) { @@ -144,35 +138,30 @@ public class TermsByteFacetCollector extends 
AbstractFacetCollector { @Override public Facet facet() { TByteIntHashMap facets = aggregator.facets(); if (facets.isEmpty()) { - pushFacets(facets); + CacheRecycler.pushByteIntMap(facets); return new InternalByteTermsFacet(facetName, comparatorType, size, ImmutableList.of(), aggregator.missing()); } else { - // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size * numberOfShards); - for (TByteIntIterator it = facets.iterator(); it.hasNext();) { - it.advance(); - ordered.add(new InternalByteTermsFacet.ByteEntry(it.key(), it.value())); + if (size < EntryPriorityQueue.LIMIT) { + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + for (TByteIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.insertWithOverflow(new InternalByteTermsFacet.ByteEntry(it.key(), it.value())); + } + InternalByteTermsFacet.ByteEntry[] list = new InternalByteTermsFacet.ByteEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalByteTermsFacet.ByteEntry) ordered.pop(); + } + CacheRecycler.pushByteIntMap(facets); + return new InternalByteTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing()); + } else { + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + for (TByteIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.add(new InternalByteTermsFacet.ByteEntry(it.key(), it.value())); + } + CacheRecycler.pushByteIntMap(facets); + return new InternalByteTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); } - pushFacets(facets); - return new InternalByteTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); - } - } - - static TByteIntHashMap popFacets() { - Deque deque = cache.get().get(); - if (deque.isEmpty()) { - deque.add(new 
TByteIntHashMap()); - } - TByteIntHashMap facets = deque.pollFirst(); - facets.clear(); - return facets; - } - - static void pushFacets(TByteIntHashMap facets) { - facets.clear(); - Deque deque = cache.get().get(); - if (deque != null) { - deque.add(facets); } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteOrdinalsFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteOrdinalsFacetCollector.java new file mode 100644 index 00000000000..4b7dadad49b --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteOrdinalsFacetCollector.java @@ -0,0 +1,248 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.facet.terms.bytes; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.PriorityQueue; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.collect.BoundedTreeSet; +import org.elasticsearch.common.collect.ImmutableSet; +import org.elasticsearch.common.trove.set.hash.TByteHashSet; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldData; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.bytes.ByteFieldData; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.search.facet.AbstractFacetCollector; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * @author kimchy (shay.banon) + */ +public class TermsByteOrdinalsFacetCollector extends AbstractFacetCollector { + + private final FieldDataCache fieldDataCache; + + private final String indexFieldName; + + private final TermsFacet.ComparatorType comparatorType; + + private final int size; + + private final int numberOfShards; + + private final int minCount; + + private final FieldDataType fieldDataType; + + private ByteFieldData fieldData; + + private final List aggregators; + + private ReaderAggregator current; + + long missing; + + private final TByteHashSet excluded; + + public TermsByteOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, + ImmutableSet excluded) { + super(facetName); + this.fieldDataCache = context.fieldDataCache(); + 
this.size = size; + this.comparatorType = comparatorType; + this.numberOfShards = context.numberOfShards(); + + MapperService.SmartNameFieldMappers smartMappers = context.mapperService().smartName(fieldName); + if (smartMappers == null || !smartMappers.hasMapper()) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms byte facet collector on it"); + } else { + // add type filter if there is exact doc mapper associated with it + if (smartMappers.hasDocMapper()) { + setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); + } + + if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.BYTE) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of byte type, can't run terms byte facet collector on it"); + } + + this.indexFieldName = smartMappers.mapper().names().indexName(); + this.fieldDataType = smartMappers.mapper().fieldDataType(); + } + + if (excluded == null || excluded.isEmpty()) { + this.excluded = null; + } else { + this.excluded = new TByteHashSet(excluded.size()); + for (String s : excluded) { + this.excluded.add(Byte.parseByte(s)); + } + } + + // minCount is offset by -1 + if (allTerms) { + minCount = -1; + } else { + minCount = 0; + } + + this.aggregators = new ArrayList(context.searcher().subReaders().length); + } + + @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + if (current != null) { + missing += current.counts[0]; + if (current.values.length > 1) { + aggregators.add(current); + } + } + fieldData = (ByteFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + current = new ReaderAggregator(fieldData); + } + + @Override protected void doCollect(int doc) throws IOException { + fieldData.forEachOrdinalInDoc(doc, current); + } + + @Override public Facet facet() { + if (current != null) { + missing += current.counts[0]; + // if we have values for this one, add 
it + if (current.values.length > 1) { + aggregators.add(current); + } + } + + AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size()); + + for (ReaderAggregator aggregator : aggregators) { + CacheRecycler.pushIntArray(aggregator.counts); // release it here, anyhow we are on the same thread so won't be corrupted + if (aggregator.nextPosition()) { + queue.add(aggregator); + } + } + + // YACK, we repeat the same logic, but once with an optimizer priority queue for smaller sizes + if (size < EntryPriorityQueue.LIMIT) { + // optimize to use priority size + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + byte value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + queue.pop(); + agg = queue.top(); + } + } while (agg != null && value == agg.current); + + if (count > minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalByteTermsFacet.ByteEntry entry = new InternalByteTermsFacet.ByteEntry(value, count); + ordered.insertWithOverflow(entry); + } + } + } + InternalByteTermsFacet.ByteEntry[] list = new InternalByteTermsFacet.ByteEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalByteTermsFacet.ByteEntry) ordered.pop(); + } + return new InternalByteTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing); + } + + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + byte value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + queue.pop(); + agg = queue.top(); + } + } while (agg != null && value == agg.current); + + if 
(count > minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalByteTermsFacet.ByteEntry entry = new InternalByteTermsFacet.ByteEntry(value, count); + ordered.add(entry); + } + } + } + return new InternalByteTermsFacet(facetName, comparatorType, size, ordered, missing); + } + + public static class ReaderAggregator implements FieldData.OrdinalInDocProc { + + final byte[] values; + final int[] counts; + + int position = 0; + byte current; + + public ReaderAggregator(ByteFieldData fieldData) { + this.values = fieldData.values(); + this.counts = CacheRecycler.popIntArray(fieldData.values().length); + } + + @Override public void onOrdinal(int docId, int ordinal) { + counts[ordinal]++; + } + + public boolean nextPosition() { + if (++position >= values.length) { + return false; + } + current = values[position]; + return true; + } + } + + public static class AggregatorPriorityQueue extends PriorityQueue { + + public AggregatorPriorityQueue(int size) { + initialize(size); + } + + @Override protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) { + return a.current < b.current; + } + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/doubles/InternalDoubleTermsFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/doubles/InternalDoubleTermsFacet.java index 10832338fc9..119a57760d1 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/doubles/InternalDoubleTermsFacet.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/doubles/InternalDoubleTermsFacet.java @@ -19,11 +19,11 @@ package org.elasticsearch.search.facet.terms.doubles; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import 
org.elasticsearch.common.thread.ThreadLocals; import org.elasticsearch.common.trove.iterator.TDoubleIntIterator; import org.elasticsearch.common.trove.map.hash.TDoubleIntHashMap; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -169,19 +169,12 @@ public class InternalDoubleTermsFacet extends InternalTermsFacet { return missingCount(); } - private static ThreadLocal> aggregateCache = new ThreadLocal>() { - @Override protected ThreadLocals.CleanableValue initialValue() { - return new ThreadLocals.CleanableValue(new TDoubleIntHashMap()); - } - }; - - @Override public Facet reduce(String name, List facets) { if (facets.size() == 1) { return facets.get(0); } InternalDoubleTermsFacet first = (InternalDoubleTermsFacet) facets.get(0); - TDoubleIntHashMap aggregated = aggregateCache.get().get(); + TDoubleIntHashMap aggregated = CacheRecycler.popDoubleIntMap(); aggregated.clear(); long missing = 0; for (Facet facet : facets) { @@ -199,6 +192,9 @@ public class InternalDoubleTermsFacet extends InternalTermsFacet { } first.entries = ordered; first.missing = missing; + + CacheRecycler.pushDoubleIntMap(aggregated); + return first; } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java index db2a382c6d8..90bd4bc946d 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java @@ -22,10 +22,10 @@ package org.elasticsearch.search.facet.terms.doubles; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import 
org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.collect.ImmutableSet; -import org.elasticsearch.common.thread.ThreadLocals; import org.elasticsearch.common.trove.iterator.TDoubleIntIterator; import org.elasticsearch.common.trove.map.hash.TDoubleIntHashMap; import org.elasticsearch.common.trove.set.hash.TDoubleHashSet; @@ -38,11 +38,11 @@ import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.FacetPhaseExecutionException; import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; -import java.util.ArrayDeque; -import java.util.Deque; +import java.util.Arrays; import java.util.Map; import java.util.Set; @@ -51,12 +51,6 @@ import java.util.Set; */ public class TermsDoubleFacetCollector extends AbstractFacetCollector { - static ThreadLocal>> cache = new ThreadLocal>>() { - @Override protected ThreadLocals.CleanableValue> initialValue() { - return new ThreadLocals.CleanableValue>(new ArrayDeque()); - } - }; - private final FieldDataCache fieldDataCache; private final String indexFieldName; @@ -107,9 +101,9 @@ public class TermsDoubleFacetCollector extends AbstractFacetCollector { } if (this.script == null && excluded.isEmpty()) { - aggregator = new StaticAggregatorValueProc(popFacets()); + aggregator = new StaticAggregatorValueProc(CacheRecycler.popDoubleIntMap()); } else { - aggregator = new AggregatorValueProc(popFacets(), excluded, this.script); + aggregator = new AggregatorValueProc(CacheRecycler.popDoubleIntMap(), excluded, this.script); } if (allTerms) { @@ -144,35 +138,30 @@ public class TermsDoubleFacetCollector extends AbstractFacetCollector { @Override public Facet facet() { TDoubleIntHashMap facets = aggregator.facets(); if (facets.isEmpty()) { - pushFacets(facets); + 
CacheRecycler.pushDoubleIntMap(facets); return new InternalDoubleTermsFacet(facetName, comparatorType, size, ImmutableList.of(), aggregator.missing()); } else { - // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size * numberOfShards); - for (TDoubleIntIterator it = facets.iterator(); it.hasNext();) { - it.advance(); - ordered.add(new InternalDoubleTermsFacet.DoubleEntry(it.key(), it.value())); + if (size < EntryPriorityQueue.LIMIT) { + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + for (TDoubleIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.insertWithOverflow(new InternalDoubleTermsFacet.DoubleEntry(it.key(), it.value())); + } + InternalDoubleTermsFacet.DoubleEntry[] list = new InternalDoubleTermsFacet.DoubleEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalDoubleTermsFacet.DoubleEntry) ordered.pop(); + } + CacheRecycler.pushDoubleIntMap(facets); + return new InternalDoubleTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing()); + } else { + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + for (TDoubleIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.add(new InternalDoubleTermsFacet.DoubleEntry(it.key(), it.value())); + } + CacheRecycler.pushDoubleIntMap(facets); + return new InternalDoubleTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); } - pushFacets(facets); - return new InternalDoubleTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); - } - } - - static TDoubleIntHashMap popFacets() { - Deque deque = cache.get().get(); - if (deque.isEmpty()) { - deque.add(new TDoubleIntHashMap()); - } - TDoubleIntHashMap facets = deque.pollFirst(); - facets.clear(); - return 
facets; - } - - static void pushFacets(TDoubleIntHashMap facets) { - facets.clear(); - Deque deque = cache.get().get(); - if (deque != null) { - deque.add(facets); } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleOrdinalsFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleOrdinalsFacetCollector.java new file mode 100644 index 00000000000..20a3c3d5ae9 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleOrdinalsFacetCollector.java @@ -0,0 +1,248 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.facet.terms.doubles; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.PriorityQueue; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.collect.BoundedTreeSet; +import org.elasticsearch.common.collect.ImmutableSet; +import org.elasticsearch.common.trove.set.hash.TDoubleHashSet; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldData; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.doubles.DoubleFieldData; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.search.facet.AbstractFacetCollector; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * @author kimchy (shay.banon) + */ +public class TermsDoubleOrdinalsFacetCollector extends AbstractFacetCollector { + + private final FieldDataCache fieldDataCache; + + private final String indexFieldName; + + private final TermsFacet.ComparatorType comparatorType; + + private final int size; + + private final int numberOfShards; + + private final int minCount; + + private final FieldDataType fieldDataType; + + private DoubleFieldData fieldData; + + private final List aggregators; + + private ReaderAggregator current; + + long missing; + + private final TDoubleHashSet excluded; + + public TermsDoubleOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, + ImmutableSet excluded) { + super(facetName); + this.fieldDataCache = 
context.fieldDataCache(); + this.size = size; + this.comparatorType = comparatorType; + this.numberOfShards = context.numberOfShards(); + + MapperService.SmartNameFieldMappers smartMappers = context.mapperService().smartName(fieldName); + if (smartMappers == null || !smartMappers.hasMapper()) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms double facet collector on it"); + } else { + // add type filter if there is exact doc mapper associated with it + if (smartMappers.hasDocMapper()) { + setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); + } + + if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.DOUBLE) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of double type, can't run terms double facet collector on it"); + } + + this.indexFieldName = smartMappers.mapper().names().indexName(); + this.fieldDataType = smartMappers.mapper().fieldDataType(); + } + + if (excluded == null || excluded.isEmpty()) { + this.excluded = null; + } else { + this.excluded = new TDoubleHashSet(excluded.size()); + for (String s : excluded) { + this.excluded.add(Double.parseDouble(s)); + } + } + + // minCount is offset by -1 + if (allTerms) { + minCount = -1; + } else { + minCount = 0; + } + + this.aggregators = new ArrayList(context.searcher().subReaders().length); + } + + @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + if (current != null) { + missing += current.counts[0]; + if (current.values.length > 1) { + aggregators.add(current); + } + } + fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + current = new ReaderAggregator(fieldData); + } + + @Override protected void doCollect(int doc) throws IOException { + fieldData.forEachOrdinalInDoc(doc, current); + } + + @Override public Facet facet() { + if (current != null) { + missing += 
current.counts[0]; + // if we have values for this one, add it + if (current.values.length > 1) { + aggregators.add(current); + } + } + + AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size()); + + for (ReaderAggregator aggregator : aggregators) { + CacheRecycler.pushIntArray(aggregator.counts); // release it here, anyhow we are on the same thread so won't be corrupted + if (aggregator.nextPosition()) { + queue.add(aggregator); + } + } + + // YACK, we repeat the same logic, but once with an optimizer priority queue for smaller sizes + if (size < EntryPriorityQueue.LIMIT) { + // optimize to use priority size + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + double value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + queue.pop(); + agg = queue.top(); + } + } while (agg != null && value == agg.current); + + if (count > minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalDoubleTermsFacet.DoubleEntry entry = new InternalDoubleTermsFacet.DoubleEntry(value, count); + ordered.insertWithOverflow(entry); + } + } + } + InternalDoubleTermsFacet.DoubleEntry[] list = new InternalDoubleTermsFacet.DoubleEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalDoubleTermsFacet.DoubleEntry) ordered.pop(); + } + return new InternalDoubleTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing); + } + + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + double value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + 
queue.pop(); + agg = queue.top(); + } + } while (agg != null && value == agg.current); + + if (count > minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalDoubleTermsFacet.DoubleEntry entry = new InternalDoubleTermsFacet.DoubleEntry(value, count); + ordered.add(entry); + } + } + } + return new InternalDoubleTermsFacet(facetName, comparatorType, size, ordered, missing); + } + + public static class ReaderAggregator implements FieldData.OrdinalInDocProc { + + final double[] values; + final int[] counts; + + int position = 0; + double current; + + public ReaderAggregator(DoubleFieldData fieldData) { + this.values = fieldData.values(); + this.counts = CacheRecycler.popIntArray(fieldData.values().length); + } + + @Override public void onOrdinal(int docId, int ordinal) { + counts[ordinal]++; + } + + public boolean nextPosition() { + if (++position >= values.length) { + return false; + } + current = values[position]; + return true; + } + } + + public static class AggregatorPriorityQueue extends PriorityQueue { + + public AggregatorPriorityQueue(int size) { + initialize(size); + } + + @Override protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) { + return a.current < b.current; + } + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/floats/InternalFloatTermsFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/floats/InternalFloatTermsFacet.java index 8d257733ca1..6ca146e4ebb 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/floats/InternalFloatTermsFacet.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/floats/InternalFloatTermsFacet.java @@ -19,11 +19,11 @@ package org.elasticsearch.search.facet.terms.floats; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; import 
org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.thread.ThreadLocals; import org.elasticsearch.common.trove.iterator.TFloatIntIterator; import org.elasticsearch.common.trove.map.hash.TFloatIntHashMap; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -169,19 +169,12 @@ public class InternalFloatTermsFacet extends InternalTermsFacet { return missingCount(); } - private static ThreadLocal> aggregateCache = new ThreadLocal>() { - @Override protected ThreadLocals.CleanableValue initialValue() { - return new ThreadLocals.CleanableValue(new TFloatIntHashMap()); - } - }; - - @Override public Facet reduce(String name, List facets) { if (facets.size() == 1) { return facets.get(0); } InternalFloatTermsFacet first = (InternalFloatTermsFacet) facets.get(0); - TFloatIntHashMap aggregated = aggregateCache.get().get(); + TFloatIntHashMap aggregated = CacheRecycler.popFloatIntMap(); aggregated.clear(); long missing = 0; @@ -200,6 +193,9 @@ public class InternalFloatTermsFacet extends InternalTermsFacet { } first.entries = ordered; first.missing = missing; + + CacheRecycler.pushFloatIntMap(aggregated); + return first; } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java index d06dfde505a..51303a8aefa 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java @@ -22,10 +22,10 @@ package org.elasticsearch.search.facet.terms.floats; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; 
import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.collect.ImmutableSet; -import org.elasticsearch.common.thread.ThreadLocals; import org.elasticsearch.common.trove.iterator.TFloatIntIterator; import org.elasticsearch.common.trove.map.hash.TFloatIntHashMap; import org.elasticsearch.common.trove.set.hash.TFloatHashSet; @@ -38,11 +38,11 @@ import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.FacetPhaseExecutionException; import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; -import java.util.ArrayDeque; -import java.util.Deque; +import java.util.Arrays; import java.util.Map; import java.util.Set; @@ -51,12 +51,6 @@ import java.util.Set; */ public class TermsFloatFacetCollector extends AbstractFacetCollector { - static ThreadLocal>> cache = new ThreadLocal>>() { - @Override protected ThreadLocals.CleanableValue> initialValue() { - return new ThreadLocals.CleanableValue>(new ArrayDeque()); - } - }; - private final FieldDataCache fieldDataCache; private final String indexFieldName; @@ -107,9 +101,9 @@ public class TermsFloatFacetCollector extends AbstractFacetCollector { } if (this.script == null && excluded.isEmpty()) { - aggregator = new StaticAggregatorValueProc(popFacets()); + aggregator = new StaticAggregatorValueProc(CacheRecycler.popFloatIntMap()); } else { - aggregator = new AggregatorValueProc(popFacets(), excluded, this.script); + aggregator = new AggregatorValueProc(CacheRecycler.popFloatIntMap(), excluded, this.script); } if (allTerms) { @@ -144,35 +138,30 @@ public class TermsFloatFacetCollector extends AbstractFacetCollector { @Override public Facet facet() { TFloatIntHashMap facets = aggregator.facets(); if 
(facets.isEmpty()) { - pushFacets(facets); + CacheRecycler.pushFloatIntMap(facets); return new InternalFloatTermsFacet(facetName, comparatorType, size, ImmutableList.of(), aggregator.missing()); } else { - // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size * numberOfShards); - for (TFloatIntIterator it = facets.iterator(); it.hasNext();) { - it.advance(); - ordered.add(new InternalFloatTermsFacet.FloatEntry(it.key(), it.value())); + if (size < EntryPriorityQueue.LIMIT) { + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + for (TFloatIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.insertWithOverflow(new InternalFloatTermsFacet.FloatEntry(it.key(), it.value())); + } + InternalFloatTermsFacet.FloatEntry[] list = new InternalFloatTermsFacet.FloatEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalFloatTermsFacet.FloatEntry) ordered.pop(); + } + CacheRecycler.pushFloatIntMap(facets); + return new InternalFloatTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing()); + } else { + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + for (TFloatIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.add(new InternalFloatTermsFacet.FloatEntry(it.key(), it.value())); + } + CacheRecycler.pushFloatIntMap(facets); + return new InternalFloatTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); } - pushFacets(facets); - return new InternalFloatTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); - } - } - - static TFloatIntHashMap popFacets() { - Deque deque = cache.get().get(); - if (deque.isEmpty()) { - deque.add(new TFloatIntHashMap()); - } - TFloatIntHashMap facets = deque.pollFirst(); - 
facets.clear(); - return facets; - } - - static void pushFacets(TFloatIntHashMap facets) { - facets.clear(); - Deque deque = cache.get().get(); - if (deque != null) { - deque.add(facets); } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatOrdinalsFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatOrdinalsFacetCollector.java new file mode 100644 index 00000000000..1498a020508 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatOrdinalsFacetCollector.java @@ -0,0 +1,248 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.facet.terms.floats; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.PriorityQueue; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.collect.BoundedTreeSet; +import org.elasticsearch.common.collect.ImmutableSet; +import org.elasticsearch.common.trove.set.hash.TFloatHashSet; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldData; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.floats.FloatFieldData; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.search.facet.AbstractFacetCollector; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * @author kimchy (shay.banon) + */ +public class TermsFloatOrdinalsFacetCollector extends AbstractFacetCollector { + + private final FieldDataCache fieldDataCache; + + private final String indexFieldName; + + private final TermsFacet.ComparatorType comparatorType; + + private final int size; + + private final int numberOfShards; + + private final int minCount; + + private final FieldDataType fieldDataType; + + private FloatFieldData fieldData; + + private final List aggregators; + + private ReaderAggregator current; + + long missing; + + private final TFloatHashSet excluded; + + public TermsFloatOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, + ImmutableSet excluded) { + super(facetName); + this.fieldDataCache = 
context.fieldDataCache(); + this.size = size; + this.comparatorType = comparatorType; + this.numberOfShards = context.numberOfShards(); + + MapperService.SmartNameFieldMappers smartMappers = context.mapperService().smartName(fieldName); + if (smartMappers == null || !smartMappers.hasMapper()) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms float facet collector on it"); + } else { + // add type filter if there is exact doc mapper associated with it + if (smartMappers.hasDocMapper()) { + setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); + } + + if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.FLOAT) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of float type, can't run terms float facet collector on it"); + } + + this.indexFieldName = smartMappers.mapper().names().indexName(); + this.fieldDataType = smartMappers.mapper().fieldDataType(); + } + + if (excluded == null || excluded.isEmpty()) { + this.excluded = null; + } else { + this.excluded = new TFloatHashSet(excluded.size()); + for (String s : excluded) { + this.excluded.add(Float.parseFloat(s)); + } + } + + // minCount is offset by -1 + if (allTerms) { + minCount = -1; + } else { + minCount = 0; + } + + this.aggregators = new ArrayList(context.searcher().subReaders().length); + } + + @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + if (current != null) { + missing += current.counts[0]; + if (current.values.length > 1) { + aggregators.add(current); + } + } + fieldData = (FloatFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + current = new ReaderAggregator(fieldData); + } + + @Override protected void doCollect(int doc) throws IOException { + fieldData.forEachOrdinalInDoc(doc, current); + } + + @Override public Facet facet() { + if (current != null) { + missing += current.counts[0]; + // 
if we have values for this one, add it + if (current.values.length > 1) { + aggregators.add(current); + } + } + + AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size()); + + for (ReaderAggregator aggregator : aggregators) { + CacheRecycler.pushIntArray(aggregator.counts); // release it here, anyhow we are on the same thread so won't be corrupted + if (aggregator.nextPosition()) { + queue.add(aggregator); + } + } + + // YACK, we repeat the same logic, but once with an optimizer priority queue for smaller sizes + if (size < EntryPriorityQueue.LIMIT) { + // optimize to use priority size + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + float value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + queue.pop(); + agg = queue.top(); + } + } while (agg != null && value == agg.current); + + if (count > minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalFloatTermsFacet.FloatEntry entry = new InternalFloatTermsFacet.FloatEntry(value, count); + ordered.insertWithOverflow(entry); + } + } + } + InternalFloatTermsFacet.FloatEntry[] list = new InternalFloatTermsFacet.FloatEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalFloatTermsFacet.FloatEntry) ordered.pop(); + } + return new InternalFloatTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing); + } + + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + float value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + queue.pop(); + agg = queue.top(); + } + } 
while (agg != null && value == agg.current); + + if (count > minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalFloatTermsFacet.FloatEntry entry = new InternalFloatTermsFacet.FloatEntry(value, count); + ordered.add(entry); + } + } + } + return new InternalFloatTermsFacet(facetName, comparatorType, size, ordered, missing); + } + + public static class ReaderAggregator implements FieldData.OrdinalInDocProc { + + final float[] values; + final int[] counts; + + int position = 0; + float current; + + public ReaderAggregator(FloatFieldData fieldData) { + this.values = fieldData.values(); + this.counts = CacheRecycler.popIntArray(fieldData.values().length); + } + + @Override public void onOrdinal(int docId, int ordinal) { + counts[ordinal]++; + } + + public boolean nextPosition() { + if (++position >= values.length) { + return false; + } + current = values[position]; + return true; + } + } + + public static class AggregatorPriorityQueue extends PriorityQueue { + + public AggregatorPriorityQueue(int size) { + initialize(size); + } + + @Override protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) { + return a.current < b.current; + } + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ints/InternalIntTermsFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ints/InternalIntTermsFacet.java index 6a50bddb134..ee966e6618a 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ints/InternalIntTermsFacet.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ints/InternalIntTermsFacet.java @@ -19,11 +19,11 @@ package org.elasticsearch.search.facet.terms.ints; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.io.stream.StreamInput; import 
org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.thread.ThreadLocals; import org.elasticsearch.common.trove.iterator.TIntIntIterator; import org.elasticsearch.common.trove.map.hash.TIntIntHashMap; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -166,19 +166,12 @@ public class InternalIntTermsFacet extends InternalTermsFacet { return missingCount(); } - private static ThreadLocal> aggregateCache = new ThreadLocal>() { - @Override protected ThreadLocals.CleanableValue initialValue() { - return new ThreadLocals.CleanableValue(new TIntIntHashMap()); - } - }; - - @Override public Facet reduce(String name, List facets) { if (facets.size() == 1) { return facets.get(0); } InternalIntTermsFacet first = (InternalIntTermsFacet) facets.get(0); - TIntIntHashMap aggregated = aggregateCache.get().get(); + TIntIntHashMap aggregated = CacheRecycler.popIntIntMap(); aggregated.clear(); long missing = 0; @@ -197,6 +190,9 @@ public class InternalIntTermsFacet extends InternalTermsFacet { } first.entries = ordered; first.missing = missing; + + CacheRecycler.pushIntIntMap(aggregated); + return first; } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java index 2b35b7c7444..379725b72e5 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java @@ -22,10 +22,10 @@ package org.elasticsearch.search.facet.terms.ints; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import 
org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.collect.ImmutableSet; -import org.elasticsearch.common.thread.ThreadLocals; import org.elasticsearch.common.trove.iterator.TIntIntIterator; import org.elasticsearch.common.trove.map.hash.TIntIntHashMap; import org.elasticsearch.common.trove.set.hash.TIntHashSet; @@ -38,11 +38,11 @@ import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.FacetPhaseExecutionException; import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; -import java.util.ArrayDeque; -import java.util.Deque; +import java.util.Arrays; import java.util.Map; import java.util.Set; @@ -51,12 +51,6 @@ import java.util.Set; */ public class TermsIntFacetCollector extends AbstractFacetCollector { - static ThreadLocal>> cache = new ThreadLocal>>() { - @Override protected ThreadLocals.CleanableValue> initialValue() { - return new ThreadLocals.CleanableValue>(new ArrayDeque()); - } - }; - private final FieldDataCache fieldDataCache; private final String indexFieldName; @@ -107,9 +101,9 @@ public class TermsIntFacetCollector extends AbstractFacetCollector { } if (this.script == null && excluded.isEmpty()) { - aggregator = new StaticAggregatorValueProc(popFacets()); + aggregator = new StaticAggregatorValueProc(CacheRecycler.popIntIntMap()); } else { - aggregator = new AggregatorValueProc(popFacets(), excluded, this.script); + aggregator = new AggregatorValueProc(CacheRecycler.popIntIntMap(), excluded, this.script); } if (allTerms) { @@ -144,35 +138,30 @@ public class TermsIntFacetCollector extends AbstractFacetCollector { @Override public Facet facet() { TIntIntHashMap facets = aggregator.facets(); if (facets.isEmpty()) { - pushFacets(facets); + CacheRecycler.pushIntIntMap(facets); return new 
InternalIntTermsFacet(facetName, comparatorType, size, ImmutableList.of(), aggregator.missing()); } else { - // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size * numberOfShards); - for (TIntIntIterator it = facets.iterator(); it.hasNext();) { - it.advance(); - ordered.add(new InternalIntTermsFacet.IntEntry(it.key(), it.value())); + if (size < EntryPriorityQueue.LIMIT) { + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + for (TIntIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.insertWithOverflow(new InternalIntTermsFacet.IntEntry(it.key(), it.value())); + } + InternalIntTermsFacet.IntEntry[] list = new InternalIntTermsFacet.IntEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalIntTermsFacet.IntEntry) ordered.pop(); + } + CacheRecycler.pushIntIntMap(facets); + return new InternalIntTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing()); + } else { + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + for (TIntIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.add(new InternalIntTermsFacet.IntEntry(it.key(), it.value())); + } + CacheRecycler.pushIntIntMap(facets); + return new InternalIntTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); } - pushFacets(facets); - return new InternalIntTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); - } - } - - static TIntIntHashMap popFacets() { - Deque deque = cache.get().get(); - if (deque.isEmpty()) { - deque.add(new TIntIntHashMap()); - } - TIntIntHashMap facets = deque.pollFirst(); - facets.clear(); - return facets; - } - - static void pushFacets(TIntIntHashMap facets) { - facets.clear(); - Deque deque = cache.get().get(); - if (deque 
!= null) { - deque.add(facets); } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntOrdinalsFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntOrdinalsFacetCollector.java new file mode 100644 index 00000000000..cd863834660 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntOrdinalsFacetCollector.java @@ -0,0 +1,248 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.facet.terms.ints; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.PriorityQueue; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.collect.BoundedTreeSet; +import org.elasticsearch.common.collect.ImmutableSet; +import org.elasticsearch.common.trove.set.hash.TIntHashSet; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldData; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.ints.IntFieldData; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.search.facet.AbstractFacetCollector; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * @author kimchy (shay.banon) + */ +public class TermsIntOrdinalsFacetCollector extends AbstractFacetCollector { + + private final FieldDataCache fieldDataCache; + + private final String indexFieldName; + + private final TermsFacet.ComparatorType comparatorType; + + private final int size; + + private final int numberOfShards; + + private final int minCount; + + private final FieldDataType fieldDataType; + + private IntFieldData fieldData; + + private final List aggregators; + + private ReaderAggregator current; + + long missing; + + private final TIntHashSet excluded; + + public TermsIntOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, + ImmutableSet excluded) { + super(facetName); + this.fieldDataCache = context.fieldDataCache(); + 
this.size = size; + this.comparatorType = comparatorType; + this.numberOfShards = context.numberOfShards(); + + MapperService.SmartNameFieldMappers smartMappers = context.mapperService().smartName(fieldName); + if (smartMappers == null || !smartMappers.hasMapper()) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms int facet collector on it"); + } else { + // add type filter if there is exact doc mapper associated with it + if (smartMappers.hasDocMapper()) { + setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); + } + + if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.INT) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of int type, can't run terms int facet collector on it"); + } + + this.indexFieldName = smartMappers.mapper().names().indexName(); + this.fieldDataType = smartMappers.mapper().fieldDataType(); + } + + if (excluded == null || excluded.isEmpty()) { + this.excluded = null; + } else { + this.excluded = new TIntHashSet(excluded.size()); + for (String s : excluded) { + this.excluded.add(Integer.parseInt(s)); + } + } + + // minCount is offset by -1 + if (allTerms) { + minCount = -1; + } else { + minCount = 0; + } + + this.aggregators = new ArrayList(context.searcher().subReaders().length); + } + + @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + if (current != null) { + missing += current.counts[0]; + if (current.values.length > 1) { + aggregators.add(current); + } + } + fieldData = (IntFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + current = new ReaderAggregator(fieldData); + } + + @Override protected void doCollect(int doc) throws IOException { + fieldData.forEachOrdinalInDoc(doc, current); + } + + @Override public Facet facet() { + if (current != null) { + missing += current.counts[0]; + // if we have values for this one, add it + 
+ if (current.values.length > 1) { + aggregators.add(current); + } + } + + AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size()); + + for (ReaderAggregator aggregator : aggregators) { + CacheRecycler.pushIntArray(aggregator.counts); // release it here, anyhow we are on the same thread so won't be corrupted + if (aggregator.nextPosition()) { + queue.add(aggregator); + } + } + + // YACK, we repeat the same logic, but once with an optimized priority queue for smaller sizes + if (size < EntryPriorityQueue.LIMIT) { + // optimize to use priority size + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + int value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + queue.pop(); + agg = queue.top(); + } + } while (agg != null && value == agg.current); + + if (count > minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalIntTermsFacet.IntEntry entry = new InternalIntTermsFacet.IntEntry(value, count); + ordered.insertWithOverflow(entry); + } + } + } + InternalIntTermsFacet.IntEntry[] list = new InternalIntTermsFacet.IntEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalIntTermsFacet.IntEntry) ordered.pop(); + } + return new InternalIntTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing); + } + + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + int value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + queue.pop(); + agg = queue.top(); + } + } while (agg != null && value == agg.current); + + if (count >
minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalIntTermsFacet.IntEntry entry = new InternalIntTermsFacet.IntEntry(value, count); + ordered.add(entry); + } + } + } + return new InternalIntTermsFacet(facetName, comparatorType, size, ordered, missing); + } + + public static class ReaderAggregator implements FieldData.OrdinalInDocProc { + + final int[] values; + final int[] counts; + + int position = 0; + int current; + + public ReaderAggregator(IntFieldData fieldData) { + this.values = fieldData.values(); + this.counts = CacheRecycler.popIntArray(fieldData.values().length); + } + + @Override public void onOrdinal(int docId, int ordinal) { + counts[ordinal]++; + } + + public boolean nextPosition() { + if (++position >= values.length) { + return false; + } + current = values[position]; + return true; + } + } + + public static class AggregatorPriorityQueue extends PriorityQueue { + + public AggregatorPriorityQueue(int size) { + initialize(size); + } + + @Override protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) { + return a.current < b.current; + } + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ip/InternalIpTermsFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ip/InternalIpTermsFacet.java index 9dc7cb12c9d..008ab86193b 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ip/InternalIpTermsFacet.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ip/InternalIpTermsFacet.java @@ -19,11 +19,11 @@ package org.elasticsearch.search.facet.terms.ip; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.thread.ThreadLocals; import 
org.elasticsearch.common.trove.iterator.TLongIntIterator; import org.elasticsearch.common.trove.map.hash.TLongIntHashMap; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -170,19 +170,12 @@ public class InternalIpTermsFacet extends InternalTermsFacet { return missingCount(); } - private static ThreadLocal> aggregateCache = new ThreadLocal>() { - @Override protected ThreadLocals.CleanableValue initialValue() { - return new ThreadLocals.CleanableValue(new TLongIntHashMap()); - } - }; - - @Override public Facet reduce(String name, List facets) { if (facets.size() == 1) { return facets.get(0); } InternalIpTermsFacet first = (InternalIpTermsFacet) facets.get(0); - TLongIntHashMap aggregated = aggregateCache.get().get(); + TLongIntHashMap aggregated = CacheRecycler.popLongIntMap(); aggregated.clear(); long missing = 0; @@ -201,6 +194,9 @@ public class InternalIpTermsFacet extends InternalTermsFacet { } first.entries = ordered; first.missing = missing; + + CacheRecycler.pushLongIntMap(aggregated); + return first; } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java index a2e636880c9..c2022ddc6c9 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java @@ -22,9 +22,9 @@ package org.elasticsearch.search.facet.terms.ip; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; -import org.elasticsearch.common.thread.ThreadLocals; import 
org.elasticsearch.common.trove.iterator.TLongIntIterator; import org.elasticsearch.common.trove.map.hash.TLongIntHashMap; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -36,11 +36,11 @@ import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.FacetPhaseExecutionException; import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; -import java.util.ArrayDeque; -import java.util.Deque; +import java.util.Arrays; import java.util.Map; /** @@ -48,13 +48,6 @@ import java.util.Map; */ public class TermsIpFacetCollector extends AbstractFacetCollector { - static ThreadLocal>> cache = new ThreadLocal>>() { - @Override protected ThreadLocals.CleanableValue> initialValue() { - return new ThreadLocals.CleanableValue>(new ArrayDeque()); - } - }; - - private final FieldDataCache fieldDataCache; private final String indexFieldName; @@ -105,9 +98,9 @@ public class TermsIpFacetCollector extends AbstractFacetCollector { } if (this.script == null) { - aggregator = new StaticAggregatorValueProc(popFacets()); + aggregator = new StaticAggregatorValueProc(CacheRecycler.popLongIntMap()); } else { - aggregator = new AggregatorValueProc(popFacets(), this.script); + aggregator = new AggregatorValueProc(CacheRecycler.popLongIntMap(), this.script); } if (allTerms) { @@ -142,35 +135,30 @@ public class TermsIpFacetCollector extends AbstractFacetCollector { @Override public Facet facet() { TLongIntHashMap facets = aggregator.facets(); if (facets.isEmpty()) { - pushFacets(facets); + CacheRecycler.pushLongIntMap(facets); return new InternalIpTermsFacet(facetName, comparatorType, size, ImmutableList.of(), aggregator.missing()); } else { - // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed 
across shards - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size * numberOfShards); - for (TLongIntIterator it = facets.iterator(); it.hasNext();) { - it.advance(); - ordered.add(new InternalIpTermsFacet.LongEntry(it.key(), it.value())); + if (size < EntryPriorityQueue.LIMIT) { + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + for (TLongIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.insertWithOverflow(new InternalIpTermsFacet.LongEntry(it.key(), it.value())); + } + InternalIpTermsFacet.LongEntry[] list = new InternalIpTermsFacet.LongEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalIpTermsFacet.LongEntry) ordered.pop(); + } + CacheRecycler.pushLongIntMap(facets); + return new InternalIpTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing()); + } else { + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + for (TLongIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.add(new InternalIpTermsFacet.LongEntry(it.key(), it.value())); + } + CacheRecycler.pushLongIntMap(facets); + return new InternalIpTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); } - pushFacets(facets); - return new InternalIpTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); - } - } - - static TLongIntHashMap popFacets() { - Deque deque = cache.get().get(); - if (deque.isEmpty()) { - deque.add(new TLongIntHashMap()); - } - TLongIntHashMap facets = deque.pollFirst(); - facets.clear(); - return facets; - } - - static void pushFacets(TLongIntHashMap facets) { - facets.clear(); - Deque deque = cache.get().get(); - if (deque != null) { - deque.add(facets); } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpOrdinalsFacetCollector.java 
b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpOrdinalsFacetCollector.java new file mode 100644 index 00000000000..0d630733f69 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpOrdinalsFacetCollector.java @@ -0,0 +1,248 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.facet.terms.ip; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.PriorityQueue; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.collect.BoundedTreeSet; +import org.elasticsearch.common.collect.ImmutableSet; +import org.elasticsearch.common.trove.set.hash.TLongHashSet; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldData; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.longs.LongFieldData; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.search.facet.AbstractFacetCollector; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * @author kimchy (shay.banon) + */ +public class TermsIpOrdinalsFacetCollector extends AbstractFacetCollector { + + private final FieldDataCache fieldDataCache; + + private final String indexFieldName; + + private final TermsFacet.ComparatorType comparatorType; + + private final int size; + + private final int numberOfShards; + + private final int minCount; + + private final FieldDataType fieldDataType; + + private LongFieldData fieldData; + + private final List aggregators; + + private ReaderAggregator current; + + long missing; + + private final TLongHashSet excluded; + + public TermsIpOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, + ImmutableSet excluded) { + super(facetName); + this.fieldDataCache = context.fieldDataCache(); + 
this.size = size; + this.comparatorType = comparatorType; + this.numberOfShards = context.numberOfShards(); + + MapperService.SmartNameFieldMappers smartMappers = context.mapperService().smartName(fieldName); + if (smartMappers == null || !smartMappers.hasMapper()) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms long facet collector on it"); + } else { + // add type filter if there is exact doc mapper associated with it + if (smartMappers.hasDocMapper()) { + setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); + } + + if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.LONG) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of long type, can't run terms long facet collector on it"); + } + + this.indexFieldName = smartMappers.mapper().names().indexName(); + this.fieldDataType = smartMappers.mapper().fieldDataType(); + } + + if (excluded == null || excluded.isEmpty()) { + this.excluded = null; + } else { + this.excluded = new TLongHashSet(excluded.size()); + for (String s : excluded) { + this.excluded.add(Long.parseLong(s)); + } + } + + // minCount is offset by -1 + if (allTerms) { + minCount = -1; + } else { + minCount = 0; + } + + this.aggregators = new ArrayList(context.searcher().subReaders().length); + } + + @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + if (current != null) { + missing += current.counts[0]; + if (current.values.length > 1) { + aggregators.add(current); + } + } + fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + current = new ReaderAggregator(fieldData); + } + + @Override protected void doCollect(int doc) throws IOException { + fieldData.forEachOrdinalInDoc(doc, current); + } + + @Override public Facet facet() { + if (current != null) { + missing += current.counts[0]; + // if we have values for this one, add 
it + if (current.values.length > 1) { + aggregators.add(current); + } + } + + AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size()); + + for (ReaderAggregator aggregator : aggregators) { + CacheRecycler.pushIntArray(aggregator.counts); // release it here, anyhow we are on the same thread so won't be corrupted + if (aggregator.nextPosition()) { + queue.add(aggregator); + } + } + + // YACK, we repeat the same logic, but once with an optimized priority queue for smaller sizes + if (size < EntryPriorityQueue.LIMIT) { + // optimize to use priority size + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + long value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + queue.pop(); + agg = queue.top(); + } + } while (agg != null && value == agg.current); + + if (count > minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalIpTermsFacet.LongEntry entry = new InternalIpTermsFacet.LongEntry(value, count); + ordered.insertWithOverflow(entry); + } + } + } + InternalIpTermsFacet.LongEntry[] list = new InternalIpTermsFacet.LongEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalIpTermsFacet.LongEntry) ordered.pop(); + } + return new InternalIpTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing); + } + + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + long value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + queue.pop(); + agg = queue.top(); + } + } while (agg != null && value == agg.current); + + if (count >
minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalIpTermsFacet.LongEntry entry = new InternalIpTermsFacet.LongEntry(value, count); + ordered.add(entry); + } + } + } + return new InternalIpTermsFacet(facetName, comparatorType, size, ordered, missing); + } + + public static class ReaderAggregator implements FieldData.OrdinalInDocProc { + + final long[] values; + final int[] counts; + + int position = 0; + long current = Integer.MIN_VALUE; + + public ReaderAggregator(LongFieldData fieldData) { + this.values = fieldData.values(); + this.counts = CacheRecycler.popIntArray(fieldData.values().length); + } + + @Override public void onOrdinal(int docId, int ordinal) { + counts[ordinal]++; + } + + public boolean nextPosition() { + if (++position >= values.length) { + return false; + } + current = values[position]; + return true; + } + } + + public static class AggregatorPriorityQueue extends PriorityQueue { + + public AggregatorPriorityQueue(int size) { + initialize(size); + } + + @Override protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) { + return a.current < b.current; + } + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/longs/InternalLongTermsFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/longs/InternalLongTermsFacet.java index 87d1b45b6c7..f7683eaa006 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/longs/InternalLongTermsFacet.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/longs/InternalLongTermsFacet.java @@ -19,11 +19,11 @@ package org.elasticsearch.search.facet.terms.longs; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import 
org.elasticsearch.common.thread.ThreadLocals; import org.elasticsearch.common.trove.iterator.TLongIntIterator; import org.elasticsearch.common.trove.map.hash.TLongIntHashMap; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -169,20 +169,12 @@ public class InternalLongTermsFacet extends InternalTermsFacet { return missingCount(); } - private static ThreadLocal> aggregateCache = new ThreadLocal>() { - @Override protected ThreadLocals.CleanableValue initialValue() { - return new ThreadLocals.CleanableValue(new TLongIntHashMap()); - } - }; - - @Override public Facet reduce(String name, List facets) { if (facets.size() == 1) { return facets.get(0); } InternalLongTermsFacet first = (InternalLongTermsFacet) facets.get(0); - TLongIntHashMap aggregated = aggregateCache.get().get(); - aggregated.clear(); + TLongIntHashMap aggregated = CacheRecycler.popLongIntMap(); long missing = 0; for (Facet facet : facets) { @@ -200,6 +192,9 @@ public class InternalLongTermsFacet extends InternalTermsFacet { } first.entries = ordered; first.missing = missing; + + CacheRecycler.pushLongIntMap(aggregated); + return first; } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongFacetCollector.java index afa966605ef..115b0324253 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongFacetCollector.java @@ -22,6 +22,7 @@ package org.elasticsearch.search.facet.terms.longs; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import 
org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.collect.ImmutableSet; @@ -38,13 +39,11 @@ import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.FacetPhaseExecutionException; import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; -import java.util.ArrayDeque; -import java.util.Deque; -import java.util.Map; -import java.util.Set; +import java.util.*; /** * @author kimchy (shay.banon) @@ -108,9 +107,9 @@ public class TermsLongFacetCollector extends AbstractFacetCollector { } if (this.script == null && excluded.isEmpty()) { - aggregator = new StaticAggregatorValueProc(popFacets()); + aggregator = new StaticAggregatorValueProc(CacheRecycler.popLongIntMap()); } else { - aggregator = new AggregatorValueProc(popFacets(), excluded, this.script); + aggregator = new AggregatorValueProc(CacheRecycler.popLongIntMap(), excluded, this.script); } if (allTerms) { @@ -145,35 +144,30 @@ public class TermsLongFacetCollector extends AbstractFacetCollector { @Override public Facet facet() { TLongIntHashMap facets = aggregator.facets(); if (facets.isEmpty()) { - pushFacets(facets); + CacheRecycler.pushLongIntMap(facets); return new InternalLongTermsFacet(facetName, comparatorType, size, ImmutableList.of(), aggregator.missing()); } else { - // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size * numberOfShards); - for (TLongIntIterator it = facets.iterator(); it.hasNext();) { - it.advance(); - ordered.add(new InternalLongTermsFacet.LongEntry(it.key(), it.value())); + if (size < EntryPriorityQueue.LIMIT) { + EntryPriorityQueue ordered = new EntryPriorityQueue(size, 
comparatorType.comparator()); + for (TLongIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.insertWithOverflow(new InternalLongTermsFacet.LongEntry(it.key(), it.value())); + } + InternalLongTermsFacet.LongEntry[] list = new InternalLongTermsFacet.LongEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalLongTermsFacet.LongEntry) ordered.pop(); + } + CacheRecycler.pushLongIntMap(facets); + return new InternalLongTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing()); + } else { + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + for (TLongIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.add(new InternalLongTermsFacet.LongEntry(it.key(), it.value())); + } + CacheRecycler.pushLongIntMap(facets); + return new InternalLongTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); } - pushFacets(facets); - return new InternalLongTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); - } - } - - static TLongIntHashMap popFacets() { - Deque deque = cache.get().get(); - if (deque.isEmpty()) { - deque.add(new TLongIntHashMap()); - } - TLongIntHashMap facets = deque.pollFirst(); - facets.clear(); - return facets; - } - - static void pushFacets(TLongIntHashMap facets) { - facets.clear(); - Deque deque = cache.get().get(); - if (deque != null) { - deque.add(facets); } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongOrdinalsFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongOrdinalsFacetCollector.java new file mode 100644 index 00000000000..705624d0579 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongOrdinalsFacetCollector.java @@ -0,0 +1,248 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facet.terms.longs; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.PriorityQueue; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.collect.BoundedTreeSet; +import org.elasticsearch.common.collect.ImmutableSet; +import org.elasticsearch.common.trove.set.hash.TLongHashSet; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldData; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.longs.LongFieldData; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.search.facet.AbstractFacetCollector; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * @author kimchy (shay.banon) + */ +public class TermsLongOrdinalsFacetCollector extends 
AbstractFacetCollector { + + private final FieldDataCache fieldDataCache; + + private final String indexFieldName; + + private final TermsFacet.ComparatorType comparatorType; + + private final int size; + + private final int numberOfShards; + + private final int minCount; + + private final FieldDataType fieldDataType; + + private LongFieldData fieldData; + + private final List aggregators; + + private ReaderAggregator current; + + long missing; + + private final TLongHashSet excluded; + + public TermsLongOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, + ImmutableSet excluded) { + super(facetName); + this.fieldDataCache = context.fieldDataCache(); + this.size = size; + this.comparatorType = comparatorType; + this.numberOfShards = context.numberOfShards(); + + MapperService.SmartNameFieldMappers smartMappers = context.mapperService().smartName(fieldName); + if (smartMappers == null || !smartMappers.hasMapper()) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms long facet collector on it"); + } else { + // add type filter if there is exact doc mapper associated with it + if (smartMappers.hasDocMapper()) { + setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); + } + + if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.LONG) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of long type, can't run terms long facet collector on it"); + } + + this.indexFieldName = smartMappers.mapper().names().indexName(); + this.fieldDataType = smartMappers.mapper().fieldDataType(); + } + + if (excluded == null || excluded.isEmpty()) { + this.excluded = null; + } else { + this.excluded = new TLongHashSet(excluded.size()); + for (String s : excluded) { + this.excluded.add(Long.parseLong(s)); + } + } + + // minCount is offset by -1 + if 
(allTerms) { + minCount = -1; + } else { + minCount = 0; + } + + this.aggregators = new ArrayList(context.searcher().subReaders().length); + } + + @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + if (current != null) { + missing += current.counts[0]; + if (current.values.length > 1) { + aggregators.add(current); + } + } + fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + current = new ReaderAggregator(fieldData); + } + + @Override protected void doCollect(int doc) throws IOException { + fieldData.forEachOrdinalInDoc(doc, current); + } + + @Override public Facet facet() { + if (current != null) { + missing += current.counts[0]; + // if we have values for this one, add it + if (current.values.length > 1) { + aggregators.add(current); + } + } + + AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size()); + + for (ReaderAggregator aggregator : aggregators) { + CacheRecycler.pushIntArray(aggregator.counts); // release it here, anyhow we are on the same thread so won't be corrupted + if (aggregator.nextPosition()) { + queue.add(aggregator); + } + } + + // YACK, we repeat the same logic, but once with an optimizer priority queue for smaller sizes + if (size < EntryPriorityQueue.LIMIT) { + // optimize to use priority size + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + long value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + queue.pop(); + agg = queue.top(); + } + } while (agg != null && value == agg.current); + + if (count > minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalLongTermsFacet.LongEntry entry = new InternalLongTermsFacet.LongEntry(value, count); + ordered.insertWithOverflow(entry); + 
} + } + } + InternalLongTermsFacet.LongEntry[] list = new InternalLongTermsFacet.LongEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalLongTermsFacet.LongEntry) ordered.pop(); + } + return new InternalLongTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing); + } + + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + long value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + queue.pop(); + agg = queue.top(); + } + } while (agg != null && value == agg.current); + + if (count > minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalLongTermsFacet.LongEntry entry = new InternalLongTermsFacet.LongEntry(value, count); + ordered.add(entry); + } + } + } + return new InternalLongTermsFacet(facetName, comparatorType, size, ordered, missing); + } + + public static class ReaderAggregator implements FieldData.OrdinalInDocProc { + + final long[] values; + final int[] counts; + + int position = 0; + long current = Integer.MIN_VALUE; + + public ReaderAggregator(LongFieldData fieldData) { + this.values = fieldData.values(); + this.counts = CacheRecycler.popIntArray(fieldData.values().length); + } + + @Override public void onOrdinal(int docId, int ordinal) { + counts[ordinal]++; + } + + public boolean nextPosition() { + if (++position >= values.length) { + return false; + } + current = values[position]; + return true; + } + } + + public static class AggregatorPriorityQueue extends PriorityQueue { + + public AggregatorPriorityQueue(int size) { + initialize(size); + } + + @Override protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) { + return a.current < b.current; + } + } +} diff --git 
a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/shorts/InternalShortTermsFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/shorts/InternalShortTermsFacet.java index 83f4518c850..24006322770 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/shorts/InternalShortTermsFacet.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/shorts/InternalShortTermsFacet.java @@ -19,11 +19,11 @@ package org.elasticsearch.search.facet.terms.shorts; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.thread.ThreadLocals; import org.elasticsearch.common.trove.iterator.TShortIntIterator; import org.elasticsearch.common.trove.map.hash.TShortIntHashMap; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -166,19 +166,12 @@ public class InternalShortTermsFacet extends InternalTermsFacet { return missingCount(); } - private static ThreadLocal> aggregateCache = new ThreadLocal>() { - @Override protected ThreadLocals.CleanableValue initialValue() { - return new ThreadLocals.CleanableValue(new TShortIntHashMap()); - } - }; - - @Override public Facet reduce(String name, List facets) { if (facets.size() == 1) { return facets.get(0); } InternalShortTermsFacet first = (InternalShortTermsFacet) facets.get(0); - TShortIntHashMap aggregated = aggregateCache.get().get(); + TShortIntHashMap aggregated = CacheRecycler.popShortIntMap(); aggregated.clear(); long missing = 0; for (Facet facet : facets) { @@ -196,6 +189,9 @@ public class InternalShortTermsFacet extends InternalTermsFacet { } first.entries = ordered; first.missing = missing; + + CacheRecycler.pushShortIntMap(aggregated); + return first; } diff --git 
a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java index 0ee64fd9dac..928859457b3 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java @@ -22,10 +22,10 @@ package org.elasticsearch.search.facet.terms.shorts; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.collect.ImmutableSet; -import org.elasticsearch.common.thread.ThreadLocals; import org.elasticsearch.common.trove.iterator.TShortIntIterator; import org.elasticsearch.common.trove.map.hash.TShortIntHashMap; import org.elasticsearch.common.trove.set.hash.TShortHashSet; @@ -38,11 +38,11 @@ import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.FacetPhaseExecutionException; import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; -import java.util.ArrayDeque; -import java.util.Deque; +import java.util.Arrays; import java.util.Map; import java.util.Set; @@ -51,12 +51,6 @@ import java.util.Set; */ public class TermsShortFacetCollector extends AbstractFacetCollector { - static ThreadLocal>> cache = new ThreadLocal>>() { - @Override protected ThreadLocals.CleanableValue> initialValue() { - return new ThreadLocals.CleanableValue>(new 
ArrayDeque()); - } - }; - private final FieldDataCache fieldDataCache; private final String indexFieldName; @@ -107,9 +101,9 @@ public class TermsShortFacetCollector extends AbstractFacetCollector { } if (this.script == null && excluded.isEmpty()) { - aggregator = new StaticAggregatorValueProc(popFacets()); + aggregator = new StaticAggregatorValueProc(CacheRecycler.popShortIntMap()); } else { - aggregator = new AggregatorValueProc(popFacets(), excluded, this.script); + aggregator = new AggregatorValueProc(CacheRecycler.popShortIntMap(), excluded, this.script); } if (allTerms) { @@ -144,35 +138,30 @@ public class TermsShortFacetCollector extends AbstractFacetCollector { @Override public Facet facet() { TShortIntHashMap facets = aggregator.facets(); if (facets.isEmpty()) { - pushFacets(facets); + CacheRecycler.pushShortIntMap(facets); return new InternalShortTermsFacet(facetName, comparatorType, size, ImmutableList.of(), aggregator.missing()); } else { - // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size * numberOfShards); - for (TShortIntIterator it = facets.iterator(); it.hasNext();) { - it.advance(); - ordered.add(new InternalShortTermsFacet.ShortEntry(it.key(), it.value())); + if (size < EntryPriorityQueue.LIMIT) { + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + for (TShortIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.insertWithOverflow(new InternalShortTermsFacet.ShortEntry(it.key(), it.value())); + } + InternalShortTermsFacet.ShortEntry[] list = new InternalShortTermsFacet.ShortEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalShortTermsFacet.ShortEntry) ordered.pop(); + } + CacheRecycler.pushShortIntMap(facets); + return new InternalShortTermsFacet(facetName, comparatorType, size, 
Arrays.asList(list), aggregator.missing()); + } else { + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + for (TShortIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.add(new InternalShortTermsFacet.ShortEntry(it.key(), it.value())); + } + CacheRecycler.pushShortIntMap(facets); + return new InternalShortTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); } - pushFacets(facets); - return new InternalShortTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); - } - } - - static TShortIntHashMap popFacets() { - Deque deque = cache.get().get(); - if (deque.isEmpty()) { - deque.add(new TShortIntHashMap()); - } - TShortIntHashMap facets = deque.pollFirst(); - facets.clear(); - return facets; - } - - static void pushFacets(TShortIntHashMap facets) { - facets.clear(); - Deque deque = cache.get().get(); - if (deque != null) { - deque.add(facets); } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortOrdinalsFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortOrdinalsFacetCollector.java new file mode 100644 index 00000000000..f08f1d86e39 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortOrdinalsFacetCollector.java @@ -0,0 +1,248 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facet.terms.shorts; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.PriorityQueue; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.collect.BoundedTreeSet; +import org.elasticsearch.common.collect.ImmutableSet; +import org.elasticsearch.common.trove.set.hash.TShortHashSet; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldData; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.shorts.ShortFieldData; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.search.facet.AbstractFacetCollector; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * @author kimchy (shay.banon) + */ +public class TermsShortOrdinalsFacetCollector extends AbstractFacetCollector { + + private final FieldDataCache fieldDataCache; + + private final String indexFieldName; + + private final TermsFacet.ComparatorType comparatorType; + + private final int size; + + private final int numberOfShards; + + private final int minCount; + + private final FieldDataType fieldDataType; + + 
private ShortFieldData fieldData; + + private final List aggregators; + + private ReaderAggregator current; + + long missing; + + private final TShortHashSet excluded; + + public TermsShortOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, + ImmutableSet excluded) { + super(facetName); + this.fieldDataCache = context.fieldDataCache(); + this.size = size; + this.comparatorType = comparatorType; + this.numberOfShards = context.numberOfShards(); + + MapperService.SmartNameFieldMappers smartMappers = context.mapperService().smartName(fieldName); + if (smartMappers == null || !smartMappers.hasMapper()) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms short facet collector on it"); + } else { + // add type filter if there is exact doc mapper associated with it + if (smartMappers.hasDocMapper()) { + setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); + } + + if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.SHORT) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of short type, can't run terms short facet collector on it"); + } + + this.indexFieldName = smartMappers.mapper().names().indexName(); + this.fieldDataType = smartMappers.mapper().fieldDataType(); + } + + if (excluded == null || excluded.isEmpty()) { + this.excluded = null; + } else { + this.excluded = new TShortHashSet(excluded.size()); + for (String s : excluded) { + this.excluded.add(Short.parseShort(s)); + } + } + + // minCount is offset by -1 + if (allTerms) { + minCount = -1; + } else { + minCount = 0; + } + + this.aggregators = new ArrayList(context.searcher().subReaders().length); + } + + @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + if (current != null) { + missing += current.counts[0]; + if 
(current.values.length > 1) { + aggregators.add(current); + } + } + fieldData = (ShortFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + current = new ReaderAggregator(fieldData); + } + + @Override protected void doCollect(int doc) throws IOException { + fieldData.forEachOrdinalInDoc(doc, current); + } + + @Override public Facet facet() { + if (current != null) { + missing += current.counts[0]; + // if we have values for this one, add it + if (current.values.length > 1) { + aggregators.add(current); + } + } + + AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size()); + + for (ReaderAggregator aggregator : aggregators) { + CacheRecycler.pushIntArray(aggregator.counts); // release it here, anyhow we are on the same thread so won't be corrupted + if (aggregator.nextPosition()) { + queue.add(aggregator); + } + } + + // YACK, we repeat the same logic, but once with an optimizer priority queue for smaller sizes + if (size < EntryPriorityQueue.LIMIT) { + // optimize to use priority size + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + short value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + queue.pop(); + agg = queue.top(); + } + } while (agg != null && value == agg.current); + + if (count > minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalShortTermsFacet.ShortEntry entry = new InternalShortTermsFacet.ShortEntry(value, count); + ordered.insertWithOverflow(entry); + } + } + } + InternalShortTermsFacet.ShortEntry[] list = new InternalShortTermsFacet.ShortEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = (InternalShortTermsFacet.ShortEntry) ordered.pop(); + } + return new InternalShortTermsFacet(facetName, comparatorType, 
size, Arrays.asList(list), missing); + } + + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + + while (queue.size() > 0) { + ReaderAggregator agg = queue.top(); + short value = agg.current; + int count = 0; + do { + count += agg.counts[agg.position]; + if (agg.nextPosition()) { + agg = queue.updateTop(); + } else { + // we are done with this reader + queue.pop(); + agg = queue.top(); + } + } while (agg != null && value == agg.current); + + if (count > minCount) { + if (excluded == null || !excluded.contains(value)) { + InternalShortTermsFacet.ShortEntry entry = new InternalShortTermsFacet.ShortEntry(value, count); + ordered.add(entry); + } + } + } + return new InternalShortTermsFacet(facetName, comparatorType, size, ordered, missing); + } + + public static class ReaderAggregator implements FieldData.OrdinalInDocProc { + + final short[] values; + final int[] counts; + + int position = 0; + short current; + + public ReaderAggregator(ShortFieldData fieldData) { + this.values = fieldData.values(); + this.counts = CacheRecycler.popIntArray(fieldData.values().length); + } + + @Override public void onOrdinal(int docId, int ordinal) { + counts[ordinal]++; + } + + public boolean nextPosition() { + if (++position >= values.length) { + return false; + } + current = values[position]; + return true; + } + } + + public static class AggregatorPriorityQueue extends PriorityQueue { + + public AggregatorPriorityQueue(int size) { + initialize(size); + } + + @Override protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) { + return a.current < b.current; + } + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java index 4ca006a8305..69305c13672 100644 --- 
a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java @@ -21,6 +21,7 @@ package org.elasticsearch.search.facet.terms.strings; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.collect.ImmutableSet; @@ -34,9 +35,11 @@ import org.elasticsearch.script.SearchScript; import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.FacetPhaseExecutionException; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; +import java.util.Arrays; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -95,9 +98,9 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector { } if (excluded.isEmpty() && pattern == null && this.script == null) { - aggregator = new StaticAggregatorValueProc(TermsStringFacetCollector.popFacets()); + aggregator = new StaticAggregatorValueProc(CacheRecycler.popObjectIntMap()); } else { - aggregator = new AggregatorValueProc(TermsStringFacetCollector.popFacets(), excluded, pattern, this.script); + aggregator = new AggregatorValueProc(CacheRecycler.popObjectIntMap(), excluded, pattern, this.script); } if (allTerms) { @@ -138,17 +141,30 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector { @Override public Facet facet() { TObjectIntHashMap facets = aggregator.facets(); if (facets.isEmpty()) { - TermsStringFacetCollector.pushFacets(facets); + CacheRecycler.pushObjectIntMap(facets); return 
new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.of(), aggregator.missing()); } else { - // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size * numberOfShards); - for (TObjectIntIterator it = facets.iterator(); it.hasNext();) { - it.advance(); - ordered.add(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + if (size < EntryPriorityQueue.LIMIT) { + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + for (TObjectIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.insertWithOverflow(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + } + InternalStringTermsFacet.StringEntry[] list = new InternalStringTermsFacet.StringEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = ((InternalStringTermsFacet.StringEntry) ordered.pop()); + } + CacheRecycler.pushObjectIntMap(facets); + return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing()); + } else { + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + for (TObjectIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.add(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + } + CacheRecycler.pushObjectIntMap(facets); + return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); } - TermsStringFacetCollector.pushFacets(facets); - return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java 
b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java index 5c1a48c9e66..3304416b980 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java @@ -21,6 +21,7 @@ package org.elasticsearch.search.facet.terms.strings; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.collect.ImmutableSet; @@ -29,9 +30,11 @@ import org.elasticsearch.common.trove.map.hash.TObjectIntHashMap; import org.elasticsearch.script.SearchScript; import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; +import java.util.Arrays; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -68,7 +71,7 @@ public class ScriptTermsStringFieldFacetCollector extends AbstractFacetCollector this.excluded = excluded; this.matcher = pattern != null ? 
pattern.matcher("") : null; - this.facets = TermsStringFacetCollector.popFacets(); + this.facets = CacheRecycler.popObjectIntMap(); } @Override public void setScorer(Scorer scorer) throws IOException { @@ -132,17 +135,30 @@ public class ScriptTermsStringFieldFacetCollector extends AbstractFacetCollector @Override public Facet facet() { if (facets.isEmpty()) { - TermsStringFacetCollector.pushFacets(facets); + CacheRecycler.pushObjectIntMap(facets); return new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.of(), missing); } else { - // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size * numberOfShards); - for (TObjectIntIterator it = facets.iterator(); it.hasNext();) { - it.advance(); - ordered.add(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + if (size < EntryPriorityQueue.LIMIT) { + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + for (TObjectIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.insertWithOverflow(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + } + InternalStringTermsFacet.StringEntry[] list = new InternalStringTermsFacet.StringEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = ((InternalStringTermsFacet.StringEntry) ordered.pop()); + } + CacheRecycler.pushObjectIntMap(facets); + return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing); + } else { + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + for (TObjectIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.add(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + } + CacheRecycler.pushObjectIntMap(facets); + return new InternalStringTermsFacet(facetName, comparatorType, size, 
ordered, missing); } - TermsStringFacetCollector.pushFacets(facets); - return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, missing); } } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java index f64bdecb9f1..8e717a10a59 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java @@ -21,6 +21,7 @@ package org.elasticsearch.search.facet.terms.strings; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; +import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.collect.ImmutableSet; @@ -36,10 +37,12 @@ import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.FacetPhaseExecutionException; import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.ArrayDeque; +import java.util.Arrays; import java.util.Deque; import java.util.Map; import java.util.regex.Matcher; @@ -104,9 +107,9 @@ public class TermsStringFacetCollector extends AbstractFacetCollector { } if (excluded.isEmpty() && pattern == null && this.script == null) { - aggregator = new StaticAggregatorValueProc(popFacets()); + aggregator = new StaticAggregatorValueProc(CacheRecycler.popObjectIntMap()); } else { - aggregator = new AggregatorValueProc(popFacets(), excluded, pattern, this.script); + aggregator = 
new AggregatorValueProc(CacheRecycler.popObjectIntMap(), excluded, pattern, this.script); } if (allTerms) { @@ -141,35 +144,30 @@ public class TermsStringFacetCollector extends AbstractFacetCollector { @Override public Facet facet() { TObjectIntHashMap facets = aggregator.facets(); if (facets.isEmpty()) { - pushFacets(facets); + CacheRecycler.pushObjectIntMap(facets); return new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.of(), aggregator.missing()); } else { - // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size * numberOfShards); - for (TObjectIntIterator it = facets.iterator(); it.hasNext();) { - it.advance(); - ordered.add(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + if (size < EntryPriorityQueue.LIMIT) { + EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); + for (TObjectIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.insertWithOverflow(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + } + InternalStringTermsFacet.StringEntry[] list = new InternalStringTermsFacet.StringEntry[ordered.size()]; + for (int i = ordered.size() - 1; i >= 0; i--) { + list[i] = ((InternalStringTermsFacet.StringEntry) ordered.pop()); + } + CacheRecycler.pushObjectIntMap(facets); + return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing()); + } else { + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + for (TObjectIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.add(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + } + CacheRecycler.pushObjectIntMap(facets); + return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); } - pushFacets(facets); - 
return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing()); - } - } - - static TObjectIntHashMap popFacets() { - Deque> deque = cache.get().get(); - if (deque.isEmpty()) { - deque.add(new TObjectIntHashMap()); - } - TObjectIntHashMap facets = deque.pollFirst(); - facets.clear(); - return facets; - } - - static void pushFacets(TObjectIntHashMap facets) { - facets.clear(); - Deque> deque = cache.get().get(); - if (deque != null) { - deque.add(facets); } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetCollector.java new file mode 100644 index 00000000000..2ea1092e08d --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetCollector.java @@ -0,0 +1,244 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.elasticsearch.search.facet.terms.strings;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.PriorityQueue;
+import org.elasticsearch.ElasticSearchIllegalArgumentException;
+import org.elasticsearch.common.CacheRecycler;
+import org.elasticsearch.common.collect.BoundedTreeSet;
+import org.elasticsearch.common.collect.ImmutableSet;
+import org.elasticsearch.index.cache.field.data.FieldDataCache;
+import org.elasticsearch.index.field.data.FieldData;
+import org.elasticsearch.index.field.data.FieldDataType;
+import org.elasticsearch.index.field.data.strings.StringFieldData;
+import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.search.facet.AbstractFacetCollector;
+import org.elasticsearch.search.facet.Facet;
+import org.elasticsearch.search.facet.terms.TermsFacet;
+import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
+import org.elasticsearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/** Terms facet collector for string fields: counts hits per ordinal for every reader, then merges the per-reader terms in facet().
+ * @author kimchy (shay.banon)
+ */
+public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector {
+
+    private final FieldDataCache fieldDataCache;
+
+    private final String indexFieldName;
+
+    private final TermsFacet.ComparatorType comparatorType;
+
+    private final int size;
+
+    private final int numberOfShards; // NOTE(review): assigned in the constructor but never read in this class
+
+    private final int minCount;
+
+    private final FieldDataType fieldDataType;
+
+    private StringFieldData fieldData;
+
+    private final List<ReaderAggregator> aggregators; // finished readers that have at least one term to merge
+
+    private ReaderAggregator current; // aggregator of the reader currently being collected
+
+    long missing; // sum of counts[0] over all readers, reported as the facet's "missing" value
+
+    private final ImmutableSet<String> excluded; // null when there is nothing to exclude
+
+    public TermsStringOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
+                                             ImmutableSet<String> excluded) {
+        super(facetName);
+        this.fieldDataCache = context.fieldDataCache();
+        this.size = size;
+        this.comparatorType = comparatorType;
+        this.numberOfShards = context.numberOfShards();
+
+        MapperService.SmartNameFieldMappers smartMappers = context.mapperService().smartName(fieldName);
+        if (smartMappers == null || !smartMappers.hasMapper()) {
+            throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms string facet collector on it");
+        } else {
+            // add type filter if there is exact doc mapper associated with it
+            if (smartMappers.hasDocMapper()) {
+                setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
+            }
+
+            if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.STRING) {
+                throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of string type, can't run terms string facet collector on it");
+            }
+
+            this.indexFieldName = smartMappers.mapper().names().indexName();
+            this.fieldDataType = smartMappers.mapper().fieldDataType();
+        }
+
+        if (excluded == null || excluded.isEmpty()) {
+            this.excluded = null;
+        } else {
+            this.excluded = excluded;
+        }
+
+        // facet() keeps a term when "count > minCount": -1 keeps zero-count terms (allTerms), 0 drops them
+        if (allTerms) {
+            minCount = -1;
+        } else {
+            minCount = 0;
+        }
+
+        this.aggregators = new ArrayList<ReaderAggregator>(context.searcher().subReaders().length);
+    }
+
+    @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException {
+        if (current != null) { // fold in the previous reader before switching to the next one
+            missing += current.counts[0];
+            if (current.values.length > 1) { // index 0 is never read as a term (nextPosition() starts at 1)
+                aggregators.add(current);
+            }
+        }
+        fieldData = (StringFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName);
+        current = new ReaderAggregator(fieldData);
+    }
+
+    @Override protected void doCollect(int doc) throws IOException {
+        fieldData.forEachOrdinalInDoc(doc, current); // current.onOrdinal(...) increments counts[ordinal]
+    }
+
+    @Override public Facet facet() {
+        if (current != null) {
+            missing += current.counts[0];
+            // if we have values for this one, add it
+            if (current.values.length > 1) {
+                aggregators.add(current);
+            }
+        }
+
+        AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size());
+
+        for (ReaderAggregator aggregator : aggregators) {
+            CacheRecycler.pushIntArray(aggregator.counts); // release it here; counts is still read by the merge below, which is only safe while nothing else on this thread pops it (TODO confirm CacheRecycler is per-thread)
+            if (aggregator.nextPosition()) {
+                queue.add(aggregator);
+            }
+        }
+
+        // YUCK, the merge loop is repeated: once with an optimized priority queue for smaller sizes, once with a bounded tree set
+        if (size < EntryPriorityQueue.LIMIT) {
+            // optimize to use a priority queue
+            EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
+
+            while (queue.size() > 0) {
+                ReaderAggregator agg = queue.top();
+                String value = agg.current;
+                int count = 0;
+                do { // sum the counts of every reader whose current term equals value
+                    count += agg.counts[agg.position];
+                    if (agg.nextPosition()) {
+                        agg = queue.updateTop();
+                    } else {
+                        // we are done with this reader
+                        queue.pop();
+                        agg = queue.top();
+                    }
+                } while (agg != null && value.equals(agg.current));
+
+                if (count > minCount) {
+                    if (excluded == null || !excluded.contains(value)) {
+                        InternalStringTermsFacet.StringEntry entry = new InternalStringTermsFacet.StringEntry(value, count);
+                        ordered.insertWithOverflow(entry);
+                    }
+                }
+            }
+            InternalStringTermsFacet.StringEntry[] list = new InternalStringTermsFacet.StringEntry[ordered.size()];
+            for (int i = ordered.size() - 1; i >= 0; i--) { // fill back to front: pop() order is the reverse of the list order
+                list[i] = (InternalStringTermsFacet.StringEntry) ordered.pop();
+            }
+            return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing);
+        }
+
+        BoundedTreeSet<InternalStringTermsFacet.StringEntry> ordered = new BoundedTreeSet<InternalStringTermsFacet.StringEntry>(comparatorType.comparator(), size);
+
+        while (queue.size() > 0) {
+            ReaderAggregator agg = queue.top();
+            String value = agg.current;
+            int count = 0;
+            do { // sum the counts of every reader whose current term equals value
+                count += agg.counts[agg.position];
+                if (agg.nextPosition()) {
+                    agg = queue.updateTop();
+                } else {
+                    // we are done with this reader
+                    queue.pop();
+                    agg = queue.top();
+                }
+            } while (agg != null && value.equals(agg.current));
+
+            if (count > minCount) {
+                if (excluded == null || !excluded.contains(value)) {
+                    InternalStringTermsFacet.StringEntry entry = new InternalStringTermsFacet.StringEntry(value, count);
+                    ordered.add(entry);
+                }
+            }
+        }
+        return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, missing);
+    }
+
+    public static class ReaderAggregator implements FieldData.OrdinalInDocProc {
+
+        final String[] values; // terms of one reader, indexed by ordinal
+        final int[] counts; // hit count per ordinal, taken from the recycler
+
+        int position = 0;
+        String current; // values[position]; only set once nextPosition() has returned true
+
+        public ReaderAggregator(StringFieldData fieldData) {
+            this.values = fieldData.values();
+            this.counts = CacheRecycler.popIntArray(fieldData.values().length);
+        }
+
+        @Override public void onOrdinal(int docId, int ordinal) {
+            counts[ordinal]++;
+        }
+
+        public boolean nextPosition() {
+            if (++position >= values.length) { // position starts at 0, so the first call moves to index 1
+                return false;
+            }
+            current = values[position];
+            return true;
+        }
+    }
+
+    public static class AggregatorPriorityQueue extends PriorityQueue<ReaderAggregator> {
+
+        public AggregatorPriorityQueue(int size) {
+            initialize(size);
+        }
+
+        @Override protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) {
+            return a.current.compareTo(b.current) < 0; // orders aggregators by their current term
+        }
+    }
+}
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/support/EntryPriorityQueue.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/support/EntryPriorityQueue.java
new file mode 100644
index 00000000000..fd94c34fe2a
--- /dev/null
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/support/EntryPriorityQueue.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.facet.terms.support;
+
+import org.apache.lucene.util.PriorityQueue;
+import org.elasticsearch.search.facet.terms.TermsFacet;
+
+import java.util.Comparator;
+
+public class EntryPriorityQueue extends PriorityQueue<TermsFacet.Entry> {
+
+    public static final int LIMIT = 5000; // the collectors in this patch only use this queue when the requested size is below LIMIT
+
+    private final Comparator<TermsFacet.Entry> comparator; // facet ordering; lessThan() applies it reversed
+
+    public EntryPriorityQueue(int size, Comparator<TermsFacet.Entry> comparator) {
+        initialize(size);
+        this.comparator = comparator;
+    }
+
+    @Override protected boolean lessThan(TermsFacet.Entry a, TermsFacet.Entry b) {
+        return comparator.compare(a, b) > 0; // reverse, since we reverse again when adding to a list
+    }
+}
diff --git a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java index 11603157bf8..76185606b6d 100644 --- a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java +++ b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java @@ -742,7 +742,7 @@ public class SimpleFacetsTests extends AbstractNodesTests { SearchResponse searchResponse = client.prepareSearch() .setQuery(matchAllQuery()) - .addFacet(termsFacet("facet1").field("text").size(3)) + .addFacet(termsFacet("facet1").field("text").size(10)) .execute().actionGet(); TermsFacet facet = searchResponse.facets().facet("facet1"); @@ -752,32 +752,6 @@ public class SimpleFacetsTests extends 
AbstractNodesTests { assertThat(facet.entries().get(i).term(), anyOf(equalTo("foo"), equalTo("bar"), equalTo("baz"))); assertThat(facet.entries().get(i).count(), equalTo(10)); } - - searchResponse = client.prepareSearch() - .setQuery(matchAllQuery()) - .addFacet(termsFacet("facet1").field("text").size(2)) - .execute().actionGet(); - - facet = searchResponse.facets().facet("facet1"); - assertThat(facet.name(), equalTo("facet1")); - assertThat(facet.entries().size(), equalTo(2)); - for (int i = 0; i < 2; i++) { - assertThat(facet.entries().get(i).term(), anyOf(equalTo("foo"), equalTo("bar"), equalTo("baz"))); - assertThat(facet.entries().get(i).count(), equalTo(10)); - } - - searchResponse = client.prepareSearch() - .setQuery(matchAllQuery()) - .addFacet(termsFacet("facet1").field("text").size(1)) - .execute().actionGet(); - - facet = searchResponse.facets().facet("facet1"); - assertThat(facet.name(), equalTo("facet1")); - assertThat(facet.entries().size(), equalTo(1)); - for (int i = 0; i < 1; i++) { - assertThat(facet.entries().get(i).term(), anyOf(equalTo("foo"), equalTo("bar"), equalTo("baz"))); - assertThat(facet.entries().get(i).count(), equalTo(10)); - } } @Test public void testStatsFacets() throws Exception {