From eb9805389ab7abcac75c020970af2f1c6d5fe235 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 18 Apr 2014 20:40:16 +0700 Subject: [PATCH] Use segment ordinals as global ordinals if a segment contains all values for a field on a shard level. Relates to #5854 Closes #5873 --- .../GlobalOrdinalsIndexFieldData.java | 8 +++-- .../InternalGlobalOrdinalsBuilder.java | 29 ++++++++++++++----- .../bucket/BucketsAggregator.java | 14 +++++++++ .../GlobalOrdinalsStringTermsAggregator.java | 3 +- .../aggregations/GlobalOrdinalsBenchmark.java | 13 +++++---- 5 files changed, 50 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalsIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalsIndexFieldData.java index 3f78d6cda0d..c94d74bee0c 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalsIndexFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalsIndexFieldData.java @@ -127,8 +127,12 @@ public final class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent i public BytesValues.WithOrdinals getBytesValues(boolean needsHashes) { BytesValues.WithOrdinals values = afd.getBytesValues(false); Ordinals.Docs segmentOrdinals = values.ordinals(); - Ordinals.Docs globalOrdinals = segmentOrdToGlobalOrdLookup.globalOrdinals(segmentOrdinals); - + final Ordinals.Docs globalOrdinals; + if (segmentOrdToGlobalOrdLookup != null) { + globalOrdinals = segmentOrdToGlobalOrdLookup.globalOrdinals(segmentOrdinals); + } else { + globalOrdinals = segmentOrdinals; + } final BytesValues.WithOrdinals[] bytesValues = new BytesValues.WithOrdinals[atomicReaders.length]; for (int i = 0; i < bytesValues.length; i++) { bytesValues[i] = atomicReaders[i].afd.getBytesValues(false); diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/InternalGlobalOrdinalsBuilder.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/InternalGlobalOrdinalsBuilder.java index a1c5b8da516..fee9846934e 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/InternalGlobalOrdinalsBuilder.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/InternalGlobalOrdinalsBuilder.java @@ -104,7 +104,8 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem breakerService.getBreaker().addWithoutBreaking(memorySizeInBytes); if (logger.isDebugEnabled()) { - String implName = segmentOrdToGlobalOrdLookups[0].getClass().getSimpleName(); + // this does include the [] from the array in the impl name + String implName = segmentOrdToGlobalOrdLookups.getClass().getSimpleName(); logger.debug( "Global-ordinals[{}][{}][{}] took {} ms", implName, @@ -225,19 +226,31 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem PackedIntOrdinalMappingSource[] sources = new PackedIntOrdinalMappingSource[numSegments]; for (int i = 0; i < newSegmentOrdToGlobalOrdDeltas.length; i++) { PackedInts.Reader segmentOrdToGlobalOrdDelta = newSegmentOrdToGlobalOrdDeltas[i]; - long ramUsed = segmentOrdToGlobalOrdDelta.ramBytesUsed(); - sources[i] = new PackedIntOrdinalMappingSource(segmentOrdToGlobalOrdDelta, ramUsed, maxOrd); - memorySizeInBytesCounter += ramUsed; + if (segmentOrdToGlobalOrdDelta.size() == maxOrd) { + // This means that a segment contains all the value and in that case segment ordinals + // can be used as global ordinals. This will save an extra lookup per hit. + sources[i] = null; + } else { + long ramUsed = segmentOrdToGlobalOrdDelta.ramBytesUsed(); + sources[i] = new PackedIntOrdinalMappingSource(segmentOrdToGlobalOrdDelta, ramUsed, maxOrd); + memorySizeInBytesCounter += ramUsed; + } + } return sources; } else { OrdinalMappingSource[] sources = new OrdinalMappingSource[segmentOrdToGlobalOrdDeltas.length]; for (int i = 0; i < segmentOrdToGlobalOrdDeltas.length; i++) { MonotonicAppendingLongBuffer segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdDeltas[i]; - segmentOrdToGlobalOrdLookup.freeze(); - long ramUsed = segmentOrdToGlobalOrdLookup.ramBytesUsed(); - sources[i] = new CompressedOrdinalMappingSource(segmentOrdToGlobalOrdLookup, ramUsed, maxOrd); - memorySizeInBytesCounter += ramUsed; + if (segmentOrdToGlobalOrdLookup.size() == maxOrd) { + // idem as above + sources[i] = null; + } else { + segmentOrdToGlobalOrdLookup.freeze(); + long ramUsed = segmentOrdToGlobalOrdLookup.ramBytesUsed(); + sources[i] = new CompressedOrdinalMappingSource(segmentOrdToGlobalOrdLookup, ramUsed, maxOrd); + memorySizeInBytesCounter += ramUsed; + } } return sources; } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index e808da1d442..da062c2e8da 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -65,12 +65,26 @@ public abstract class BucketsAggregator extends Aggregator { */ protected final void collectBucket(int doc, long bucketOrd) throws IOException { docCounts = bigArrays.grow(docCounts, bucketOrd + 1); + collectExistingBucket(doc, bucketOrd); + } + + /** + * Same as {@link #collectBucket(int, long)}, but doesn't check if the docCounts needs to be re-sized. + */ + protected final void collectExistingBucket(int doc, long bucketOrd) throws IOException { docCounts.increment(bucketOrd, 1); for (int i = 0; i < collectableSugAggregators.length; i++) { collectableSugAggregators[i].collect(doc, bucketOrd); } } + /** + * Initializes the docCounts to the specified size. + */ + public void initializeDocCounts(long maxOrd) { + docCounts = bigArrays.grow(docCounts, maxOrd); + } + /** * Utility method to collect the given doc in the given bucket but not to update the doc counts of the bucket */ diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index d1616b7068a..3f24f3ed9ad 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -69,6 +69,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr public void setNextReader(AtomicReaderContext reader) { globalValues = valuesSource.globalBytesValues(); globalOrdinals = globalValues.ordinals(); + initializeDocCounts(globalOrdinals.getMaxOrd()); } @Override @@ -76,7 +77,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr final int numOrds = globalOrdinals.setDocument(doc); for (int i = 0; i < numOrds; i++) { final long globalOrd = globalOrdinals.nextOrd(); - collectBucket(doc, createBucketOrd(globalOrd)); + collectExistingBucket(doc, createBucketOrd(globalOrd)); } } diff --git a/src/test/java/org/elasticsearch/benchmark/search/aggregations/GlobalOrdinalsBenchmark.java b/src/test/java/org/elasticsearch/benchmark/search/aggregations/GlobalOrdinalsBenchmark.java index 2189409307b..ad6fc73b5d3 100644 --- a/src/test/java/org/elasticsearch/benchmark/search/aggregations/GlobalOrdinalsBenchmark.java +++ b/src/test/java/org/elasticsearch/benchmark/search/aggregations/GlobalOrdinalsBenchmark.java @@ -177,10 +177,11 @@ public class GlobalOrdinalsBenchmark { int[] thresholds = new int[]{2048}; for (int threshold : thresholds) { updateThresholdInMapping(threshold); + System.out.println("--> Threshold: " + threshold); for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) { String fieldName = "field_" + fieldSuffix; - String name = threshold + "-" + fieldName; + String name = "global_ordinals-" + fieldName; if (USE_DOC_VALUES) { fieldName = fieldName + ".doc_values"; name = name + "_doc_values"; // can't have . in agg name @@ -191,7 +192,7 @@ public class GlobalOrdinalsBenchmark { for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) { String fieldName = "field_" + fieldSuffix; - String name = "segment-ordinals-" + fieldName; + String name = "ordinals-" + fieldName; if (USE_DOC_VALUES) { fieldName = fieldName + ".doc_values"; name = name + "_doc_values"; // can't have . in agg name @@ -199,12 +200,12 @@ public class GlobalOrdinalsBenchmark { stats.add(terms(name, fieldName, "ordinals")); } - System.out.println("------------------ SUMMARY ----------------------------------------------"); - System.out.format(Locale.ENGLISH, "%40s%10s%10s%15s\n", "name", "took", "millis", "fieldata size"); + System.out.println("------------------ SUMMARY -----------------------------------------"); + System.out.format(Locale.ENGLISH, "%30s%10s%10s%15s\n", "name", "took", "millis", "fieldata size"); for (StatsResult stat : stats) { - System.out.format(Locale.ENGLISH, "%40s%10s%10d%15s\n", stat.name, TimeValue.timeValueMillis(stat.took), (stat.took / QUERY_COUNT), stat.fieldDataMemoryUsed); + System.out.format(Locale.ENGLISH, "%30s%10s%10d%15s\n", stat.name, TimeValue.timeValueMillis(stat.took), (stat.took / QUERY_COUNT), stat.fieldDataMemoryUsed); } - System.out.println("------------------ SUMMARY ----------------------------------------------"); + System.out.println("------------------ SUMMARY -----------------------------------------"); client.close(); node.close();