Use segment ordinals as global ordinals when a single segment contains all values for a field at the shard level.

Relates to #5854
Closes #5873
This commit is contained in:
Martijn van Groningen 2014-04-18 20:40:16 +07:00
parent 65bc017271
commit eb9805389a
5 changed files with 50 additions and 17 deletions

View File

@ -127,8 +127,12 @@ public final class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent i
public BytesValues.WithOrdinals getBytesValues(boolean needsHashes) { public BytesValues.WithOrdinals getBytesValues(boolean needsHashes) {
BytesValues.WithOrdinals values = afd.getBytesValues(false); BytesValues.WithOrdinals values = afd.getBytesValues(false);
Ordinals.Docs segmentOrdinals = values.ordinals(); Ordinals.Docs segmentOrdinals = values.ordinals();
Ordinals.Docs globalOrdinals = segmentOrdToGlobalOrdLookup.globalOrdinals(segmentOrdinals); final Ordinals.Docs globalOrdinals;
if (segmentOrdToGlobalOrdLookup != null) {
globalOrdinals = segmentOrdToGlobalOrdLookup.globalOrdinals(segmentOrdinals);
} else {
globalOrdinals = segmentOrdinals;
}
final BytesValues.WithOrdinals[] bytesValues = new BytesValues.WithOrdinals[atomicReaders.length]; final BytesValues.WithOrdinals[] bytesValues = new BytesValues.WithOrdinals[atomicReaders.length];
for (int i = 0; i < bytesValues.length; i++) { for (int i = 0; i < bytesValues.length; i++) {
bytesValues[i] = atomicReaders[i].afd.getBytesValues(false); bytesValues[i] = atomicReaders[i].afd.getBytesValues(false);

View File

@ -104,7 +104,8 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
breakerService.getBreaker().addWithoutBreaking(memorySizeInBytes); breakerService.getBreaker().addWithoutBreaking(memorySizeInBytes);
if (logger.isDebugEnabled()) { if (logger.isDebugEnabled()) {
String implName = segmentOrdToGlobalOrdLookups[0].getClass().getSimpleName(); // this does include the [] from the array in the impl name
String implName = segmentOrdToGlobalOrdLookups.getClass().getSimpleName();
logger.debug( logger.debug(
"Global-ordinals[{}][{}][{}] took {} ms", "Global-ordinals[{}][{}][{}] took {} ms",
implName, implName,
@ -225,19 +226,31 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
PackedIntOrdinalMappingSource[] sources = new PackedIntOrdinalMappingSource[numSegments]; PackedIntOrdinalMappingSource[] sources = new PackedIntOrdinalMappingSource[numSegments];
for (int i = 0; i < newSegmentOrdToGlobalOrdDeltas.length; i++) { for (int i = 0; i < newSegmentOrdToGlobalOrdDeltas.length; i++) {
PackedInts.Reader segmentOrdToGlobalOrdDelta = newSegmentOrdToGlobalOrdDeltas[i]; PackedInts.Reader segmentOrdToGlobalOrdDelta = newSegmentOrdToGlobalOrdDeltas[i];
long ramUsed = segmentOrdToGlobalOrdDelta.ramBytesUsed(); if (segmentOrdToGlobalOrdDelta.size() == maxOrd) {
sources[i] = new PackedIntOrdinalMappingSource(segmentOrdToGlobalOrdDelta, ramUsed, maxOrd); // This means that a segment contains all the values and in that case segment ordinals
memorySizeInBytesCounter += ramUsed; // can be used as global ordinals. This will save an extra lookup per hit.
sources[i] = null;
} else {
long ramUsed = segmentOrdToGlobalOrdDelta.ramBytesUsed();
sources[i] = new PackedIntOrdinalMappingSource(segmentOrdToGlobalOrdDelta, ramUsed, maxOrd);
memorySizeInBytesCounter += ramUsed;
}
} }
return sources; return sources;
} else { } else {
OrdinalMappingSource[] sources = new OrdinalMappingSource[segmentOrdToGlobalOrdDeltas.length]; OrdinalMappingSource[] sources = new OrdinalMappingSource[segmentOrdToGlobalOrdDeltas.length];
for (int i = 0; i < segmentOrdToGlobalOrdDeltas.length; i++) { for (int i = 0; i < segmentOrdToGlobalOrdDeltas.length; i++) {
MonotonicAppendingLongBuffer segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdDeltas[i]; MonotonicAppendingLongBuffer segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdDeltas[i];
segmentOrdToGlobalOrdLookup.freeze(); if (segmentOrdToGlobalOrdLookup.size() == maxOrd) {
long ramUsed = segmentOrdToGlobalOrdLookup.ramBytesUsed(); // same as above: segment ordinals can be used as global ordinals
sources[i] = new CompressedOrdinalMappingSource(segmentOrdToGlobalOrdLookup, ramUsed, maxOrd); sources[i] = null;
memorySizeInBytesCounter += ramUsed; } else {
segmentOrdToGlobalOrdLookup.freeze();
long ramUsed = segmentOrdToGlobalOrdLookup.ramBytesUsed();
sources[i] = new CompressedOrdinalMappingSource(segmentOrdToGlobalOrdLookup, ramUsed, maxOrd);
memorySizeInBytesCounter += ramUsed;
}
} }
return sources; return sources;
} }

View File

@ -65,12 +65,26 @@ public abstract class BucketsAggregator extends Aggregator {
*/ */
protected final void collectBucket(int doc, long bucketOrd) throws IOException { protected final void collectBucket(int doc, long bucketOrd) throws IOException {
docCounts = bigArrays.grow(docCounts, bucketOrd + 1); docCounts = bigArrays.grow(docCounts, bucketOrd + 1);
collectExistingBucket(doc, bucketOrd);
}
/**
* Same as {@link #collectBucket(int, long)}, but doesn't check whether docCounts needs to be resized.
*/
protected final void collectExistingBucket(int doc, long bucketOrd) throws IOException {
docCounts.increment(bucketOrd, 1); docCounts.increment(bucketOrd, 1);
for (int i = 0; i < collectableSugAggregators.length; i++) { for (int i = 0; i < collectableSugAggregators.length; i++) {
collectableSugAggregators[i].collect(doc, bucketOrd); collectableSugAggregators[i].collect(doc, bucketOrd);
} }
} }
/**
* Initializes the docCounts to the specified size.
* <p>
* Replaces {@code docCounts} with the result of {@code bigArrays.grow(docCounts, maxOrd)}.
* NOTE(review): presumably this guarantees capacity for bucket ordinals in {@code [0, maxOrd)},
* so that {@link #collectExistingBucket(int, long)} can be used without a per-document resize
* check; confirm against the {@code bigArrays.grow} contract.
*
* @param maxOrd the size passed to {@code bigArrays.grow} for {@code docCounts}
*/
public void initializeDocCounts(long maxOrd) {
docCounts = bigArrays.grow(docCounts, maxOrd);
}
/** /**
* Utility method to collect the given doc in the given bucket but not to update the doc counts of the bucket * Utility method to collect the given doc in the given bucket but not to update the doc counts of the bucket
*/ */

View File

@ -69,6 +69,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
public void setNextReader(AtomicReaderContext reader) { public void setNextReader(AtomicReaderContext reader) {
globalValues = valuesSource.globalBytesValues(); globalValues = valuesSource.globalBytesValues();
globalOrdinals = globalValues.ordinals(); globalOrdinals = globalValues.ordinals();
initializeDocCounts(globalOrdinals.getMaxOrd());
} }
@Override @Override
@ -76,7 +77,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
final int numOrds = globalOrdinals.setDocument(doc); final int numOrds = globalOrdinals.setDocument(doc);
for (int i = 0; i < numOrds; i++) { for (int i = 0; i < numOrds; i++) {
final long globalOrd = globalOrdinals.nextOrd(); final long globalOrd = globalOrdinals.nextOrd();
collectBucket(doc, createBucketOrd(globalOrd)); collectExistingBucket(doc, createBucketOrd(globalOrd));
} }
} }

View File

@ -177,10 +177,11 @@ public class GlobalOrdinalsBenchmark {
int[] thresholds = new int[]{2048}; int[] thresholds = new int[]{2048};
for (int threshold : thresholds) { for (int threshold : thresholds) {
updateThresholdInMapping(threshold); updateThresholdInMapping(threshold);
System.out.println("--> Threshold: " + threshold);
for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) { for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) {
String fieldName = "field_" + fieldSuffix; String fieldName = "field_" + fieldSuffix;
String name = threshold + "-" + fieldName; String name = "global_ordinals-" + fieldName;
if (USE_DOC_VALUES) { if (USE_DOC_VALUES) {
fieldName = fieldName + ".doc_values"; fieldName = fieldName + ".doc_values";
name = name + "_doc_values"; // can't have . in agg name name = name + "_doc_values"; // can't have . in agg name
@ -191,7 +192,7 @@ public class GlobalOrdinalsBenchmark {
for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) { for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) {
String fieldName = "field_" + fieldSuffix; String fieldName = "field_" + fieldSuffix;
String name = "segment-ordinals-" + fieldName; String name = "ordinals-" + fieldName;
if (USE_DOC_VALUES) { if (USE_DOC_VALUES) {
fieldName = fieldName + ".doc_values"; fieldName = fieldName + ".doc_values";
name = name + "_doc_values"; // can't have . in agg name name = name + "_doc_values"; // can't have . in agg name
@ -199,12 +200,12 @@ public class GlobalOrdinalsBenchmark {
stats.add(terms(name, fieldName, "ordinals")); stats.add(terms(name, fieldName, "ordinals"));
} }
System.out.println("------------------ SUMMARY ----------------------------------------------"); System.out.println("------------------ SUMMARY -----------------------------------------");
System.out.format(Locale.ENGLISH, "%40s%10s%10s%15s\n", "name", "took", "millis", "fieldata size"); System.out.format(Locale.ENGLISH, "%30s%10s%10s%15s\n", "name", "took", "millis", "fieldata size");
for (StatsResult stat : stats) { for (StatsResult stat : stats) {
System.out.format(Locale.ENGLISH, "%40s%10s%10d%15s\n", stat.name, TimeValue.timeValueMillis(stat.took), (stat.took / QUERY_COUNT), stat.fieldDataMemoryUsed); System.out.format(Locale.ENGLISH, "%30s%10s%10d%15s\n", stat.name, TimeValue.timeValueMillis(stat.took), (stat.took / QUERY_COUNT), stat.fieldDataMemoryUsed);
} }
System.out.println("------------------ SUMMARY ----------------------------------------------"); System.out.println("------------------ SUMMARY -----------------------------------------");
client.close(); client.close();
node.close(); node.close();