Use segment ordinals as global ordinals if a segment contains all values for a field on a shard level.

Relates to #5854
Closes #5873
This commit is contained in:
Martijn van Groningen 2014-04-18 20:40:16 +07:00
parent 65bc017271
commit eb9805389a
5 changed files with 50 additions and 17 deletions

View File

@ -127,8 +127,12 @@ public final class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent i
public BytesValues.WithOrdinals getBytesValues(boolean needsHashes) {
BytesValues.WithOrdinals values = afd.getBytesValues(false);
Ordinals.Docs segmentOrdinals = values.ordinals();
Ordinals.Docs globalOrdinals = segmentOrdToGlobalOrdLookup.globalOrdinals(segmentOrdinals);
final Ordinals.Docs globalOrdinals;
if (segmentOrdToGlobalOrdLookup != null) {
globalOrdinals = segmentOrdToGlobalOrdLookup.globalOrdinals(segmentOrdinals);
} else {
globalOrdinals = segmentOrdinals;
}
final BytesValues.WithOrdinals[] bytesValues = new BytesValues.WithOrdinals[atomicReaders.length];
for (int i = 0; i < bytesValues.length; i++) {
bytesValues[i] = atomicReaders[i].afd.getBytesValues(false);

View File

@ -104,7 +104,8 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
breakerService.getBreaker().addWithoutBreaking(memorySizeInBytes);
if (logger.isDebugEnabled()) {
String implName = segmentOrdToGlobalOrdLookups[0].getClass().getSimpleName();
// this does include the [] from the array in the impl name
String implName = segmentOrdToGlobalOrdLookups.getClass().getSimpleName();
logger.debug(
"Global-ordinals[{}][{}][{}] took {} ms",
implName,
@ -225,19 +226,31 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
PackedIntOrdinalMappingSource[] sources = new PackedIntOrdinalMappingSource[numSegments];
for (int i = 0; i < newSegmentOrdToGlobalOrdDeltas.length; i++) {
PackedInts.Reader segmentOrdToGlobalOrdDelta = newSegmentOrdToGlobalOrdDeltas[i];
long ramUsed = segmentOrdToGlobalOrdDelta.ramBytesUsed();
sources[i] = new PackedIntOrdinalMappingSource(segmentOrdToGlobalOrdDelta, ramUsed, maxOrd);
memorySizeInBytesCounter += ramUsed;
if (segmentOrdToGlobalOrdDelta.size() == maxOrd) {
// This means that a segment contains all the values, and in that case segment ordinals
// can be used as global ordinals. This will save an extra lookup per hit.
sources[i] = null;
} else {
long ramUsed = segmentOrdToGlobalOrdDelta.ramBytesUsed();
sources[i] = new PackedIntOrdinalMappingSource(segmentOrdToGlobalOrdDelta, ramUsed, maxOrd);
memorySizeInBytesCounter += ramUsed;
}
}
return sources;
} else {
OrdinalMappingSource[] sources = new OrdinalMappingSource[segmentOrdToGlobalOrdDeltas.length];
for (int i = 0; i < segmentOrdToGlobalOrdDeltas.length; i++) {
MonotonicAppendingLongBuffer segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdDeltas[i];
segmentOrdToGlobalOrdLookup.freeze();
long ramUsed = segmentOrdToGlobalOrdLookup.ramBytesUsed();
sources[i] = new CompressedOrdinalMappingSource(segmentOrdToGlobalOrdLookup, ramUsed, maxOrd);
memorySizeInBytesCounter += ramUsed;
if (segmentOrdToGlobalOrdLookup.size() == maxOrd) {
// Same as above: the segment contains all the values, so its ordinals can be used as global ordinals.
sources[i] = null;
} else {
segmentOrdToGlobalOrdLookup.freeze();
long ramUsed = segmentOrdToGlobalOrdLookup.ramBytesUsed();
sources[i] = new CompressedOrdinalMappingSource(segmentOrdToGlobalOrdLookup, ramUsed, maxOrd);
memorySizeInBytesCounter += ramUsed;
}
}
return sources;
}

View File

@ -65,12 +65,26 @@ public abstract class BucketsAggregator extends Aggregator {
*/
protected final void collectBucket(int doc, long bucketOrd) throws IOException {
// Resize check: make sure docCounts is large enough to address slot bucketOrd before it is incremented.
docCounts = bigArrays.grow(docCounts, bucketOrd + 1);
// The counting and sub-aggregator collection is shared with the variant that skips the resize check.
collectExistingBucket(doc, bucketOrd);
}
/**
 * Variant of {@link #collectBucket(int, long)} that skips the docCounts resize check: the count for
 * {@code bucketOrd} is incremented as-is and the doc is then passed to every collectable sub-aggregator.
 */
protected final void collectExistingBucket(int doc, long bucketOrd) throws IOException {
    docCounts.increment(bucketOrd, 1);
    int subIndex = 0;
    while (subIndex < collectableSugAggregators.length) {
        collectableSugAggregators[subIndex].collect(doc, bucketOrd);
        subIndex++;
    }
}
/**
 * Initializes the docCounts to the specified size.
 * <p>
 * The resize is delegated to {@code bigArrays.grow}. Sizing docCounts up front is what lets a caller
 * use {@link #collectExistingBucket(int, long)}, which performs no resize check of its own.
 *
 * @param maxOrd the size handed to {@code bigArrays.grow}; presumably an upper bound (exclusive) on the
 *               bucket ordinals that will be collected afterwards -- confirm against callers
 */
public void initializeDocCounts(long maxOrd) {
docCounts = bigArrays.grow(docCounts, maxOrd);
}
/**
* Utility method to collect the given doc in the given bucket but not to update the doc counts of the bucket
*/

View File

@ -69,6 +69,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
public void setNextReader(AtomicReaderContext reader) {
    // The reader argument is not consulted here; the global values are obtained from the values source.
    globalValues = valuesSource.globalBytesValues();
    globalOrdinals = globalValues.ordinals();
    // Size docCounts once up front, based on the maximum global ordinal, so that per-document
    // collection does not need a resize check for every bucket.
    final long maxGlobalOrd = globalOrdinals.getMaxOrd();
    initializeDocCounts(maxGlobalOrd);
}
@Override
@ -76,7 +77,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
final int numOrds = globalOrdinals.setDocument(doc);
for (int i = 0; i < numOrds; i++) {
final long globalOrd = globalOrdinals.nextOrd();
collectBucket(doc, createBucketOrd(globalOrd));
collectExistingBucket(doc, createBucketOrd(globalOrd));
}
}

View File

@ -177,10 +177,11 @@ public class GlobalOrdinalsBenchmark {
int[] thresholds = new int[]{2048};
for (int threshold : thresholds) {
updateThresholdInMapping(threshold);
System.out.println("--> Threshold: " + threshold);
for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) {
String fieldName = "field_" + fieldSuffix;
String name = threshold + "-" + fieldName;
String name = "global_ordinals-" + fieldName;
if (USE_DOC_VALUES) {
fieldName = fieldName + ".doc_values";
name = name + "_doc_values"; // can't have . in agg name
@ -191,7 +192,7 @@ public class GlobalOrdinalsBenchmark {
for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) {
String fieldName = "field_" + fieldSuffix;
String name = "segment-ordinals-" + fieldName;
String name = "ordinals-" + fieldName;
if (USE_DOC_VALUES) {
fieldName = fieldName + ".doc_values";
name = name + "_doc_values"; // can't have . in agg name
@ -199,12 +200,12 @@ public class GlobalOrdinalsBenchmark {
stats.add(terms(name, fieldName, "ordinals"));
}
System.out.println("------------------ SUMMARY ----------------------------------------------");
System.out.format(Locale.ENGLISH, "%40s%10s%10s%15s\n", "name", "took", "millis", "fieldata size");
System.out.println("------------------ SUMMARY -----------------------------------------");
System.out.format(Locale.ENGLISH, "%30s%10s%10s%15s\n", "name", "took", "millis", "fieldata size");
for (StatsResult stat : stats) {
System.out.format(Locale.ENGLISH, "%40s%10s%10d%15s\n", stat.name, TimeValue.timeValueMillis(stat.took), (stat.took / QUERY_COUNT), stat.fieldDataMemoryUsed);
System.out.format(Locale.ENGLISH, "%30s%10s%10d%15s\n", stat.name, TimeValue.timeValueMillis(stat.took), (stat.took / QUERY_COUNT), stat.fieldDataMemoryUsed);
}
System.out.println("------------------ SUMMARY ----------------------------------------------");
System.out.println("------------------ SUMMARY -----------------------------------------");
client.close();
node.close();