Use segment ordinals as global ordinals if a segment contains all values for a field on a shard level.
Relates to #5854 Closes #5873
This commit is contained in:
parent
65bc017271
commit
eb9805389a
|
@ -127,8 +127,12 @@ public final class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent i
|
|||
public BytesValues.WithOrdinals getBytesValues(boolean needsHashes) {
|
||||
BytesValues.WithOrdinals values = afd.getBytesValues(false);
|
||||
Ordinals.Docs segmentOrdinals = values.ordinals();
|
||||
Ordinals.Docs globalOrdinals = segmentOrdToGlobalOrdLookup.globalOrdinals(segmentOrdinals);
|
||||
|
||||
final Ordinals.Docs globalOrdinals;
|
||||
if (segmentOrdToGlobalOrdLookup != null) {
|
||||
globalOrdinals = segmentOrdToGlobalOrdLookup.globalOrdinals(segmentOrdinals);
|
||||
} else {
|
||||
globalOrdinals = segmentOrdinals;
|
||||
}
|
||||
final BytesValues.WithOrdinals[] bytesValues = new BytesValues.WithOrdinals[atomicReaders.length];
|
||||
for (int i = 0; i < bytesValues.length; i++) {
|
||||
bytesValues[i] = atomicReaders[i].afd.getBytesValues(false);
|
||||
|
|
|
@ -104,7 +104,8 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
|
|||
breakerService.getBreaker().addWithoutBreaking(memorySizeInBytes);
|
||||
|
||||
if (logger.isDebugEnabled()) {
|
||||
String implName = segmentOrdToGlobalOrdLookups[0].getClass().getSimpleName();
|
||||
// this does include the [] from the array in the impl name
|
||||
String implName = segmentOrdToGlobalOrdLookups.getClass().getSimpleName();
|
||||
logger.debug(
|
||||
"Global-ordinals[{}][{}][{}] took {} ms",
|
||||
implName,
|
||||
|
@ -225,19 +226,31 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
|
|||
PackedIntOrdinalMappingSource[] sources = new PackedIntOrdinalMappingSource[numSegments];
|
||||
for (int i = 0; i < newSegmentOrdToGlobalOrdDeltas.length; i++) {
|
||||
PackedInts.Reader segmentOrdToGlobalOrdDelta = newSegmentOrdToGlobalOrdDeltas[i];
|
||||
long ramUsed = segmentOrdToGlobalOrdDelta.ramBytesUsed();
|
||||
sources[i] = new PackedIntOrdinalMappingSource(segmentOrdToGlobalOrdDelta, ramUsed, maxOrd);
|
||||
memorySizeInBytesCounter += ramUsed;
|
||||
if (segmentOrdToGlobalOrdDelta.size() == maxOrd) {
|
||||
// This means that a segment contains all the value and in that case segment ordinals
|
||||
// can be used as global ordinals. This will save an extra lookup per hit.
|
||||
sources[i] = null;
|
||||
} else {
|
||||
long ramUsed = segmentOrdToGlobalOrdDelta.ramBytesUsed();
|
||||
sources[i] = new PackedIntOrdinalMappingSource(segmentOrdToGlobalOrdDelta, ramUsed, maxOrd);
|
||||
memorySizeInBytesCounter += ramUsed;
|
||||
}
|
||||
|
||||
}
|
||||
return sources;
|
||||
} else {
|
||||
OrdinalMappingSource[] sources = new OrdinalMappingSource[segmentOrdToGlobalOrdDeltas.length];
|
||||
for (int i = 0; i < segmentOrdToGlobalOrdDeltas.length; i++) {
|
||||
MonotonicAppendingLongBuffer segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdDeltas[i];
|
||||
segmentOrdToGlobalOrdLookup.freeze();
|
||||
long ramUsed = segmentOrdToGlobalOrdLookup.ramBytesUsed();
|
||||
sources[i] = new CompressedOrdinalMappingSource(segmentOrdToGlobalOrdLookup, ramUsed, maxOrd);
|
||||
memorySizeInBytesCounter += ramUsed;
|
||||
if (segmentOrdToGlobalOrdLookup.size() == maxOrd) {
|
||||
// idem as above
|
||||
sources[i] = null;
|
||||
} else {
|
||||
segmentOrdToGlobalOrdLookup.freeze();
|
||||
long ramUsed = segmentOrdToGlobalOrdLookup.ramBytesUsed();
|
||||
sources[i] = new CompressedOrdinalMappingSource(segmentOrdToGlobalOrdLookup, ramUsed, maxOrd);
|
||||
memorySizeInBytesCounter += ramUsed;
|
||||
}
|
||||
}
|
||||
return sources;
|
||||
}
|
||||
|
|
|
@ -65,12 +65,26 @@ public abstract class BucketsAggregator extends Aggregator {
|
|||
*/
|
||||
protected final void collectBucket(int doc, long bucketOrd) throws IOException {
|
||||
docCounts = bigArrays.grow(docCounts, bucketOrd + 1);
|
||||
collectExistingBucket(doc, bucketOrd);
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as {@link #collectBucket(int, long)}, but doesn't check if the docCounts needs to be re-sized.
|
||||
*/
|
||||
protected final void collectExistingBucket(int doc, long bucketOrd) throws IOException {
|
||||
docCounts.increment(bucketOrd, 1);
|
||||
for (int i = 0; i < collectableSugAggregators.length; i++) {
|
||||
collectableSugAggregators[i].collect(doc, bucketOrd);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the docCounts to the specified size.
|
||||
*/
|
||||
public void initializeDocCounts(long maxOrd) {
|
||||
docCounts = bigArrays.grow(docCounts, maxOrd);
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method to collect the given doc in the given bucket but not to update the doc counts of the bucket
|
||||
*/
|
||||
|
|
|
@ -69,6 +69,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
|||
public void setNextReader(AtomicReaderContext reader) {
|
||||
globalValues = valuesSource.globalBytesValues();
|
||||
globalOrdinals = globalValues.ordinals();
|
||||
initializeDocCounts(globalOrdinals.getMaxOrd());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -76,7 +77,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
|||
final int numOrds = globalOrdinals.setDocument(doc);
|
||||
for (int i = 0; i < numOrds; i++) {
|
||||
final long globalOrd = globalOrdinals.nextOrd();
|
||||
collectBucket(doc, createBucketOrd(globalOrd));
|
||||
collectExistingBucket(doc, createBucketOrd(globalOrd));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -177,10 +177,11 @@ public class GlobalOrdinalsBenchmark {
|
|||
int[] thresholds = new int[]{2048};
|
||||
for (int threshold : thresholds) {
|
||||
updateThresholdInMapping(threshold);
|
||||
System.out.println("--> Threshold: " + threshold);
|
||||
|
||||
for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) {
|
||||
String fieldName = "field_" + fieldSuffix;
|
||||
String name = threshold + "-" + fieldName;
|
||||
String name = "global_ordinals-" + fieldName;
|
||||
if (USE_DOC_VALUES) {
|
||||
fieldName = fieldName + ".doc_values";
|
||||
name = name + "_doc_values"; // can't have . in agg name
|
||||
|
@ -191,7 +192,7 @@ public class GlobalOrdinalsBenchmark {
|
|||
|
||||
for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) {
|
||||
String fieldName = "field_" + fieldSuffix;
|
||||
String name = "segment-ordinals-" + fieldName;
|
||||
String name = "ordinals-" + fieldName;
|
||||
if (USE_DOC_VALUES) {
|
||||
fieldName = fieldName + ".doc_values";
|
||||
name = name + "_doc_values"; // can't have . in agg name
|
||||
|
@ -199,12 +200,12 @@ public class GlobalOrdinalsBenchmark {
|
|||
stats.add(terms(name, fieldName, "ordinals"));
|
||||
}
|
||||
|
||||
System.out.println("------------------ SUMMARY ----------------------------------------------");
|
||||
System.out.format(Locale.ENGLISH, "%40s%10s%10s%15s\n", "name", "took", "millis", "fieldata size");
|
||||
System.out.println("------------------ SUMMARY -----------------------------------------");
|
||||
System.out.format(Locale.ENGLISH, "%30s%10s%10s%15s\n", "name", "took", "millis", "fieldata size");
|
||||
for (StatsResult stat : stats) {
|
||||
System.out.format(Locale.ENGLISH, "%40s%10s%10d%15s\n", stat.name, TimeValue.timeValueMillis(stat.took), (stat.took / QUERY_COUNT), stat.fieldDataMemoryUsed);
|
||||
System.out.format(Locale.ENGLISH, "%30s%10s%10d%15s\n", stat.name, TimeValue.timeValueMillis(stat.took), (stat.took / QUERY_COUNT), stat.fieldDataMemoryUsed);
|
||||
}
|
||||
System.out.println("------------------ SUMMARY ----------------------------------------------");
|
||||
System.out.println("------------------ SUMMARY -----------------------------------------");
|
||||
|
||||
client.close();
|
||||
node.close();
|
||||
|
|
Loading…
Reference in New Issue