Use segment ordinals as global ordinals if a segment contains all values for a field at the shard level.
Relates to #5854 Closes #5873
This commit is contained in:
parent
65bc017271
commit
eb9805389a
|
@ -127,8 +127,12 @@ public final class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent i
|
||||||
public BytesValues.WithOrdinals getBytesValues(boolean needsHashes) {
|
public BytesValues.WithOrdinals getBytesValues(boolean needsHashes) {
|
||||||
BytesValues.WithOrdinals values = afd.getBytesValues(false);
|
BytesValues.WithOrdinals values = afd.getBytesValues(false);
|
||||||
Ordinals.Docs segmentOrdinals = values.ordinals();
|
Ordinals.Docs segmentOrdinals = values.ordinals();
|
||||||
Ordinals.Docs globalOrdinals = segmentOrdToGlobalOrdLookup.globalOrdinals(segmentOrdinals);
|
final Ordinals.Docs globalOrdinals;
|
||||||
|
if (segmentOrdToGlobalOrdLookup != null) {
|
||||||
|
globalOrdinals = segmentOrdToGlobalOrdLookup.globalOrdinals(segmentOrdinals);
|
||||||
|
} else {
|
||||||
|
globalOrdinals = segmentOrdinals;
|
||||||
|
}
|
||||||
final BytesValues.WithOrdinals[] bytesValues = new BytesValues.WithOrdinals[atomicReaders.length];
|
final BytesValues.WithOrdinals[] bytesValues = new BytesValues.WithOrdinals[atomicReaders.length];
|
||||||
for (int i = 0; i < bytesValues.length; i++) {
|
for (int i = 0; i < bytesValues.length; i++) {
|
||||||
bytesValues[i] = atomicReaders[i].afd.getBytesValues(false);
|
bytesValues[i] = atomicReaders[i].afd.getBytesValues(false);
|
||||||
|
|
|
@ -104,7 +104,8 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
|
||||||
breakerService.getBreaker().addWithoutBreaking(memorySizeInBytes);
|
breakerService.getBreaker().addWithoutBreaking(memorySizeInBytes);
|
||||||
|
|
||||||
if (logger.isDebugEnabled()) {
|
if (logger.isDebugEnabled()) {
|
||||||
String implName = segmentOrdToGlobalOrdLookups[0].getClass().getSimpleName();
|
// Note: getSimpleName() is called on the array itself, so the impl name includes the trailing []
|
||||||
|
String implName = segmentOrdToGlobalOrdLookups.getClass().getSimpleName();
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Global-ordinals[{}][{}][{}] took {} ms",
|
"Global-ordinals[{}][{}][{}] took {} ms",
|
||||||
implName,
|
implName,
|
||||||
|
@ -225,20 +226,32 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
|
||||||
PackedIntOrdinalMappingSource[] sources = new PackedIntOrdinalMappingSource[numSegments];
|
PackedIntOrdinalMappingSource[] sources = new PackedIntOrdinalMappingSource[numSegments];
|
||||||
for (int i = 0; i < newSegmentOrdToGlobalOrdDeltas.length; i++) {
|
for (int i = 0; i < newSegmentOrdToGlobalOrdDeltas.length; i++) {
|
||||||
PackedInts.Reader segmentOrdToGlobalOrdDelta = newSegmentOrdToGlobalOrdDeltas[i];
|
PackedInts.Reader segmentOrdToGlobalOrdDelta = newSegmentOrdToGlobalOrdDeltas[i];
|
||||||
|
if (segmentOrdToGlobalOrdDelta.size() == maxOrd) {
|
||||||
|
// This means that a segment contains all the values, and in that case segment ordinals
|
||||||
|
// can be used as global ordinals. This will save an extra lookup per hit.
|
||||||
|
sources[i] = null;
|
||||||
|
} else {
|
||||||
long ramUsed = segmentOrdToGlobalOrdDelta.ramBytesUsed();
|
long ramUsed = segmentOrdToGlobalOrdDelta.ramBytesUsed();
|
||||||
sources[i] = new PackedIntOrdinalMappingSource(segmentOrdToGlobalOrdDelta, ramUsed, maxOrd);
|
sources[i] = new PackedIntOrdinalMappingSource(segmentOrdToGlobalOrdDelta, ramUsed, maxOrd);
|
||||||
memorySizeInBytesCounter += ramUsed;
|
memorySizeInBytesCounter += ramUsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
return sources;
|
return sources;
|
||||||
} else {
|
} else {
|
||||||
OrdinalMappingSource[] sources = new OrdinalMappingSource[segmentOrdToGlobalOrdDeltas.length];
|
OrdinalMappingSource[] sources = new OrdinalMappingSource[segmentOrdToGlobalOrdDeltas.length];
|
||||||
for (int i = 0; i < segmentOrdToGlobalOrdDeltas.length; i++) {
|
for (int i = 0; i < segmentOrdToGlobalOrdDeltas.length; i++) {
|
||||||
MonotonicAppendingLongBuffer segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdDeltas[i];
|
MonotonicAppendingLongBuffer segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdDeltas[i];
|
||||||
|
if (segmentOrdToGlobalOrdLookup.size() == maxOrd) {
|
||||||
|
// Same as above: the segment contains all the values, so its segment ordinals can be used as global ordinals.
|
||||||
|
sources[i] = null;
|
||||||
|
} else {
|
||||||
segmentOrdToGlobalOrdLookup.freeze();
|
segmentOrdToGlobalOrdLookup.freeze();
|
||||||
long ramUsed = segmentOrdToGlobalOrdLookup.ramBytesUsed();
|
long ramUsed = segmentOrdToGlobalOrdLookup.ramBytesUsed();
|
||||||
sources[i] = new CompressedOrdinalMappingSource(segmentOrdToGlobalOrdLookup, ramUsed, maxOrd);
|
sources[i] = new CompressedOrdinalMappingSource(segmentOrdToGlobalOrdLookup, ramUsed, maxOrd);
|
||||||
memorySizeInBytesCounter += ramUsed;
|
memorySizeInBytesCounter += ramUsed;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return sources;
|
return sources;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,12 +65,26 @@ public abstract class BucketsAggregator extends Aggregator {
|
||||||
*/
|
*/
|
||||||
protected final void collectBucket(int doc, long bucketOrd) throws IOException {
|
protected final void collectBucket(int doc, long bucketOrd) throws IOException {
|
||||||
docCounts = bigArrays.grow(docCounts, bucketOrd + 1);
|
docCounts = bigArrays.grow(docCounts, bucketOrd + 1);
|
||||||
|
collectExistingBucket(doc, bucketOrd);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Same as {@link #collectBucket(int, long)}, but doesn't check whether docCounts needs to be resized.
|
||||||
|
*/
|
||||||
|
protected final void collectExistingBucket(int doc, long bucketOrd) throws IOException {
|
||||||
docCounts.increment(bucketOrd, 1);
|
docCounts.increment(bucketOrd, 1);
|
||||||
for (int i = 0; i < collectableSugAggregators.length; i++) {
|
for (int i = 0; i < collectableSugAggregators.length; i++) {
|
||||||
collectableSugAggregators[i].collect(doc, bucketOrd);
|
collectableSugAggregators[i].collect(doc, bucketOrd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initializes the docCounts to the specified size.
|
||||||
|
*/
|
||||||
|
public void initializeDocCounts(long maxOrd) {
|
||||||
|
docCounts = bigArrays.grow(docCounts, maxOrd);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility method to collect the given doc in the given bucket but not to update the doc counts of the bucket
|
* Utility method to collect the given doc in the given bucket but not to update the doc counts of the bucket
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -69,6 +69,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||||
public void setNextReader(AtomicReaderContext reader) {
|
public void setNextReader(AtomicReaderContext reader) {
|
||||||
globalValues = valuesSource.globalBytesValues();
|
globalValues = valuesSource.globalBytesValues();
|
||||||
globalOrdinals = globalValues.ordinals();
|
globalOrdinals = globalValues.ordinals();
|
||||||
|
initializeDocCounts(globalOrdinals.getMaxOrd());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -76,7 +77,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||||
final int numOrds = globalOrdinals.setDocument(doc);
|
final int numOrds = globalOrdinals.setDocument(doc);
|
||||||
for (int i = 0; i < numOrds; i++) {
|
for (int i = 0; i < numOrds; i++) {
|
||||||
final long globalOrd = globalOrdinals.nextOrd();
|
final long globalOrd = globalOrdinals.nextOrd();
|
||||||
collectBucket(doc, createBucketOrd(globalOrd));
|
collectExistingBucket(doc, createBucketOrd(globalOrd));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -177,10 +177,11 @@ public class GlobalOrdinalsBenchmark {
|
||||||
int[] thresholds = new int[]{2048};
|
int[] thresholds = new int[]{2048};
|
||||||
for (int threshold : thresholds) {
|
for (int threshold : thresholds) {
|
||||||
updateThresholdInMapping(threshold);
|
updateThresholdInMapping(threshold);
|
||||||
|
System.out.println("--> Threshold: " + threshold);
|
||||||
|
|
||||||
for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) {
|
for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) {
|
||||||
String fieldName = "field_" + fieldSuffix;
|
String fieldName = "field_" + fieldSuffix;
|
||||||
String name = threshold + "-" + fieldName;
|
String name = "global_ordinals-" + fieldName;
|
||||||
if (USE_DOC_VALUES) {
|
if (USE_DOC_VALUES) {
|
||||||
fieldName = fieldName + ".doc_values";
|
fieldName = fieldName + ".doc_values";
|
||||||
name = name + "_doc_values"; // can't have . in agg name
|
name = name + "_doc_values"; // can't have . in agg name
|
||||||
|
@ -191,7 +192,7 @@ public class GlobalOrdinalsBenchmark {
|
||||||
|
|
||||||
for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) {
|
for (int fieldSuffix = FIELD_START; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) {
|
||||||
String fieldName = "field_" + fieldSuffix;
|
String fieldName = "field_" + fieldSuffix;
|
||||||
String name = "segment-ordinals-" + fieldName;
|
String name = "ordinals-" + fieldName;
|
||||||
if (USE_DOC_VALUES) {
|
if (USE_DOC_VALUES) {
|
||||||
fieldName = fieldName + ".doc_values";
|
fieldName = fieldName + ".doc_values";
|
||||||
name = name + "_doc_values"; // can't have . in agg name
|
name = name + "_doc_values"; // can't have . in agg name
|
||||||
|
@ -199,12 +200,12 @@ public class GlobalOrdinalsBenchmark {
|
||||||
stats.add(terms(name, fieldName, "ordinals"));
|
stats.add(terms(name, fieldName, "ordinals"));
|
||||||
}
|
}
|
||||||
|
|
||||||
System.out.println("------------------ SUMMARY ----------------------------------------------");
|
System.out.println("------------------ SUMMARY -----------------------------------------");
|
||||||
System.out.format(Locale.ENGLISH, "%40s%10s%10s%15s\n", "name", "took", "millis", "fieldata size");
|
System.out.format(Locale.ENGLISH, "%30s%10s%10s%15s\n", "name", "took", "millis", "fieldata size");
|
||||||
for (StatsResult stat : stats) {
|
for (StatsResult stat : stats) {
|
||||||
System.out.format(Locale.ENGLISH, "%40s%10s%10d%15s\n", stat.name, TimeValue.timeValueMillis(stat.took), (stat.took / QUERY_COUNT), stat.fieldDataMemoryUsed);
|
System.out.format(Locale.ENGLISH, "%30s%10s%10d%15s\n", stat.name, TimeValue.timeValueMillis(stat.took), (stat.took / QUERY_COUNT), stat.fieldDataMemoryUsed);
|
||||||
}
|
}
|
||||||
System.out.println("------------------ SUMMARY ----------------------------------------------");
|
System.out.println("------------------ SUMMARY -----------------------------------------");
|
||||||
|
|
||||||
client.close();
|
client.close();
|
||||||
node.close();
|
node.close();
|
||||||
|
|
Loading…
Reference in New Issue