From 7c9641ef9d2f1115f1e2f54dc09697625d4dd85a Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 6 Mar 2020 15:27:51 -0500 Subject: [PATCH] Simplify BucketedSort (#53199) (#53240) Our lovely `BitArray` compactly stores "flags", lazilly growing its underlying storage. It is super useful when you need to store one bit of data for a zillion buckets or a documents or something. Usefully, it defaults to `false`. But there is a wrinkle! If you ask it whether or not a bit is set but it hasn't grown its underlying storage array "around" that index then it'll throw an `ArrayIndexOutOfBoundsException`. The per-document use cases tend to show up in order and don't tend to mind this too much. But the use case in aggregations, the per-bucket use case, does. Because buckets are collected out of order all the time. This changes `BitArray` so it'll return `false` if the index is too big for the underlying storage. After all, that index *can't* have been set or else we would have grown the underlying array. Logically, I believe this makes sense. And it makes my life easy. At the cost of three lines. *but* this adds an extra test to every call to `get`. I think this is likely ok because it is "very close" to an array index lookup that already runs the same test. So I *think* it'll end up merged with the array bounds check. --- .../org/elasticsearch/common/util/BitArray.java | 16 ++++++++++++++++ .../elasticsearch/search/sort/BucketedSort.java | 10 +--------- .../elasticsearch/common/util/BitArrayTests.java | 13 +++++++++++++ 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/util/BitArray.java b/server/src/main/java/org/elasticsearch/common/util/BitArray.java index 225dbf04e37..cfe2716e47e 100644 --- a/server/src/main/java/org/elasticsearch/common/util/BitArray.java +++ b/server/src/main/java/org/elasticsearch/common/util/BitArray.java @@ -37,16 +37,32 @@ public final class BitArray implements Releasable { this.bits = bigArrays.newLongArray(initialSize, true); } + /** + * Set the {@code index}th bit. + */ public void set(int index) { fill(index, true); } + /** + * Clear the {@code index}th bit. + */ public void clear(int index) { fill(index, false); } + /** + * Is the {@code index}th bit set? + */ public boolean get(int index) { int wordNum = index >> 6; + if (wordNum >= bits.size()) { + /* + * If the word is bigger than the array then it could *never* have + * been set. + */ + return false; + } long bitmask = 1L << index; return (bits.get(wordNum) & bitmask) != 0; } diff --git a/server/src/main/java/org/elasticsearch/search/sort/BucketedSort.java b/server/src/main/java/org/elasticsearch/search/sort/BucketedSort.java index 7144d8bbc24..b8e8e4b0690 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/BucketedSort.java +++ b/server/src/main/java/org/elasticsearch/search/sort/BucketedSort.java @@ -134,13 +134,6 @@ public abstract class BucketedSort implements Releasable { * it is still gathering. */ private final BitArray heapMode; - /** - * The highest bucket ordinal that has been converted into a heap. This is - * required because calling {@link BitArray#get(int)} on an index higher - * than the highest one that was {@link BitArray#set(int) set} could throw - * and {@link ArrayIndexOutOfBoundsException}. So we check this first. - */ - private long maxHeapBucket = 0; protected BucketedSort(BigArrays bigArrays, SortOrder order, DocValueFormat format, int bucketSize, ExtraData extra) { this.bigArrays = bigArrays; @@ -216,7 +209,7 @@ public abstract class BucketedSort implements Releasable { * Is this bucket a min heap {@code true} or in gathering mode {@code false}? */ private boolean inHeapMode(long bucket) { - return bucket <= maxHeapBucket && heapMode.get((int) bucket); + return heapMode.get((int) bucket); } /** @@ -430,7 +423,6 @@ public abstract class BucketedSort implements Releasable { throw new UnsupportedOperationException("Bucketed sort doesn't support more than [" + Integer.MAX_VALUE + "] buckets"); // BitArray needs int keys and this'd take a ton of memory to use that many buckets. So we just don't. } - maxHeapBucket = Math.max(bucket, maxHeapBucket); heapMode.set((int) bucket); heapify(rootIndex); } else { diff --git a/server/src/test/java/org/elasticsearch/common/util/BitArrayTests.java b/server/src/test/java/org/elasticsearch/common/util/BitArrayTests.java index 518bbc08f4c..665315e171f 100644 --- a/server/src/test/java/org/elasticsearch/common/util/BitArrayTests.java +++ b/server/src/test/java/org/elasticsearch/common/util/BitArrayTests.java @@ -50,4 +50,17 @@ public class BitArrayTests extends ESTestCase { } } } + + public void testTooBigIsNotSet() { + try (BitArray bitArray = new BitArray(1, BigArrays.NON_RECYCLING_INSTANCE)) { + for (int i = 0; i < 1000; i++) { + /* + * The first few times this is called we check within the + * array. But we quickly go beyond it and those all return + * false as well. + */ + assertFalse(bitArray.get(i)); + } + } + } }