From e386ec973b8a4ec2de2bfc43f51df511a365d60f Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Tue, 11 Apr 2017 11:47:31 -0400 Subject: [PATCH] LUCENE-7777: fix AIOOBE from ByteBlockPool.readBytes when byte block exceeds 32 KB --- lucene/CHANGES.txt | 6 ++ .../commongrams/CommonGramsFilter.java | 2 +- .../org/apache/lucene/util/ByteBlockPool.java | 61 +++++++------------ .../apache/lucene/util/TestByteBlockPool.java | 34 ++++++++++- 4 files changed, 61 insertions(+), 42 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index cd25deeac6b..6f01ee3c91c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -91,6 +91,12 @@ Other ======================= Lucene 6.6.0 ======================= +Bug Fixes + +* LUCENE-7777: ByteBlockPool.readBytes sometimes throws + ArrayIndexOutOfBoundsException when byte blocks larger than 32 KB + were added (Mike McCandless) + Other * LUCENE-7754: Inner classes should be static whenever possible. diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java index 75e991fde8e..c01e2638042 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java @@ -106,7 +106,7 @@ public final class CommonGramsFilter extends TokenFilter { saveTermBuffer(); return true; } else if (!input.incrementToken()) { - return false; + return false; } /* We build n-grams before and after stopwords. diff --git a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java index 1b71440ae9c..af8e19583bb 100644 --- a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java @@ -324,28 +324,25 @@ public final class ByteBlockPool { * the current position. */ public void append(final BytesRef bytes) { - int length = bytes.length; - if (length == 0) { - return; - } + int bytesLeft = bytes.length; int offset = bytes.offset; - int overflow = (length + byteUpto) - BYTE_BLOCK_SIZE; - do { - if (overflow <= 0) { - System.arraycopy(bytes.bytes, offset, buffer, byteUpto, length); - byteUpto += length; + while (bytesLeft > 0) { + int bufferLeft = BYTE_BLOCK_SIZE - byteUpto; + if (bytesLeft < bufferLeft) { + // fits within current buffer + System.arraycopy(bytes.bytes, offset, buffer, byteUpto, bytesLeft); + byteUpto += bytesLeft; break; } else { - final int bytesToCopy = length-overflow; - if (bytesToCopy > 0) { - System.arraycopy(bytes.bytes, offset, buffer, byteUpto, bytesToCopy); - offset += bytesToCopy; - length -= bytesToCopy; + // fill up this buffer and move to next one + if (bufferLeft > 0) { + System.arraycopy(bytes.bytes, offset, buffer, byteUpto, bufferLeft); } nextBuffer(); - overflow = overflow - BYTE_BLOCK_SIZE; + bytesLeft -= bufferLeft; + offset += bufferLeft; } - } while(true); + } } /** @@ -353,30 +350,18 @@ public final class ByteBlockPool { * length into the given byte array at offset off. *

Note: this method allows to copy across block boundaries.

*/ - public void readBytes(final long offset, final byte bytes[], final int off, final int length) { - if (length == 0) { - return; - } - int bytesOffset = off; - int bytesLength = length; + public void readBytes(final long offset, final byte bytes[], int bytesOffset, int bytesLength) { + int bytesLeft = bytesLength; int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT); - byte[] buffer = buffers[bufferIndex]; int pos = (int) (offset & BYTE_BLOCK_MASK); - int overflow = (pos + length) - BYTE_BLOCK_SIZE; - do { - if (overflow <= 0) { - System.arraycopy(buffer, pos, bytes, bytesOffset, bytesLength); - break; - } else { - final int bytesToCopy = length - overflow; - System.arraycopy(buffer, pos, bytes, bytesOffset, bytesToCopy); - pos = 0; - bytesLength -= bytesToCopy; - bytesOffset += bytesToCopy; - buffer = buffers[++bufferIndex]; - overflow = overflow - BYTE_BLOCK_SIZE; - } - } while (true); + while (bytesLeft > 0) { + byte[] buffer = buffers[bufferIndex++]; + int chunk = Math.min(bytesLeft, BYTE_BLOCK_SIZE - pos); + System.arraycopy(buffer, pos, bytes, bytesOffset, chunk); + bytesOffset += chunk; + bytesLeft -= chunk; + pos = 0; + } } /** diff --git a/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java b/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java index df73687d17c..475f716acf5 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java @@ -18,6 +18,7 @@ package org.apache.lucene.util; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; public class TestByteBlockPool extends LuceneTestCase { @@ -34,8 +35,7 @@ public class TestByteBlockPool extends LuceneTestCase { final int numValues = atLeast(100); BytesRefBuilder ref = new BytesRefBuilder(); for (int i = 0; i < numValues; i++) { - final String value = TestUtil.randomRealisticUnicodeString(random(), - maxLength); + final String value = TestUtil.randomRealisticUnicodeString(random(), maxLength); list.add(new BytesRef(value)); ref.copyChars(value); pool.append(ref.get()); @@ -76,5 +76,33 @@ public class TestByteBlockPool extends LuceneTestCase { pool.nextBuffer(); // prepare for next iter } } - } + } + + public void testLargeRandomBlocks() throws IOException { + Counter bytesUsed = Counter.newCounter(); + ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed)); + pool.nextBuffer(); + + List items = new ArrayList<>(); + for (int i=0;i<100;i++) { + int size; + if (random().nextBoolean()) { + size = TestUtil.nextInt(random(), 100, 1000); + } else { + size = TestUtil.nextInt(random(), 50000, 100000); + } + byte[] bytes = new byte[size]; + random().nextBytes(bytes); + items.add(bytes); + pool.append(new BytesRef(bytes)); + } + + long position = 0; + for (byte[] expected : items) { + byte[] actual = new byte[expected.length]; + pool.readBytes(position, actual, 0, actual.length); + assertTrue(Arrays.equals(expected, actual)); + position += expected.length; + } + } }