From 8e90661734c86dad3a556c046aee020e742c5c32 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 29 May 2012 15:44:04 +0000 Subject: [PATCH] LUCENE-3590: fix AIOOBE in copyBytes/copyChars, off-by-one in subSequence git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1343786 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 5 ++ .../codecs/lucene40/values/DirectSource.java | 2 +- .../lucene40/values/FixedSortedBytesImpl.java | 2 +- .../lucene40/values/VarSortedBytesImpl.java | 2 +- .../java/org/apache/lucene/util/BytesRef.java | 17 ++-- .../java/org/apache/lucene/util/CharsRef.java | 58 ++++++++----- .../java/org/apache/lucene/util/IntsRef.java | 17 ++-- .../lucene/util/fst/BytesRefFSTEnum.java | 3 +- .../lucene/util/fst/IntsRefFSTEnum.java | 3 +- .../lucene/index/TestDocValuesIndexing.java | 2 +- .../org/apache/lucene/util/TestBytesRef.java | 16 ++++ .../org/apache/lucene/util/TestCharsRef.java | 85 +++++++++++++++++++ .../org/apache/lucene/util/TestIntsRef.java | 40 +++++++++ 13 files changed, 213 insertions(+), 39 deletions(-) create mode 100644 lucene/core/src/test/org/apache/lucene/util/TestIntsRef.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 71b317525ea..8ba01ca37b7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -985,6 +985,11 @@ Bug fixes addTaxonomy and now takes only one Directory and one OrdinalMap. (Shai Erera, Gilad Barkai) +* LUCENE-3590: Fix AIOOBE in BytesRef/CharsRef copyBytes/copyChars when + offset is nonzero, fix off-by-one in CharsRef.subSequence, and fix + CharsRef's CharSequence methods to throw exceptions in boundary cases + to properly meet the specification. (Robert Muir) + Documentation * LUCENE-3958: Javadocs corrections for IndexWriter. diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java index 5f38e3cf352..839d9f83f34 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java @@ -58,10 +58,10 @@ abstract class DirectSource extends Source { public BytesRef getBytes(int docID, BytesRef ref) { try { final int sizeToRead = position(docID); + ref.offset = 0; ref.grow(sizeToRead); data.readBytes(ref.bytes, 0, sizeToRead); ref.length = sizeToRead; - ref.offset = 0; return ref; } catch (IOException ex) { throw new IllegalStateException("failed to get value for docID: " + docID, ex); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java index e5f0a7d460a..ed0368c9c10 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java @@ -213,10 +213,10 @@ class FixedSortedBytesImpl { public BytesRef getByOrd(int ord, BytesRef bytesRef) { try { datIn.seek(basePointer + size * ord); + bytesRef.offset = 0; bytesRef.grow(size); datIn.readBytes(bytesRef.bytes, 0, size); bytesRef.length = size; - bytesRef.offset = 0; return bytesRef; } catch (IOException ex) { throw new IllegalStateException("failed to getByOrd", ex); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java index 1d822e0308a..274208fface 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java @@ -239,10 +239,10 @@ final class VarSortedBytesImpl { final long nextOffset = ordToOffsetIndex.get(1+ord); datIn.seek(basePointer + offset); final int length = (int) (nextOffset - offset); + bytesRef.offset = 0; bytesRef.grow(length); datIn.readBytes(bytesRef.bytes, 0, length); bytesRef.length = length; - bytesRef.offset = 0; return bytesRef; } catch (IOException ex) { throw new IllegalStateException("failed", ex); diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRef.java b/lucene/core/src/java/org/apache/lucene/util/BytesRef.java index 900a96f5746..89f02faf518 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BytesRef.java +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRef.java @@ -47,6 +47,9 @@ public final class BytesRef implements Comparable,Cloneable { */ public BytesRef(byte[] bytes, int offset, int length) { assert bytes != null; + assert offset >= 0; + assert length >= 0; + assert bytes.length >= offset + length; this.bytes = bytes; this.offset = offset; this.length = length; @@ -84,8 +87,8 @@ public final class BytesRef implements Comparable,Cloneable { * @param text Must be well-formed unicode text, with no * unpaired surrogates or invalid UTF16 code units. */ - // TODO broken if offset != 0 public void copyChars(CharSequence text) { + assert offset == 0; // TODO broken if offset != 0 UnicodeUtil.UTF16toUTF8(text, 0, text.length(), this); } @@ -180,7 +183,7 @@ public final class BytesRef implements Comparable,Cloneable { * new reference array. */ public void copyBytes(BytesRef other) { - if (bytes.length < other.length) { + if (bytes.length - offset < other.length) { bytes = new byte[other.length]; offset = 0; } @@ -196,7 +199,7 @@ public final class BytesRef implements Comparable,Cloneable { */ public void append(BytesRef other) { int newLen = length + other.length; - if (bytes.length < newLen) { + if (bytes.length - offset < newLen) { byte[] newBytes = new byte[newLen]; System.arraycopy(bytes, offset, newBytes, 0, length); offset = 0; @@ -206,9 +209,13 @@ public final class BytesRef implements Comparable,Cloneable { length = newLen; } - // TODO: stupid if existing offset is non-zero. - /** @lucene.internal */ + /** + * Used to grow the reference array. + * + * In general this should not be used as it does not take the offset into account. + * @lucene.internal */ public void grow(int newLength) { + assert offset == 0; // NOTE: senseless if offset != 0 bytes = ArrayUtil.grow(bytes, newLength); } diff --git a/lucene/core/src/java/org/apache/lucene/util/CharsRef.java b/lucene/core/src/java/org/apache/lucene/util/CharsRef.java index a1e34cda08f..fa9ebc34fb8 100644 --- a/lucene/core/src/java/org/apache/lucene/util/CharsRef.java +++ b/lucene/core/src/java/org/apache/lucene/util/CharsRef.java @@ -52,6 +52,8 @@ public final class CharsRef implements Comparable, CharSequence, Clone */ public CharsRef(char[] chars, int offset, int length) { assert chars != null; + assert offset >= 0; + assert length >= 0; assert chars.length >= offset + length; this.chars = chars; this.offset = offset; @@ -138,50 +140,52 @@ public final class CharsRef implements Comparable, CharSequence, Clone } /** - * Copies the given {@link CharsRef} referenced content into this instance - * starting at offset 0. + * Copies the given {@link CharsRef} referenced content into this instance. * * @param other * the {@link CharsRef} to copy */ - // TODO: why does this behave differently/not invoke copyChars(char[], int, int) ??? public void copyChars(CharsRef other) { - if (chars == null) { - chars = new char[other.length]; - } else { - chars = ArrayUtil.grow(chars, other.length); - } - System.arraycopy(other.chars, other.offset, chars, 0, other.length); - length = other.length; - offset = 0; + copyChars(other.chars, other.offset, other.length); } + /** + * Used to grow the reference array. + * + * In general this should not be used as it does not take the offset into account. + * @lucene.internal */ public void grow(int newLength) { + assert offset == 0; if (chars.length < newLength) { chars = ArrayUtil.grow(chars, newLength); } } /** - * Copies the given array into this CharsRef starting at offset 0 + * Copies the given array into this CharsRef. */ public void copyChars(char[] otherChars, int otherOffset, int otherLength) { - grow(otherLength); - System.arraycopy(otherChars, otherOffset, this.chars, 0, - otherLength); - this.offset = 0; - this.length = otherLength; + if (chars.length - offset < otherLength) { + chars = new char[otherLength]; + offset = 0; + } + System.arraycopy(otherChars, otherOffset, chars, offset, otherLength); + length = otherLength; } /** * Appends the given array to this CharsRef */ public void append(char[] otherChars, int otherOffset, int otherLength) { - final int newLength = length + otherLength; - grow(this.offset + newLength); - System.arraycopy(otherChars, otherOffset, this.chars, this.offset+length, - otherLength); - this.length += otherLength; + int newLen = length + otherLength; + if (chars.length - offset < newLen) { + char[] newChars = new char[newLen]; + System.arraycopy(chars, offset, newChars, 0, length); + offset = 0; + chars = newChars; + } + System.arraycopy(otherChars, otherOffset, chars, length+offset, otherLength); + length = newLen; } @Override @@ -194,11 +198,19 @@ public final class CharsRef implements Comparable, CharSequence, Clone } public char charAt(int index) { + // NOTE: must do a real check here to meet the specs of CharSequence + if (index < 0 || index >= length) { + throw new IndexOutOfBoundsException(); + } return chars[offset + index]; } public CharSequence subSequence(int start, int end) { - return new CharsRef(chars, offset + start, offset + end - 1); + // NOTE: must do a real check here to meet the specs of CharSequence + if (start < 0 || end > length || start > end) { + throw new IndexOutOfBoundsException(); + } + return new CharsRef(chars, offset + start, offset + end); } /** @deprecated */ diff --git a/lucene/core/src/java/org/apache/lucene/util/IntsRef.java b/lucene/core/src/java/org/apache/lucene/util/IntsRef.java index e2a4dc9d5f8..34da6348270 100644 --- a/lucene/core/src/java/org/apache/lucene/util/IntsRef.java +++ b/lucene/core/src/java/org/apache/lucene/util/IntsRef.java @@ -40,6 +40,9 @@ public final class IntsRef implements Comparable, Cloneable { public IntsRef(int[] ints, int offset, int length) { assert ints != null; + assert offset >= 0; + assert length >= 0; + assert ints.length >= offset + length; this.ints = ints; this.offset = offset; this.length = length; @@ -114,17 +117,21 @@ public final class IntsRef implements Comparable, Cloneable { } public void copyInts(IntsRef other) { - if (ints == null) { + if (ints.length - offset < other.length) { ints = new int[other.length]; - } else { - ints = ArrayUtil.grow(ints, other.length); + offset = 0; } - System.arraycopy(other.ints, other.offset, ints, 0, other.length); + System.arraycopy(other.ints, other.offset, ints, offset, other.length); length = other.length; - offset = 0; } + /** + * Used to grow the reference array. + * + * In general this should not be used as it does not take the offset into account. + * @lucene.internal */ public void grow(int newLength) { + assert offset == 0; if (ints.length < newLength) { ints = ArrayUtil.grow(ints, newLength); } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/BytesRefFSTEnum.java b/lucene/core/src/java/org/apache/lucene/util/fst/BytesRefFSTEnum.java index e4cc48f3114..1738e7dc118 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/BytesRefFSTEnum.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/BytesRefFSTEnum.java @@ -19,6 +19,7 @@ package org.apache.lucene.util.fst; import java.io.IOException; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; /** Enumerates all input (BytesRef) + output pairs in an @@ -110,7 +111,7 @@ public final class BytesRefFSTEnum extends FSTEnum { @Override protected void grow() { - current.grow(upto+1); + current.bytes = ArrayUtil.grow(current.bytes, upto+1); } private InputOutput setResult() { diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/IntsRefFSTEnum.java b/lucene/core/src/java/org/apache/lucene/util/fst/IntsRefFSTEnum.java index ecc24fd1dbd..127091c68eb 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/IntsRefFSTEnum.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/IntsRefFSTEnum.java @@ -17,6 +17,7 @@ package org.apache.lucene.util.fst; * limitations under the License. */ +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.IntsRef; import java.io.IOException; @@ -110,7 +111,7 @@ public final class IntsRefFSTEnum extends FSTEnum { @Override protected void grow() { - current.grow(upto+1); + current.ints = ArrayUtil.grow(current.ints, upto+1); } private InputOutput setResult() { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java index 926eba3f4d2..a3f892cf6cf 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java @@ -858,7 +858,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { for (Entry entry : entrySet) { int docId = docId(slowR, new Term("id", entry.getKey())); - expected.copyChars(entry.getValue()); + expected = new BytesRef(entry.getValue()); assertEquals(expected, asSortedSource.getBytes(docId, actual)); } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestBytesRef.java b/lucene/core/src/test/org/apache/lucene/util/TestBytesRef.java index ced6ae05349..bf9efc1f331 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestBytesRef.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestBytesRef.java @@ -48,4 +48,20 @@ public class TestBytesRef extends LuceneTestCase { // only for 4.x assertEquals("\uFFFF", new BytesRef("\uFFFF").utf8ToString()); } + + // LUCENE-3590, AIOOBE if you append to a bytesref with offset != 0 + public void testAppend() { + byte bytes[] = new byte[] { (byte)'a', (byte)'b', (byte)'c', (byte)'d' }; + BytesRef b = new BytesRef(bytes, 1, 3); // bcd + b.append(new BytesRef("e")); + assertEquals("bcde", b.utf8ToString()); + } + + // LUCENE-3590, AIOOBE if you copy to a bytesref with offset != 0 + public void testCopyBytes() { + byte bytes[] = new byte[] { (byte)'a', (byte)'b', (byte)'c', (byte)'d' }; + BytesRef b = new BytesRef(bytes, 1, 3); // bcd + b.copyBytes(new BytesRef("bcde")); + assertEquals("bcde", b.utf8ToString()); + } } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestCharsRef.java b/lucene/core/src/test/org/apache/lucene/util/TestCharsRef.java index 365ae451553..995a623ab5e 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestCharsRef.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestCharsRef.java @@ -67,4 +67,89 @@ public class TestCharsRef extends LuceneTestCase { } } + + // LUCENE-3590, AIOOBE if you append to a charsref with offset != 0 + public void testAppendChars() { + char chars[] = new char[] { 'a', 'b', 'c', 'd' }; + CharsRef c = new CharsRef(chars, 1, 3); // bcd + c.append(new char[] { 'e' }, 0, 1); + assertEquals("bcde", c.toString()); + } + + // LUCENE-3590, AIOOBE if you copy to a charsref with offset != 0 + public void testCopyChars() { + char chars[] = new char[] { 'a', 'b', 'c', 'd' }; + CharsRef c = new CharsRef(chars, 1, 3); // bcd + char otherchars[] = new char[] { 'b', 'c', 'd', 'e' }; + c.copyChars(otherchars, 0, 4); + assertEquals("bcde", c.toString()); + } + + // LUCENE-3590, AIOOBE if you copy to a charsref with offset != 0 + public void testCopyCharsRef() { + char chars[] = new char[] { 'a', 'b', 'c', 'd' }; + CharsRef c = new CharsRef(chars, 1, 3); // bcd + char otherchars[] = new char[] { 'b', 'c', 'd', 'e' }; + c.copyChars(new CharsRef(otherchars, 0, 4)); + assertEquals("bcde", c.toString()); + } + + // LUCENE-3590: fix charsequence to fully obey interface + public void testCharSequenceCharAt() { + CharsRef c = new CharsRef("abc"); + + assertEquals('b', c.charAt(1)); + + try { + c.charAt(-1); + fail(); + } catch (IndexOutOfBoundsException expected) { + // expected exception + } + + try { + c.charAt(3); + fail(); + } catch (IndexOutOfBoundsException expected) { + // expected exception + } + } + + // LUCENE-3590: fix off-by-one in subsequence, and fully obey interface + public void testCharSequenceSubSequence() { + CharSequence c = new CharsRef("abc"); + + // slice + assertEquals("a", c.subSequence(0, 1).toString()); + // empty subsequence + assertEquals("", c.subSequence(0, 0).toString()); + + try { + c.subSequence(-1, 1); + fail(); + } catch (IndexOutOfBoundsException expected) { + // expected exception + } + + try { + c.subSequence(0, -1); + fail(); + } catch (IndexOutOfBoundsException expected) { + // expected exception + } + + try { + c.subSequence(0, 4); + fail(); + } catch (IndexOutOfBoundsException expected) { + // expected exception + } + + try { + c.subSequence(2, 1); + fail(); + } catch (IndexOutOfBoundsException expected) { + // expected exception + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestIntsRef.java b/lucene/core/src/test/org/apache/lucene/util/TestIntsRef.java new file mode 100644 index 00000000000..f70f59abb03 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/TestIntsRef.java @@ -0,0 +1,40 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class TestIntsRef extends LuceneTestCase { + public void testEmpty() { + IntsRef i = new IntsRef(); + assertEquals(IntsRef.EMPTY_INTS, i.ints); + assertEquals(0, i.offset); + assertEquals(0, i.length); + } + + public void testFromInts() { + int ints[] = new int[] { 1, 2, 3, 4 }; + IntsRef i = new IntsRef(ints, 0, 4); + assertEquals(ints, i.ints); + assertEquals(0, i.offset); + assertEquals(4, i.length); + + IntsRef i2 = new IntsRef(ints, 1, 3); + assertEquals(new IntsRef(new int[] { 2, 3, 4 }, 0, 3), i2); + + assertFalse(i.equals(i2)); + } +}