LUCENE-2975: A hotspot bug corrupts IndexInput#readVInt()/readVLong() if the underlying readByte() is inlined (which happens e.g. in MMapDirectory). The loop was unwinded which makes the hotspot bug disappear.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1083245 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2011-03-19 18:13:01 +00:00
parent 978aaab9cf
commit 89c6480845
3 changed files with 117 additions and 33 deletions

View File

@ -761,6 +761,11 @@ Bug fixes
been rounded down to 0 instead of being rounded up to the smallest
positive number. (yonik)
* LUCENE-2975: A hotspot bug corrupts IndexInput#readVInt()/readVLong() if
the underlying readByte() is inlined (which happens e.g. in MMapDirectory).
The loop was unwinded which makes the hotspot bug disappear.
(Uwe Schindler, Robert Muir, Mike McCandless)
New features
* LUCENE-2128: Parallelized fetching document frequencies during weight

View File

@ -80,6 +80,9 @@ public abstract class DataInput implements Cloneable {
* @see DataOutput#writeVInt(int)
*/
public int readVInt() throws IOException {
/* This is the original code of this method,
* but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
* readByte() is inlined. So the loop was unwinded!
byte b = readByte();
int i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
@ -87,6 +90,22 @@ public abstract class DataInput implements Cloneable {
i |= (b & 0x7F) << shift;
}
return i;
*/
byte b = readByte();
int i = b & 0x7F;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7F) << 7;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7F) << 14;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7F) << 21;
if ((b & 0x80) == 0) return i;
b = readByte();
assert (b & 0x80) == 0;
return i | ((b & 0x7F) << 28);
}
/** Reads eight bytes and returns a long.
@ -100,6 +119,9 @@ public abstract class DataInput implements Cloneable {
* nine bytes. Smaller values take fewer bytes. Negative numbers are not
* supported. */
public long readVLong() throws IOException {
/* This is the original code of this method,
* but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
* readByte() is inlined. So the loop was unwinded!
byte b = readByte();
long i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
@ -107,6 +129,34 @@ public abstract class DataInput implements Cloneable {
i |= (b & 0x7FL) << shift;
}
return i;
*/
byte b = readByte();
long i = b & 0x7FL;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 7;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 14;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 21;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 28;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 35;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 42;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 49;
if ((b & 0x80) == 0) return i;
b = readByte();
assert (b & 0x80) == 0;
return i | ((b & 0x7FL) << 56);
}
/** Reads a string.

View File

@ -19,51 +19,60 @@ package org.apache.lucene.index;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
public class TestIndexInput extends LuceneTestCase {
public void testRead() throws IOException {
IndexInput is = new MockIndexInput(new byte[] {
(byte) 0x80, 0x01,
(byte) 0xFF, 0x7F,
(byte) 0x80, (byte) 0x80, 0x01,
(byte) 0x81, (byte) 0x80, 0x01,
0x06, 'L', 'u', 'c', 'e', 'n', 'e',
// 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK")
0x02, (byte) 0xC2, (byte) 0xBF,
0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF,
'c', 'e', (byte) 0xC2, (byte) 0xBF,
'n', 'e',
static final byte[] READ_TEST_BYTES = new byte[] {
(byte) 0x80, 0x01,
(byte) 0xFF, 0x7F,
(byte) 0x80, (byte) 0x80, 0x01,
(byte) 0x81, (byte) 0x80, 0x01,
(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x7F,
0x06, 'L', 'u', 'c', 'e', 'n', 'e',
// 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES")
0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
'n', 'e',
// 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK")
0x02, (byte) 0xC2, (byte) 0xBF,
0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF,
'c', 'e', (byte) 0xC2, (byte) 0xBF,
'n', 'e',
// surrogate pairs
// (U+1D11E "MUSICAL SYMBOL G CLEF")
// (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE")
0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
(byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
0x0E, 'L', 'u',
(byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
'c', 'e',
(byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
'n', 'e',
// 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES")
0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
'n', 'e',
// null bytes
0x01, 0x00,
0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e',
});
// surrogate pairs
// (U+1D11E "MUSICAL SYMBOL G CLEF")
// (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE")
0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
(byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
0x0E, 'L', 'u',
(byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
'c', 'e',
(byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
'n', 'e',
// null bytes
0x01, 0x00,
0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e',
};
private void checkReads(IndexInput is) throws IOException {
assertEquals(128,is.readVInt());
assertEquals(16383,is.readVInt());
assertEquals(16384,is.readVInt());
assertEquals(16385,is.readVInt());
assertEquals(Integer.MAX_VALUE, is.readVInt());
assertEquals((long) Integer.MAX_VALUE, is.readVLong());
assertEquals(Long.MAX_VALUE, is.readVLong());
assertEquals("Lucene",is.readString());
assertEquals("\u00BF",is.readString());
@ -79,4 +88,24 @@ public class TestIndexInput extends LuceneTestCase {
assertEquals("\u0000",is.readString());
assertEquals("Lu\u0000ce\u0000ne",is.readString());
}
// this test only checks BufferedIndexInput because MockIndexInput extends BufferedIndexInput
public void testBufferedIndexInputRead() throws IOException {
final IndexInput is = new MockIndexInput(READ_TEST_BYTES);
checkReads(is);
is.close();
}
// this test checks the raw IndexInput methods as it uses RAMIndexInput which extends IndexInput directly
public void testRawIndexInputRead() throws IOException {
final RAMDirectory dir = new RAMDirectory();
final IndexOutput os = dir.createOutput("foo");
os.writeBytes(READ_TEST_BYTES, READ_TEST_BYTES.length);
os.close();
final IndexInput is = dir.openInput("foo");
checkReads(is);
is.close();
dir.close();
}
}