LUCENE-2975: A hotspot bug corrupts IndexInput#readVInt()/readVLong() if the underlying readByte() is inlined (which happens e.g. in MMapDirectory). The loop was unwinded which makes the hotspot bug disappear.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1083245 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2011-03-19 18:13:01 +00:00
parent 978aaab9cf
commit 89c6480845
3 changed files with 117 additions and 33 deletions

View File

@ -761,6 +761,11 @@ Bug fixes
been rounded down to 0 instead of being rounded up to the smallest
positive number. (yonik)
* LUCENE-2975: A hotspot bug corrupts IndexInput#readVInt()/readVLong() if
the underlying readByte() is inlined (which happens e.g. in MMapDirectory).
The loop was unwinded which makes the hotspot bug disappear.
(Uwe Schindler, Robert Muir, Mike McCandless)
New features
* LUCENE-2128: Parallelized fetching document frequencies during weight

View File

@ -80,6 +80,9 @@ public abstract class DataInput implements Cloneable {
* @see DataOutput#writeVInt(int)
*/
public int readVInt() throws IOException {
/* This is the original code of this method,
* but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
* readByte() is inlined. So the loop was unwinded!
byte b = readByte();
int i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
@ -87,6 +90,22 @@ public abstract class DataInput implements Cloneable {
i |= (b & 0x7F) << shift;
}
return i;
*/
byte b = readByte();
int i = b & 0x7F;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7F) << 7;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7F) << 14;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7F) << 21;
if ((b & 0x80) == 0) return i;
b = readByte();
assert (b & 0x80) == 0;
return i | ((b & 0x7F) << 28);
}
/** Reads eight bytes and returns a long.
@ -100,6 +119,9 @@ public abstract class DataInput implements Cloneable {
* nine bytes. Smaller values take fewer bytes. Negative numbers are not
* supported. */
public long readVLong() throws IOException {
/* This is the original code of this method,
* but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
* readByte() is inlined. So the loop was unwinded!
byte b = readByte();
long i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
@ -107,6 +129,34 @@ public abstract class DataInput implements Cloneable {
i |= (b & 0x7FL) << shift;
}
return i;
*/
byte b = readByte();
long i = b & 0x7FL;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 7;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 14;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 21;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 28;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 35;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 42;
if ((b & 0x80) == 0) return i;
b = readByte();
i |= (b & 0x7FL) << 49;
if ((b & 0x80) == 0) return i;
b = readByte();
assert (b & 0x80) == 0;
return i | ((b & 0x7FL) << 56);
}
/** Reads a string.

View File

@ -19,16 +19,21 @@ package org.apache.lucene.index;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
public class TestIndexInput extends LuceneTestCase {
public void testRead() throws IOException {
IndexInput is = new MockIndexInput(new byte[] {
static final byte[] READ_TEST_BYTES = new byte[] {
(byte) 0x80, 0x01,
(byte) 0xFF, 0x7F,
(byte) 0x80, (byte) 0x80, 0x01,
(byte) 0x81, (byte) 0x80, 0x01,
(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x7F,
0x06, 'L', 'u', 'c', 'e', 'n', 'e',
// 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK")
@ -58,12 +63,16 @@ public class TestIndexInput extends LuceneTestCase {
// null bytes
0x01, 0x00,
0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e',
});
};
private void checkReads(IndexInput is) throws IOException {
assertEquals(128,is.readVInt());
assertEquals(16383,is.readVInt());
assertEquals(16384,is.readVInt());
assertEquals(16385,is.readVInt());
assertEquals(Integer.MAX_VALUE, is.readVInt());
assertEquals((long) Integer.MAX_VALUE, is.readVLong());
assertEquals(Long.MAX_VALUE, is.readVLong());
assertEquals("Lucene",is.readString());
assertEquals("\u00BF",is.readString());
@ -79,4 +88,24 @@ public class TestIndexInput extends LuceneTestCase {
assertEquals("\u0000",is.readString());
assertEquals("Lu\u0000ce\u0000ne",is.readString());
}
// this test only checks BufferedIndexInput because MockIndexInput extends BufferedIndexInput
public void testBufferedIndexInputRead() throws IOException {
final IndexInput is = new MockIndexInput(READ_TEST_BYTES);
checkReads(is);
is.close();
}
// this test checks the raw IndexInput methods as it uses RAMIndexInput which extends IndexInput directly
public void testRawIndexInputRead() throws IOException {
final RAMDirectory dir = new RAMDirectory();
final IndexOutput os = dir.createOutput("foo");
os.writeBytes(READ_TEST_BYTES, READ_TEST_BYTES.length);
os.close();
final IndexInput is = dir.openInput("foo");
checkReads(is);
is.close();
dir.close();
}
}