mirror of https://github.com/apache/lucene.git
LUCENE-2975: A hotspot bug corrupts IndexInput#readVInt()/readVLong() if the underlying readByte() is inlined (which happens e.g. in MMapDirectory). The loop was unwinded which makes the hotspot bug disappear.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1083245 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
978aaab9cf
commit
89c6480845
|
@ -761,6 +761,11 @@ Bug fixes
|
|||
been rounded down to 0 instead of being rounded up to the smallest
|
||||
positive number. (yonik)
|
||||
|
||||
* LUCENE-2975: A hotspot bug corrupts IndexInput#readVInt()/readVLong() if
|
||||
the underlying readByte() is inlined (which happens e.g. in MMapDirectory).
|
||||
The loop was unwinded which makes the hotspot bug disappear.
|
||||
(Uwe Schindler, Robert Muir, Mike McCandless)
|
||||
|
||||
New features
|
||||
|
||||
* LUCENE-2128: Parallelized fetching document frequencies during weight
|
||||
|
|
|
@ -80,6 +80,9 @@ public abstract class DataInput implements Cloneable {
|
|||
* @see DataOutput#writeVInt(int)
|
||||
*/
|
||||
public int readVInt() throws IOException {
|
||||
/* This is the original code of this method,
|
||||
* but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
|
||||
* readByte() is inlined. So the loop was unwinded!
|
||||
byte b = readByte();
|
||||
int i = b & 0x7F;
|
||||
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
|
||||
|
@ -87,6 +90,22 @@ public abstract class DataInput implements Cloneable {
|
|||
i |= (b & 0x7F) << shift;
|
||||
}
|
||||
return i;
|
||||
*/
|
||||
byte b = readByte();
|
||||
int i = b & 0x7F;
|
||||
if ((b & 0x80) == 0) return i;
|
||||
b = readByte();
|
||||
i |= (b & 0x7F) << 7;
|
||||
if ((b & 0x80) == 0) return i;
|
||||
b = readByte();
|
||||
i |= (b & 0x7F) << 14;
|
||||
if ((b & 0x80) == 0) return i;
|
||||
b = readByte();
|
||||
i |= (b & 0x7F) << 21;
|
||||
if ((b & 0x80) == 0) return i;
|
||||
b = readByte();
|
||||
assert (b & 0x80) == 0;
|
||||
return i | ((b & 0x7F) << 28);
|
||||
}
|
||||
|
||||
/** Reads eight bytes and returns a long.
|
||||
|
@ -100,6 +119,9 @@ public abstract class DataInput implements Cloneable {
|
|||
* nine bytes. Smaller values take fewer bytes. Negative numbers are not
|
||||
* supported. */
|
||||
public long readVLong() throws IOException {
|
||||
/* This is the original code of this method,
|
||||
* but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
|
||||
* readByte() is inlined. So the loop was unwinded!
|
||||
byte b = readByte();
|
||||
long i = b & 0x7F;
|
||||
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
|
||||
|
@ -107,6 +129,34 @@ public abstract class DataInput implements Cloneable {
|
|||
i |= (b & 0x7FL) << shift;
|
||||
}
|
||||
return i;
|
||||
*/
|
||||
byte b = readByte();
|
||||
long i = b & 0x7FL;
|
||||
if ((b & 0x80) == 0) return i;
|
||||
b = readByte();
|
||||
i |= (b & 0x7FL) << 7;
|
||||
if ((b & 0x80) == 0) return i;
|
||||
b = readByte();
|
||||
i |= (b & 0x7FL) << 14;
|
||||
if ((b & 0x80) == 0) return i;
|
||||
b = readByte();
|
||||
i |= (b & 0x7FL) << 21;
|
||||
if ((b & 0x80) == 0) return i;
|
||||
b = readByte();
|
||||
i |= (b & 0x7FL) << 28;
|
||||
if ((b & 0x80) == 0) return i;
|
||||
b = readByte();
|
||||
i |= (b & 0x7FL) << 35;
|
||||
if ((b & 0x80) == 0) return i;
|
||||
b = readByte();
|
||||
i |= (b & 0x7FL) << 42;
|
||||
if ((b & 0x80) == 0) return i;
|
||||
b = readByte();
|
||||
i |= (b & 0x7FL) << 49;
|
||||
if ((b & 0x80) == 0) return i;
|
||||
b = readByte();
|
||||
assert (b & 0x80) == 0;
|
||||
return i | ((b & 0x7FL) << 56);
|
||||
}
|
||||
|
||||
/** Reads a string.
|
||||
|
|
|
@ -19,51 +19,60 @@ package org.apache.lucene.index;
|
|||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class TestIndexInput extends LuceneTestCase {
|
||||
public void testRead() throws IOException {
|
||||
IndexInput is = new MockIndexInput(new byte[] {
|
||||
(byte) 0x80, 0x01,
|
||||
(byte) 0xFF, 0x7F,
|
||||
(byte) 0x80, (byte) 0x80, 0x01,
|
||||
(byte) 0x81, (byte) 0x80, 0x01,
|
||||
0x06, 'L', 'u', 'c', 'e', 'n', 'e',
|
||||
|
||||
// 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK")
|
||||
0x02, (byte) 0xC2, (byte) 0xBF,
|
||||
0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF,
|
||||
'c', 'e', (byte) 0xC2, (byte) 0xBF,
|
||||
'n', 'e',
|
||||
static final byte[] READ_TEST_BYTES = new byte[] {
|
||||
(byte) 0x80, 0x01,
|
||||
(byte) 0xFF, 0x7F,
|
||||
(byte) 0x80, (byte) 0x80, 0x01,
|
||||
(byte) 0x81, (byte) 0x80, 0x01,
|
||||
(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
|
||||
(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
|
||||
(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x7F,
|
||||
0x06, 'L', 'u', 'c', 'e', 'n', 'e',
|
||||
|
||||
// 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES")
|
||||
0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
|
||||
0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
|
||||
'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
|
||||
'n', 'e',
|
||||
// 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK")
|
||||
0x02, (byte) 0xC2, (byte) 0xBF,
|
||||
0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF,
|
||||
'c', 'e', (byte) 0xC2, (byte) 0xBF,
|
||||
'n', 'e',
|
||||
|
||||
// surrogate pairs
|
||||
// (U+1D11E "MUSICAL SYMBOL G CLEF")
|
||||
// (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE")
|
||||
0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
|
||||
0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
|
||||
(byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
|
||||
0x0E, 'L', 'u',
|
||||
(byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
|
||||
'c', 'e',
|
||||
(byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
|
||||
'n', 'e',
|
||||
// 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES")
|
||||
0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
|
||||
0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
|
||||
'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
|
||||
'n', 'e',
|
||||
|
||||
// null bytes
|
||||
0x01, 0x00,
|
||||
0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e',
|
||||
});
|
||||
|
||||
// surrogate pairs
|
||||
// (U+1D11E "MUSICAL SYMBOL G CLEF")
|
||||
// (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE")
|
||||
0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
|
||||
0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
|
||||
(byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
|
||||
0x0E, 'L', 'u',
|
||||
(byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
|
||||
'c', 'e',
|
||||
(byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
|
||||
'n', 'e',
|
||||
|
||||
// null bytes
|
||||
0x01, 0x00,
|
||||
0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e',
|
||||
};
|
||||
|
||||
private void checkReads(IndexInput is) throws IOException {
|
||||
assertEquals(128,is.readVInt());
|
||||
assertEquals(16383,is.readVInt());
|
||||
assertEquals(16384,is.readVInt());
|
||||
assertEquals(16385,is.readVInt());
|
||||
assertEquals(Integer.MAX_VALUE, is.readVInt());
|
||||
assertEquals((long) Integer.MAX_VALUE, is.readVLong());
|
||||
assertEquals(Long.MAX_VALUE, is.readVLong());
|
||||
assertEquals("Lucene",is.readString());
|
||||
|
||||
assertEquals("\u00BF",is.readString());
|
||||
|
@ -79,4 +88,24 @@ public class TestIndexInput extends LuceneTestCase {
|
|||
assertEquals("\u0000",is.readString());
|
||||
assertEquals("Lu\u0000ce\u0000ne",is.readString());
|
||||
}
|
||||
|
||||
// this test only checks BufferedIndexInput because MockIndexInput extends BufferedIndexInput
|
||||
public void testBufferedIndexInputRead() throws IOException {
|
||||
final IndexInput is = new MockIndexInput(READ_TEST_BYTES);
|
||||
checkReads(is);
|
||||
is.close();
|
||||
}
|
||||
|
||||
// this test checks the raw IndexInput methods as it uses RAMIndexInput which extends IndexInput directly
|
||||
public void testRawIndexInputRead() throws IOException {
|
||||
final RAMDirectory dir = new RAMDirectory();
|
||||
final IndexOutput os = dir.createOutput("foo");
|
||||
os.writeBytes(READ_TEST_BYTES, READ_TEST_BYTES.length);
|
||||
os.close();
|
||||
final IndexInput is = dir.openInput("foo");
|
||||
checkReads(is);
|
||||
is.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue