LUCENE-2937: fix floatToByte underflow detection

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1073850 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2011-02-23 17:55:54 +00:00
parent d0a8a1fcb8
commit 0045e0f4ea
3 changed files with 47 additions and 7 deletions

View File

@ -739,6 +739,13 @@ Bug fixes
* LUCENE-2891: IndexWriterConfig did not accept -1 in setReaderTermIndexDivisor,
which can be used to prevent loading the terms index into memory. (Shai Erera)
* LUCENE-2937: Encoding a float into a byte (e.g. encoding field norms during
indexing) had an underflow detection bug that caused floatToByte(f)==0 where
f was greater than 0, but slightly less than byteToFloat(1). This meant that
certain very small field norms (index_boost * length_norm) could have
been rounded down to 0 instead of being rounded up to the smallest
positive number. (yonik)
New features
* LUCENE-2128: Parallelized fetching document frequencies during weight

View File

@ -39,7 +39,7 @@ public class SmallFloat {
int fzero = (63-zeroExp)<<numMantissaBits;
int bits = Float.floatToRawIntBits(f);
int smallfloat = bits >> (24-numMantissaBits);
if (smallfloat < fzero) {
if (smallfloat <= fzero) {
return (bits<=0) ?
(byte)0 // negative numbers and zero both map to 0 byte
:(byte)1; // underflow is mapped to smallest non-zero number.
@ -75,7 +75,7 @@ public class SmallFloat {
public static byte floatToByte315(float f) {
int bits = Float.floatToRawIntBits(f);
int smallfloat = bits >> (24-3);
if (smallfloat < (63-15)<<3) {
if (smallfloat <= ((63-15)<<3)) {
return (bits<=0) ? (byte)0 : (byte)1;
}
if (smallfloat >= ((63-15)<<3) + 0x100) {
@ -103,7 +103,7 @@ public class SmallFloat {
public static byte floatToByte52(float f) {
int bits = Float.floatToRawIntBits(f);
int smallfloat = bits >> (24-5);
if (smallfloat < (63-2)<<5) {
if (smallfloat <= (63-2)<<5) {
return (bits<=0) ? (byte)0 : (byte)1;
}
if (smallfloat >= ((63-2)<<5) + 0x100) {

View File

@ -28,8 +28,8 @@ public class TestSmallFloat extends LuceneTestCase {
return Float.intBitsToFloat(bits);
}
// original lucene floatToByte
static byte orig_floatToByte(float f) {
// original lucene floatToByte (since lucene 1.3)
static byte orig_floatToByte_v13(float f) {
if (f < 0.0f) // round negatives up to zero
f = 0.0f;
@ -53,7 +53,40 @@ public class TestSmallFloat extends LuceneTestCase {
return (byte)((exponent << 3) | mantissa); // pack into a byte
}
// This is the original lucene floatToBytes (from v1.3)
// except with the underflow detection bug fixed for values like 5.8123817E-10f
static byte orig_floatToByte(float f) {
if (f < 0.0f) // round negatives up to zero
f = 0.0f;
if (f == 0.0f) // zero is a special case
return 0;
int bits = Float.floatToIntBits(f); // parse float into parts
int mantissa = (bits & 0xffffff) >> 21;
int exponent = (((bits >> 24) & 0x7f) - 63) + 15;
if (exponent > 31) { // overflow: use max value
exponent = 31;
mantissa = 7;
}
if (exponent < 0 || exponent == 0 && mantissa == 0) { // underflow: use min value
exponent = 0;
mantissa = 1;
}
return (byte)((exponent << 3) | mantissa); // pack into a byte
}
public void testByteToFloat() {
assertEquals(0, orig_floatToByte_v13(5.8123817E-10f)); // verify the old bug (see LUCENE-2937)
assertEquals(1, orig_floatToByte(5.8123817E-10f)); // verify it's fixed in this test code
assertEquals(1, SmallFloat.floatToByte315(5.8123817E-10f)); // verify it's fixed
assertEquals(1, orig_floatToByte(Float.MIN_VALUE));
for (int i=0; i<256; i++) {
float f1 = orig_byteToFloat((byte)i);
float f2 = SmallFloat.byteToFloat((byte)i, 3,15);
@ -95,8 +128,8 @@ public class TestSmallFloat extends LuceneTestCase {
if (f==f) { // skip non-numbers
byte b1 = orig_floatToByte(f);
byte b2 = SmallFloat.floatToByte315(f);
if (b1!=b2) {
TestCase.fail("Failed floatToByte315 for float " + f);
if (b1!=b2 || b2==0 && f>0) {
fail("Failed floatToByte315 for float " + f + " source bits="+Integer.toHexString(i) + " float raw bits=" + Integer.toHexString(Float.floatToRawIntBits(i)));
}
}
if (i==Integer.MAX_VALUE) break;