LUCENE-2937: fix floatToByte underflow detection

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1073850 13f79535-47bb-0310-9956-ffa450edef68
2025-02-06 01:58:44 +00:00 · 2011-02-23 17:55:54 +00:00 · 2011-02-23 17:55:54 +00:00 · 0045e0f4ea
commit 0045e0f4ea
parent d0a8a1fcb8
3 changed files with 47 additions and 7 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -739,6 +739,13 @@ Bug fixes
 * LUCENE-2891: IndexWriterConfig did not accept -1 in setReaderTermIndexDivisor,
  which can be used to prevent loading the terms index into memory. (Shai Erera)

+* LUCENE-2937: Encoding a float into a byte (e.g. encoding field norms during
+  indexing) had an underflow detection bug that caused floatToByte(f)==0 where
+  f was greater than 0, but slightly less than byteToFloat(1).  This meant that
+  certain very small field norms (index_boost * length_norm) could have
+  been rounded down to 0 instead of being rounded up to the smallest
+  positive number.  (yonik)
+
 New features

 * LUCENE-2128: Parallelized fetching document frequencies during weight
--- a/lucene/src/java/org/apache/lucene/util/SmallFloat.java
+++ b/lucene/src/java/org/apache/lucene/util/SmallFloat.java
@ -39,7 +39,7 @@ public class SmallFloat {
    int fzero = (63-zeroExp)<<numMantissaBits;
    int bits = Float.floatToRawIntBits(f);
    int smallfloat = bits >> (24-numMantissaBits);
-    if (smallfloat < fzero) {
+    if (smallfloat <= fzero) {
      return (bits<=0) ?
        (byte)0   // negative numbers and zero both map to 0 byte
       :(byte)1;  // underflow is mapped to smallest non-zero number.
@ -75,7 +75,7 @@ public class SmallFloat {
  public static byte floatToByte315(float f) {
    int bits = Float.floatToRawIntBits(f);
    int smallfloat = bits >> (24-3);
-    if (smallfloat < (63-15)<<3) {
+    if (smallfloat <= ((63-15)<<3)) {
      return (bits<=0) ? (byte)0 : (byte)1;
    }
    if (smallfloat >= ((63-15)<<3) + 0x100) {
@ -103,7 +103,7 @@ public class SmallFloat {
  public static byte floatToByte52(float f) {
    int bits = Float.floatToRawIntBits(f);
    int smallfloat = bits >> (24-5);
-    if (smallfloat < (63-2)<<5) {
+    if (smallfloat <= (63-2)<<5) {
      return (bits<=0) ? (byte)0 : (byte)1;
    }
    if (smallfloat >= ((63-2)<<5) + 0x100) {
--- a/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java
+++ b/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java
@ -28,8 +28,8 @@ public class TestSmallFloat extends LuceneTestCase {
    return Float.intBitsToFloat(bits);
  }

-  // original lucene floatToByte
-  static byte orig_floatToByte(float f) {
+  // original lucene floatToByte (since lucene 1.3)
+  static byte orig_floatToByte_v13(float f) {
    if (f < 0.0f)                                 // round negatives up to zero
      f = 0.0f;

@ -53,7 +53,40 @@ public class TestSmallFloat extends LuceneTestCase {
    return (byte)((exponent << 3) | mantissa);    // pack into a byte
  }

+  // This is the original lucene floatToBytes (from v1.3)
+  // except with the underflow detection bug fixed for values like 5.8123817E-10f
+  static byte orig_floatToByte(float f) {
+    if (f < 0.0f)                                 // round negatives up to zero
+      f = 0.0f;
+
+    if (f == 0.0f)                                // zero is a special case
+      return 0;
+
+    int bits = Float.floatToIntBits(f);           // parse float into parts
+    int mantissa = (bits & 0xffffff) >> 21;
+    int exponent = (((bits >> 24) & 0x7f) - 63) + 15;
+
+    if (exponent > 31) {                          // overflow: use max value
+      exponent = 31;
+      mantissa = 7;
+    }
+
+    if (exponent < 0 || exponent == 0 && mantissa == 0) { // underflow: use min value
+      exponent = 0;
+      mantissa = 1;
+    }
+
+    return (byte)((exponent << 3) | mantissa);    // pack into a byte
+  }
+
+
  public void testByteToFloat() {
+    assertEquals(0, orig_floatToByte_v13(5.8123817E-10f));       // verify the old bug (see LUCENE-2937)
+    assertEquals(1, orig_floatToByte(5.8123817E-10f));           // verify it's fixed in this test code
+    assertEquals(1, SmallFloat.floatToByte315(5.8123817E-10f));  // verify it's fixed
+
+    assertEquals(1, orig_floatToByte(Float.MIN_VALUE));
+
    for (int i=0; i<256; i++) {
      float f1 = orig_byteToFloat((byte)i);
      float f2 = SmallFloat.byteToFloat((byte)i, 3,15);
@ -95,8 +128,8 @@ public class TestSmallFloat extends LuceneTestCase {
      if (f==f) { // skip non-numbers
        byte b1 = orig_floatToByte(f);
        byte b2 = SmallFloat.floatToByte315(f);
-        if (b1!=b2) {
-          TestCase.fail("Failed floatToByte315 for float " + f);
+        if (b1!=b2 || b2==0 && f>0) {
+          fail("Failed floatToByte315 for float " + f + " source bits="+Integer.toHexString(i) + " float raw bits=" + Integer.toHexString(Float.floatToRawIntBits(i)));
        }
      }
      if (i==Integer.MAX_VALUE) break;