Change Similarity to use SmallFloat for norm encoding

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@349074 13f79535-47bb-0310-9956-ffa450edef68
2005-11-26 04:17:08 +00:00 · 2005-11-26 04:17:08 +00:00 · 3ba5a1d9e5
parent 208760307f
commit 3ba5a1d9e5
2 changed files with 24 additions and 40 deletions
--- a/src/java/org/apache/lucene/search/Similarity.java
+++ b/src/java/org/apache/lucene/search/Similarity.java
@ -27,7 +27,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader;       // for javadoc
 import org.apache.lucene.index.IndexWriter;       // for javadoc
 import org.apache.lucene.document.Field;          // for javadoc
-
+import org.apache.lucene.util.SmallFloat;
 /** Expert: Scoring API.
 * <p>Subclasses implement search scoring.
@ -116,7 +116,7 @@ public abstract class Similarity implements Serializable {
  static {
    for (int i = 0; i < 256; i++)
-      NORM_TABLE[i] = byteToFloat((byte)i);
+      NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
  }
  /** Decodes a normalization factor stored in an index.
@ -170,7 +170,8 @@ public abstract class Similarity implements Serializable {
  /** Encodes a normalization factor for storage in an index.
   *
-   * <p>The encoding uses a five-bit exponent and three-bit mantissa, thus
+   * <p>The encoding uses a three-bit mantissa, a five-bit exponent, and
   * the zero-exponent point at 15, thus
   * representing values from around 7x10^9 to 2x10^-9 with about one
   * significant decimal digit of accuracy.  Zero is also represented.
   * Negative numbers are rounded up to zero.  Values too large to represent
@ -179,42 +180,10 @@ public abstract class Similarity implements Serializable {
   * value.
   *
   * @see Field#setBoost(float)
   * @see SmallFloat
   */
  public static byte encodeNorm(float f) {
-    return floatToByte(f);
+    return SmallFloat.floatToByte315(f);
  }
  /**
   * Expands a one-byte norm into the float it stands for.
   *
   * <p>The low three bits of the byte are taken as the mantissa and the next
   * five bits as the exponent. The exponent is re-biased by {@code 63-15} and
   * both fields are shifted into the int pattern handed to
   * {@link Float#intBitsToFloat(int)}. A zero byte always decodes to 0.0f.
   *
   * @param b the encoded byte
   * @return the decoded float value
   */
  private static float byteToFloat(byte b) {
    if (b != 0) {
      final int low3 = b & 0x07;                  // mantissa field
      final int high5 = (b & 0xf8) >> 3;          // exponent field, 0..31
      final int pattern = (low3 << 21) | ((high5 + (63 - 15)) << 24);
      return Float.intBitsToFloat(pattern);
    }
    return 0.0f;                                  // zero is a special case
  }
  /**
   * Packs a float into a single byte: exponent in the upper five bits,
   * mantissa in the lower three.
   *
   * <p>Zero and every negative input produce byte 0. An input whose re-biased
   * exponent exceeds 31 is clamped to the largest code (exponent 31,
   * mantissa 7); one whose re-biased exponent is below 0 is clamped to the
   * smallest non-zero code (exponent 0, mantissa 1).
   *
   * @param f the value to encode
   * @return the encoded byte
   */
  private static byte floatToByte(float f) {
    if (f <= 0.0f) {                              // negatives round up to zero; zero encodes as 0
      return 0;
    }

    final int raw = Float.floatToIntBits(f);      // parse float into parts
    final int exp = ((raw >> 24) & 0x7f) - 63 + 15;

    if (exp > 31) {                               // overflow: use max value
      return (byte) ((31 << 3) | 7);
    }
    if (exp < 0) {                                // underflow: use min value
      return (byte) 1;
    }

    final int frac = (raw >> 21) & 7;             // bits 21..23 of the pattern
    return (byte) ((exp << 3) | frac);            // pack into a byte
  }
--- a/src/test/org/apache/lucene/util/TestSmallFloat.java
+++ b/src/test/org/apache/lucene/util/TestSmallFloat.java
@ -76,7 +76,6 @@ public class TestSmallFloat extends TestCase {
  public void testFloatToByte() {
    Random rand = new Random(0);
    rand.nextFloat();
    // increase the iteration count for a more exhaustive test after changing something
    for (int i=0; i<100000; i++) {
      float f = Float.intBitsToFloat(rand.nextInt());
@ -93,7 +92,23 @@ public class TestSmallFloat extends TestCase {
    }
  }
-
+  /***
-
+  // Do an exhaustive test of all possible floating point values
  // for the 315 float against the original norm encoding in Similarity.
  // Takes 75 seconds on my Pentium4 3GHz, with Java5 -server
  public void testAllFloats() {
    // Visit every 32-bit pattern exactly once.  The exit check sits after the
    // body because an int counter can never exceed Integer.MAX_VALUE.
    int bits = Integer.MIN_VALUE;
    do {
      float value = Float.intBitsToFloat(bits);
      if (!Float.isNaN(value)) {                  // skip non-numbers
        byte expected = orig_floatToByte(value);
        byte actual = SmallFloat.floatToByte315(value);
        if (expected != actual) {
          TestCase.fail("Failed floatToByte315 for float " + value);
        }
      }
    } while (bits++ != Integer.MAX_VALUE);
  }
  ***/
 }