diff --git a/src/java/org/apache/lucene/search/Similarity.java b/src/java/org/apache/lucene/search/Similarity.java index f3e8af09d34..4674dd2da0f 100644 --- a/src/java/org/apache/lucene/search/Similarity.java +++ b/src/java/org/apache/lucene/search/Similarity.java @@ -27,7 +27,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; // for javadoc import org.apache.lucene.index.IndexWriter; // for javadoc import org.apache.lucene.document.Field; // for javadoc - +import org.apache.lucene.util.SmallFloat; /** Expert: Scoring API. *
Subclasses implement search scoring. @@ -116,7 +116,7 @@ public abstract class Similarity implements Serializable { static { for (int i = 0; i < 256; i++) - NORM_TABLE[i] = byteToFloat((byte)i); + NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i); } /** Decodes a normalization factor stored in an index. @@ -170,7 +170,8 @@ public abstract class Similarity implements Serializable { /** Encodes a normalization factor for storage in an index. * - *
The encoding uses a five-bit exponent and three-bit mantissa, thus + *
The encoding uses a three-bit mantissa, a five-bit exponent, and + * the zero-exponent point at 15, thus * representing values from around 7x10^9 to 2x10^-9 with about one * significant decimal digit of accuracy. Zero is also represented. * Negative numbers are rounded up to zero. Values too large to represent @@ -179,44 +180,12 @@ public abstract class Similarity implements Serializable { * value. * * @see Field#setBoost(float) + * @see SmallFloat */ public static byte encodeNorm(float f) { - return floatToByte(f); + return SmallFloat.floatToByte315(f); } - private static float byteToFloat(byte b) { - if (b == 0) // zero is a special case - return 0.0f; - int mantissa = b & 7; - int exponent = (b >> 3) & 31; - int bits = ((exponent+(63-15)) << 24) | (mantissa << 21); - return Float.intBitsToFloat(bits); - } - - private static byte floatToByte(float f) { - if (f < 0.0f) // round negatives up to zero - f = 0.0f; - - if (f == 0.0f) // zero is a special case - return 0; - - int bits = Float.floatToIntBits(f); // parse float into parts - int mantissa = (bits & 0xffffff) >> 21; - int exponent = (((bits >> 24) & 0x7f) - 63) + 15; - - if (exponent > 31) { // overflow: use max value - exponent = 31; - mantissa = 7; - } - - if (exponent < 0) { // underflow: use min value - exponent = 0; - mantissa = 1; - } - - return (byte)((exponent << 3) | mantissa); // pack into a byte - } - /** Computes a score factor based on a term or phrase's frequency in a * document. This value is multiplied by the {@link #idf(Term, Searcher)} diff --git a/src/test/org/apache/lucene/util/TestSmallFloat.java b/src/test/org/apache/lucene/util/TestSmallFloat.java index 00986adb740..58c49b49df5 100644 --- a/src/test/org/apache/lucene/util/TestSmallFloat.java +++ b/src/test/org/apache/lucene/util/TestSmallFloat.java @@ -76,7 +76,6 @@ public class TestSmallFloat extends TestCase { public void testFloatToByte() { Random rand = new Random(0); - rand.nextFloat(); // up iterations for more exhaustive test after changing something for (int i=0; i<100000; i++) { float f = Float.intBitsToFloat(rand.nextInt()); @@ -93,7 +92,23 @@ public class TestSmallFloat extends TestCase { } } - - + /*** + // Do an exhaustive test of all possible floating point values + // for the 315 float against the original norm encoding in Similarity. + // Takes 75 seconds on my Pentium4 3GHz, with Java5 -server + public void testAllFloats() { + for(int i = Integer.MIN_VALUE;;i++) { + float f = Float.intBitsToFloat(i); + if (f==f) { // skip non-numbers + byte b1 = orig_floatToByte(f); + byte b2 = SmallFloat.floatToByte315(f); + if (b1!=b2) { + TestCase.fail("Failed floatToByte315 for float " + f); + } + } + if (i==Integer.MAX_VALUE) break; + } + } + ***/ }