Change Similarity to use SmallFloat for norm encoding

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@349074 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2005-11-26 04:17:08 +00:00
parent 208760307f
commit 3ba5a1d9e5
2 changed files with 24 additions and 40 deletions

View File

@ -27,7 +27,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader; // for javadoc import org.apache.lucene.index.IndexReader; // for javadoc
import org.apache.lucene.index.IndexWriter; // for javadoc import org.apache.lucene.index.IndexWriter; // for javadoc
import org.apache.lucene.document.Field; // for javadoc import org.apache.lucene.document.Field; // for javadoc
import org.apache.lucene.util.SmallFloat;
/** Expert: Scoring API. /** Expert: Scoring API.
* <p>Subclasses implement search scoring. * <p>Subclasses implement search scoring.
@ -116,7 +116,7 @@ public abstract class Similarity implements Serializable {
static { static {
for (int i = 0; i < 256; i++) for (int i = 0; i < 256; i++)
NORM_TABLE[i] = byteToFloat((byte)i); NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
} }
/** Decodes a normalization factor stored in an index. /** Decodes a normalization factor stored in an index.
@ -170,7 +170,8 @@ public abstract class Similarity implements Serializable {
/** Encodes a normalization factor for storage in an index. /** Encodes a normalization factor for storage in an index.
* *
* <p>The encoding uses a five-bit exponent and three-bit mantissa, thus * <p>The encoding uses a three-bit mantissa, a five-bit exponent, and
* the zero-exponent point at 15, thus
* representing values from around 7x10^9 to 2x10^-9 with about one * representing values from around 7x10^9 to 2x10^-9 with about one
* significant decimal digit of accuracy. Zero is also represented. * significant decimal digit of accuracy. Zero is also represented.
* Negative numbers are rounded up to zero. Values too large to represent * Negative numbers are rounded up to zero. Values too large to represent
@ -179,44 +180,12 @@ public abstract class Similarity implements Serializable {
* value. * value.
* *
* @see Field#setBoost(float) * @see Field#setBoost(float)
* @see SmallFloat
*/ */
public static byte encodeNorm(float f) { public static byte encodeNorm(float f) {
return floatToByte(f); return SmallFloat.floatToByte315(f);
} }
private static float byteToFloat(byte b) {
if (b == 0) // zero is a special case
return 0.0f;
int mantissa = b & 7;
int exponent = (b >> 3) & 31;
int bits = ((exponent+(63-15)) << 24) | (mantissa << 21);
return Float.intBitsToFloat(bits);
}
private static byte floatToByte(float f) {
if (f < 0.0f) // round negatives up to zero
f = 0.0f;
if (f == 0.0f) // zero is a special case
return 0;
int bits = Float.floatToIntBits(f); // parse float into parts
int mantissa = (bits & 0xffffff) >> 21;
int exponent = (((bits >> 24) & 0x7f) - 63) + 15;
if (exponent > 31) { // overflow: use max value
exponent = 31;
mantissa = 7;
}
if (exponent < 0) { // underflow: use min value
exponent = 0;
mantissa = 1;
}
return (byte)((exponent << 3) | mantissa); // pack into a byte
}
/** Computes a score factor based on a term or phrase's frequency in a /** Computes a score factor based on a term or phrase's frequency in a
* document. This value is multiplied by the {@link #idf(Term, Searcher)} * document. This value is multiplied by the {@link #idf(Term, Searcher)}

View File

@ -76,7 +76,6 @@ public class TestSmallFloat extends TestCase {
public void testFloatToByte() { public void testFloatToByte() {
Random rand = new Random(0); Random rand = new Random(0);
rand.nextFloat();
// up iterations for more exhaustive test after changing something // up iterations for more exhaustive test after changing something
for (int i=0; i<100000; i++) { for (int i=0; i<100000; i++) {
float f = Float.intBitsToFloat(rand.nextInt()); float f = Float.intBitsToFloat(rand.nextInt());
@ -93,7 +92,23 @@ public class TestSmallFloat extends TestCase {
} }
} }
/***
// Do an exhaustive test of all possible floating point values
// for the 315 float against the original norm encoding in Similarity.
// Takes 75 seconds on my Pentium4 3GHz, with Java5 -server
public void testAllFloats() {
for(int i = Integer.MIN_VALUE;;i++) {
float f = Float.intBitsToFloat(i);
if (f==f) { // skip non-numbers
byte b1 = orig_floatToByte(f);
byte b2 = SmallFloat.floatToByte315(f);
if (b1!=b2) {
TestCase.fail("Failed floatToByte315 for float " + f);
}
}
if (i==Integer.MAX_VALUE) break;
}
}
***/
} }