mirror of https://github.com/apache/lucene.git
Change Similarity to use SmallFloat for norm encoding
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@349074 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
208760307f
commit
3ba5a1d9e5
|
@ -27,7 +27,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.IndexReader; // for javadoc
|
||||
import org.apache.lucene.index.IndexWriter; // for javadoc
|
||||
import org.apache.lucene.document.Field; // for javadoc
|
||||
|
||||
import org.apache.lucene.util.SmallFloat;
|
||||
|
||||
/** Expert: Scoring API.
|
||||
* <p>Subclasses implement search scoring.
|
||||
|
@ -116,7 +116,7 @@ public abstract class Similarity implements Serializable {
|
|||
|
||||
static {
|
||||
for (int i = 0; i < 256; i++)
|
||||
NORM_TABLE[i] = byteToFloat((byte)i);
|
||||
NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
|
||||
}
|
||||
|
||||
/** Decodes a normalization factor stored in an index.
|
||||
|
@ -170,7 +170,8 @@ public abstract class Similarity implements Serializable {
|
|||
|
||||
/** Encodes a normalization factor for storage in an index.
|
||||
*
|
||||
* <p>The encoding uses a five-bit exponent and three-bit mantissa, thus
|
||||
* <p>The encoding uses a three-bit mantissa, a five-bit exponent, and
|
||||
* the zero-exponent point at 15, thus
|
||||
* representing values from around 7x10^9 to 2x10^-9 with about one
|
||||
* significant decimal digit of accuracy. Zero is also represented.
|
||||
* Negative numbers are rounded up to zero. Values too large to represent
|
||||
|
@ -179,42 +180,10 @@ public abstract class Similarity implements Serializable {
|
|||
* value.
|
||||
*
|
||||
* @see Field#setBoost(float)
|
||||
* @see SmallFloat
|
||||
*/
|
||||
public static byte encodeNorm(float f) {
|
||||
return floatToByte(f);
|
||||
}
|
||||
|
||||
private static float byteToFloat(byte b) {
|
||||
if (b == 0) // zero is a special case
|
||||
return 0.0f;
|
||||
int mantissa = b & 7;
|
||||
int exponent = (b >> 3) & 31;
|
||||
int bits = ((exponent+(63-15)) << 24) | (mantissa << 21);
|
||||
return Float.intBitsToFloat(bits);
|
||||
}
|
||||
|
||||
private static byte floatToByte(float f) {
|
||||
if (f < 0.0f) // round negatives up to zero
|
||||
f = 0.0f;
|
||||
|
||||
if (f == 0.0f) // zero is a special case
|
||||
return 0;
|
||||
|
||||
int bits = Float.floatToIntBits(f); // parse float into parts
|
||||
int mantissa = (bits & 0xffffff) >> 21;
|
||||
int exponent = (((bits >> 24) & 0x7f) - 63) + 15;
|
||||
|
||||
if (exponent > 31) { // overflow: use max value
|
||||
exponent = 31;
|
||||
mantissa = 7;
|
||||
}
|
||||
|
||||
if (exponent < 0) { // underflow: use min value
|
||||
exponent = 0;
|
||||
mantissa = 1;
|
||||
}
|
||||
|
||||
return (byte)((exponent << 3) | mantissa); // pack into a byte
|
||||
return SmallFloat.floatToByte315(f);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -76,7 +76,6 @@ public class TestSmallFloat extends TestCase {
|
|||
|
||||
public void testFloatToByte() {
|
||||
Random rand = new Random(0);
|
||||
rand.nextFloat();
|
||||
// up iterations for more exhaustive test after changing something
|
||||
for (int i=0; i<100000; i++) {
|
||||
float f = Float.intBitsToFloat(rand.nextInt());
|
||||
|
@ -93,7 +92,23 @@ public class TestSmallFloat extends TestCase {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***
|
||||
// Do an exhaustive test of all possible floating point values
|
||||
// for the 315 float against the original norm encoding in Similarity.
|
||||
// Takes 75 seconds on my Pentium4 3GHz, with Java5 -server
|
||||
public void testAllFloats() {
|
||||
for(int i = Integer.MIN_VALUE;;i++) {
|
||||
float f = Float.intBitsToFloat(i);
|
||||
if (f==f) { // skip non-numbers
|
||||
byte b1 = orig_floatToByte(f);
|
||||
byte b2 = SmallFloat.floatToByte315(f);
|
||||
if (b1!=b2) {
|
||||
TestCase.fail("Failed floatToByte315 for float " + f);
|
||||
}
|
||||
}
|
||||
if (i==Integer.MAX_VALUE) break;
|
||||
}
|
||||
}
|
||||
***/
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue