mirror of https://github.com/apache/lucene.git
Change Similarity to use SmallFloat for norm encoding
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@349074 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
208760307f
commit
3ba5a1d9e5
|
@ -27,7 +27,7 @@ import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.IndexReader; // for javadoc
|
import org.apache.lucene.index.IndexReader; // for javadoc
|
||||||
import org.apache.lucene.index.IndexWriter; // for javadoc
|
import org.apache.lucene.index.IndexWriter; // for javadoc
|
||||||
import org.apache.lucene.document.Field; // for javadoc
|
import org.apache.lucene.document.Field; // for javadoc
|
||||||
|
import org.apache.lucene.util.SmallFloat;
|
||||||
|
|
||||||
/** Expert: Scoring API.
|
/** Expert: Scoring API.
|
||||||
* <p>Subclasses implement search scoring.
|
* <p>Subclasses implement search scoring.
|
||||||
|
@ -116,7 +116,7 @@ public abstract class Similarity implements Serializable {
|
||||||
|
|
||||||
static {
|
static {
|
||||||
for (int i = 0; i < 256; i++)
|
for (int i = 0; i < 256; i++)
|
||||||
NORM_TABLE[i] = byteToFloat((byte)i);
|
NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Decodes a normalization factor stored in an index.
|
/** Decodes a normalization factor stored in an index.
|
||||||
|
@ -170,7 +170,8 @@ public abstract class Similarity implements Serializable {
|
||||||
|
|
||||||
/** Encodes a normalization factor for storage in an index.
|
/** Encodes a normalization factor for storage in an index.
|
||||||
*
|
*
|
||||||
* <p>The encoding uses a five-bit exponent and three-bit mantissa, thus
|
* <p>The encoding uses a three-bit mantissa, a five-bit exponent, and
|
||||||
|
* the zero-exponent point at 15, thus
|
||||||
* representing values from around 7x10^9 to 2x10^-9 with about one
|
* representing values from around 7x10^9 to 2x10^-9 with about one
|
||||||
* significant decimal digit of accuracy. Zero is also represented.
|
* significant decimal digit of accuracy. Zero is also represented.
|
||||||
* Negative numbers are rounded up to zero. Values too large to represent
|
* Negative numbers are rounded up to zero. Values too large to represent
|
||||||
|
@ -179,44 +180,12 @@ public abstract class Similarity implements Serializable {
|
||||||
* value.
|
* value.
|
||||||
*
|
*
|
||||||
* @see Field#setBoost(float)
|
* @see Field#setBoost(float)
|
||||||
|
* @see SmallFloat
|
||||||
*/
|
*/
|
||||||
public static byte encodeNorm(float f) {
|
public static byte encodeNorm(float f) {
|
||||||
return floatToByte(f);
|
return SmallFloat.floatToByte315(f);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Decodes a single byte into a float. The low three bits of {@code b} are
 * taken as the mantissa and the next five bits as the exponent; a byte of
 * zero decodes to exactly 0.0f.
 *
 * @param b the encoded byte
 * @return the float assembled from the mantissa and exponent bits of {@code b}
 */
private static float byteToFloat(byte b) {
|
|
||||||
if (b == 0) // zero is a special case
|
|
||||||
return 0.0f;
|
|
||||||
int mantissa = b & 7; // low three bits of the byte
|
|
||||||
int exponent = (b >> 3) & 31; // next five bits; the mask drops any sign extension from the shift
|
|
||||||
int bits = ((exponent+(63-15)) << 24) | (mantissa << 21); // exponent plus 48 goes into bits 24 and up, mantissa into bits 21-23
|
|
||||||
return Float.intBitsToFloat(bits); // reinterpret the assembled bit pattern as a float
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Encodes a float into a single byte holding a five-bit exponent (upper bits)
 * and a three-bit mantissa (lower bits). Negative inputs and zero encode to 0.
 * Values whose exponent is too large are clamped to the largest code
 * (exponent 31, mantissa 7); values whose exponent is too small are clamped to
 * the smallest non-zero code (exponent 0, mantissa 1).
 *
 * @param f the value to encode
 * @return the packed byte, {@code (exponent << 3) | mantissa}
 */
private static byte floatToByte(float f) {
|
|
||||||
if (f < 0.0f) // round negatives up to zero
|
|
||||||
f = 0.0f;
|
|
||||||
|
|
||||||
if (f == 0.0f) // zero is a special case
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
int bits = Float.floatToIntBits(f); // parse float into parts
|
|
||||||
int mantissa = (bits & 0xffffff) >> 21; // keep bits 21-23 of the raw pattern; bits 0-20 are discarded
|
|
||||||
int exponent = (((bits >> 24) & 0x7f) - 63) + 15; // take bits 24-30 and subtract 48 (63 - 15)
|
|
||||||
|
|
||||||
if (exponent > 31) { // overflow: use max value
|
|
||||||
exponent = 31;
|
|
||||||
mantissa = 7;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (exponent < 0) { // underflow: use min value
|
|
||||||
exponent = 0;
|
|
||||||
mantissa = 1; // yields byte value 1, since byte value 0 is what zero encodes to
|
|
||||||
}
|
|
||||||
|
|
||||||
return (byte)((exponent << 3) | mantissa); // pack into a byte
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/** Computes a score factor based on a term or phrase's frequency in a
|
/** Computes a score factor based on a term or phrase's frequency in a
|
||||||
* document. This value is multiplied by the {@link #idf(Term, Searcher)}
|
* document. This value is multiplied by the {@link #idf(Term, Searcher)}
|
||||||
|
|
|
@ -76,7 +76,6 @@ public class TestSmallFloat extends TestCase {
|
||||||
|
|
||||||
public void testFloatToByte() {
|
public void testFloatToByte() {
|
||||||
Random rand = new Random(0);
|
Random rand = new Random(0);
|
||||||
rand.nextFloat();
|
|
||||||
// up iterations for more exhaustive test after changing something
|
// up iterations for more exhaustive test after changing something
|
||||||
for (int i=0; i<100000; i++) {
|
for (int i=0; i<100000; i++) {
|
||||||
float f = Float.intBitsToFloat(rand.nextInt());
|
float f = Float.intBitsToFloat(rand.nextInt());
|
||||||
|
@ -93,7 +92,23 @@ public class TestSmallFloat extends TestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***
|
||||||
|
// Do an exhaustive test of all possible floating point values
|
||||||
|
// for the 315 float against the original norm encoding in Similarity.
|
||||||
|
// Takes 75 seconds on my Pentium4 3GHz, with Java5 -server
|
||||||
|
public void testAllFloats() {
|
||||||
|
for(int i = Integer.MIN_VALUE;;i++) {
|
||||||
|
float f = Float.intBitsToFloat(i);
|
||||||
|
if (f==f) { // skip non-numbers
|
||||||
|
byte b1 = orig_floatToByte(f);
|
||||||
|
byte b2 = SmallFloat.floatToByte315(f);
|
||||||
|
if (b1!=b2) {
|
||||||
|
TestCase.fail("Failed floatToByte315 for float " + f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (i==Integer.MAX_VALUE) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
***/
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue