LUCENE-1260: allow Similarity instance to customize how norms are encoded/decoded

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@883852 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2009-11-24 20:26:07 +00:00
parent 55046c8d74
commit 7e3700e1c8
29 changed files with 92 additions and 69 deletions
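The point of the change: a custom Similarity can now override how the per-field norm is packed into (and unpacked from) the single byte stored in the index. A minimal sketch of what that enables, using the instance methods this commit introduces; the class name FlatNormSimilarity and its behavior are hypothetical, and the one-byte constraint still applies:

import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.util.SmallFloat;

// Hypothetical subclass: stores every norm as the byte for 1.0f so field length
// stops influencing scores, while decoding stays compatible with the default table.
public class FlatNormSimilarity extends DefaultSimilarity {

  @Override
  public byte encodeNormValue(float f) {
    // Ignore the computed norm and always store the encoding of 1.0f.
    return SmallFloat.floatToByte315(1.0f);
  }

  @Override
  public float decodeNormValue(byte b) {
    // Same SmallFloat 3.15 format the default NORM_TABLE is built from.
    return SmallFloat.byte315ToFloat(b);
  }
}

For the override to matter on both paths it would typically be installed on the IndexWriter and the Searcher, or globally via Similarity.setDefault, since several hunks below now route through Similarity.getDefault().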

View File

@@ -11,6 +11,12 @@ API Changes
 * LUCENE-2076: Rename FSDirectory.getFile -> getDirectory. (George
   Aroush via Mike McCandless)
+* LUCENE-1260: Change norm encode (float->byte) and decode
+  (byte->float) to be instance methods not static methods. This way a
+  custom Similarity can alter how norms are encoded, though they must
+  still be encoded as a single byte (Johan Kindgren via Mike
+  McCandless)
 Bug fixes
 * LUCENE-2092: BooleanQuery was ignoring disableCoord in its hashCode

View File

@@ -43,7 +43,6 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermVectorOffsetInfo;
-import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.AttributeImpl;
@@ -202,9 +201,9 @@ public class InstantiatedIndexWriter implements Closeable {
       byte[] oldNorms = index.getNormsByFieldNameAndDocumentNumber().get(field);
       if (oldNorms != null) {
         System.arraycopy(oldNorms, 0, norms, 0, oldNorms.length);
-        Arrays.fill(norms, oldNorms.length, norms.length, DefaultSimilarity.encodeNorm(1.0f));
+        Arrays.fill(norms, oldNorms.length, norms.length, similarity.encodeNormValue(1.0f));
       } else {
-        Arrays.fill(norms, 0, norms.length, DefaultSimilarity.encodeNorm(1.0f));
+        Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f));
       }
       normsByFieldNameAndDocumentNumber.put(field, norms);
       fieldNames.remove(field);
@@ -212,7 +211,7 @@ public class InstantiatedIndexWriter implements Closeable {
     for (String field : fieldNames) {
       //System.out.println(field);
       byte[] norms = new byte[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()];
-      Arrays.fill(norms, 0, norms.length, DefaultSimilarity.encodeNorm(1.0f));
+      Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f));
       normsByFieldNameAndDocumentNumber.put(field, norms);
     }
     fieldNames.clear();
@@ -240,7 +239,7 @@ public class InstantiatedIndexWriter implements Closeable {
         float norm = eFieldTermDocInfoFactoriesByTermText.getKey().boost;
         norm *= document.getDocument().getBoost();
         norm *= similarity.lengthNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
-        normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = Similarity.encodeNorm(norm);
+        normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = similarity.encodeNormValue(norm);
       } else {
         System.currentTimeMillis();
       }

View File

@@ -50,7 +50,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Similarity;
-import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.store.RAMDirectory; // for javadocs
 /**
  * High-performance single-document main memory Apache Lucene fulltext search index.
@@ -1102,7 +1102,7 @@ public class MemoryIndex implements Serializable {
       float boost = info != null ? info.getBoost() : 1.0f;
       FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
       float n = sim.computeNorm(fieldName, invertState);
-      byte norm = Similarity.encodeNorm(n);
+      byte norm = sim.encodeNormValue(n);
       norms = new byte[] {norm};
       // cache it for future reuse

View File

@@ -143,7 +143,7 @@ public class FieldNormModifier {
         if (sim == null)
           reader.setNorm(d, fieldName, Similarity.encodeNorm(1.0f));
         else
-          reader.setNorm(d, fieldName, Similarity.encodeNorm(sim.lengthNorm(fieldName, termCounts[d])));
+          reader.setNorm(d, fieldName, sim.encodeNormValue(sim.lengthNorm(fieldName, termCounts[d])));
       }
     }

View File

@@ -44,9 +44,7 @@ public class TestFieldNormModifier extends TestCase {
   public TestFieldNormModifier(String name) {
     super(name);
   }
-  public static byte DEFAULT_NORM = Similarity.encodeNorm(1.0f);
   public static int NUM_DOCS = 5;
   public Directory store = new RAMDirectory();

View File

@@ -47,8 +47,6 @@ public class TestLengthNormModifier extends TestCase {
   public TestLengthNormModifier(String name) {
     super(name);
   }
-  public static byte DEFAULT_NORM = Similarity.encodeNorm(1.0f);
   public static int NUM_DOCS = 5;

View File

@@ -16,7 +16,7 @@ package org.apache.lucene.document;
  */
 import org.apache.lucene.search.PhraseQuery; // for javadocs
-import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanQuery; // for javadocs
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.util.StringHelper; // for javadocs
@@ -80,13 +80,13 @@ public abstract class AbstractField implements Fieldable {
    * by the {@link
    * org.apache.lucene.search.Similarity#lengthNorm(String,
    * int)} and then
-   * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
+   * rounded by {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before it is stored in the
    * index. One should attempt to ensure that this product does not overflow
    * the range of that encoding.
    *
    * @see org.apache.lucene.document.Document#setBoost(float)
    * @see org.apache.lucene.search.Similarity#computeNorm(String, org.apache.lucene.index.FieldInvertState)
-   * @see org.apache.lucene.search.Similarity#encodeNorm(float)
+   * @see org.apache.lucene.search.Similarity#encodeNormValue(float)
    */
   public void setBoost(float boost) {
     this.boost = boost;

View File

@@ -18,8 +18,8 @@ package org.apache.lucene.document;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.index.FieldInvertState; // for javadocs
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.PhraseQuery; // for javadocs
+import org.apache.lucene.search.spans.SpanQuery; // for javadocs
 import java.io.Reader;
 import java.io.Serializable;
@@ -48,13 +48,13 @@ public interface Fieldable extends Serializable {
    * FieldInvertState)} method, the boost value is multiplied
    * by the {@link
    * org.apache.lucene.search.Similarity#lengthNorm(String,
-   * int)} and then rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
+   * int)} and then rounded by {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before it is stored in the
    * index. One should attempt to ensure that this product does not overflow
    * the range of that encoding.
    *
    * @see org.apache.lucene.document.Document#setBoost(float)
    * @see org.apache.lucene.search.Similarity#computeNorm(String, FieldInvertState)
-   * @see org.apache.lucene.search.Similarity#encodeNorm(float)
+   * @see org.apache.lucene.search.Similarity#encodeNormValue(float)
    */
   void setBoost(float boost);

View File

@@ -31,7 +31,7 @@ import java.util.Set;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.Lock;
@@ -604,7 +604,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
     ensureOpen();
     byte[] bytes = normsCache.get(field);
     if (bytes==null && !hasNorms(field)) {
-      Arrays.fill(result, offset, result.length, DefaultSimilarity.encodeNorm(1.0f));
+      Arrays.fill(result, offset, result.length, Similarity.getDefault().encodeNormValue(1.0f));
     } else if (bytes != null) { // cache hit
       System.arraycopy(bytes, 0, result, offset, maxDoc());
     } else {

View File

@@ -730,7 +730,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
    * this method call will silently do nothing.
    *
    * @see #norms(String)
-   * @see Similarity#decodeNorm(byte)
+   * @see Similarity#decodeNormValue(byte)
    * @throws StaleReaderException if the index has changed
    *  since this reader was opened
    * @throws CorruptIndexException if the index is corrupt
@@ -755,7 +755,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
    * document.
    *
    * @see #norms(String)
-   * @see Similarity#decodeNorm(byte)
+   * @see Similarity#decodeNormValue(byte)
    *
    * @throws StaleReaderException if the index has changed
    *  since this reader was opened
@@ -768,7 +768,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
   public void setNorm(int doc, String field, float value)
           throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
     ensureOpen();
-    setNorm(doc, field, Similarity.encodeNorm(value));
+    setNorm(doc, field, Similarity.getDefault().encodeNormValue(value));
   }
   /** Returns an enumeration of all the terms in the index. The
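One practical consequence of the hunk above: IndexReader.setNorm(int, String, float) now encodes through the default Similarity rather than the static helper, so an installed custom default changes the byte that gets written. A rough sketch, assuming dir is an existing Directory and FlatNormSimilarity is the hypothetical subclass sketched near the top:

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;

public class SetNormSketch {
  static void overwriteNorm(Directory dir) throws Exception {
    // Install the custom encoder as the default before calling the float variant.
    Similarity.setDefault(new FlatNormSimilarity());
    IndexReader reader = IndexReader.open(dir, false); // read-write reader
    // Doc 0 and "field1" are illustrative; the value is stored as
    // Similarity.getDefault().encodeNormValue(0.5f), per the hunk above.
    reader.setNorm(0, "field1", 0.5f);
    reader.close();
  }
}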

View File

@@ -28,7 +28,7 @@ import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.DirectoryReader.MultiTermDocs;
 import org.apache.lucene.index.DirectoryReader.MultiTermEnum;
 import org.apache.lucene.index.DirectoryReader.MultiTermPositions;
-import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
 /** An IndexReader which reads multiple indexes, appending
  *  their content. */
@@ -316,7 +316,7 @@ public class MultiReader extends IndexReader implements Cloneable {
       subReaders[i].norms(field, result, offset + starts[i]);
     if (bytes==null && !hasNorms(field)) {
-      Arrays.fill(result, offset, result.length, DefaultSimilarity.encodeNorm(1.0f));
+      Arrays.fill(result, offset, result.length, Similarity.getDefault().encodeNormValue(1.0f));
     } else if (bytes != null) { // cache hit
       System.arraycopy(bytes, 0, result, offset, maxDoc());
     } else {

View File

@@ -37,7 +37,7 @@ import org.apache.lucene.search.Similarity;
 final class NormsWriter extends InvertedDocEndConsumer {
-  private static final byte defaultNorm = Similarity.encodeNorm(1.0f);
+  private static final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f);
   private FieldInfos fieldInfos;
   @Override
   public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {

View File

@@ -71,7 +71,7 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement
         norms = ArrayUtil.grow(norms, 1+upto);
       }
       final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);
-      norms[upto] = Similarity.encodeNorm(norm);
+      norms[upto] = Similarity.getDefault().encodeNormValue(norm);
       docIDs[upto] = docState.docID;
       upto++;
     }

View File

@@ -30,7 +30,7 @@ import java.util.Set;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
@@ -1022,7 +1022,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
     ensureOpen();
     Norm norm = norms.get(field);
     if (norm == null) {
-      Arrays.fill(bytes, offset, bytes.length, DefaultSimilarity.encodeNorm(1.0f));
+      Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f));
       return;
     }

View File

@@ -70,7 +70,7 @@ public class MatchAllDocsQuery extends Query {
     @Override
     public float score() {
-      return norms == null ? score : score * Similarity.decodeNorm(norms[docID()]);
+      return norms == null ? score : score * getSimilarity().decodeNormValue(norms[docID()]);
     }
     @Override

View File

@@ -238,7 +238,7 @@ public class MultiPhraseQuery extends Query {
       Explanation fieldNormExpl = new Explanation();
       byte[] fieldNorms = reader.norms(field);
       float fieldNorm =
-        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f;
+        fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f;
       fieldNormExpl.setValue(fieldNorm);
       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
       fieldExpl.addDetail(fieldNormExpl);

View File

@@ -108,7 +108,7 @@ public class PhraseQuery extends Query {
   }
   private class PhraseWeight extends Weight {
-    private Similarity similarity;
+    private final Similarity similarity;
     private float value;
     private float idf;
     private float queryNorm;
@@ -232,7 +232,7 @@ public class PhraseQuery extends Query {
       Explanation fieldNormExpl = new Explanation();
       byte[] fieldNorms = reader.norms(field);
       float fieldNorm =
-        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f;
+        fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f;
       fieldNormExpl.setValue(fieldNorm);
       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
       fieldExpl.addDetail(fieldNormExpl);

View File

@@ -110,7 +110,7 @@ abstract class PhraseScorer extends Scorer {
   public float score() throws IOException {
     //System.out.println("scoring " + first.doc);
     float raw = getSimilarity().tf(freq) * value; // raw score
-    return norms == null ? raw : raw * Similarity.decodeNorm(norms[first.doc]); // normalize
+    return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[first.doc]); // normalize
   }
   @Override

View File

@@ -38,7 +38,7 @@ import java.io.IOException;
  * with these scores.
  */
 public abstract class Scorer extends DocIdSetIterator {
-  private Similarity similarity;
+  private final Similarity similarity;
   /** Constructs a Scorer.
    * @param similarity The <code>Similarity</code> implementation used by this scorer.

View File

@@ -26,7 +26,6 @@ import org.apache.lucene.util.SmallFloat;
 import java.io.IOException;
 import java.io.Serializable;
 import java.util.Collection;
-import java.util.IdentityHashMap;
 /**
@@ -498,11 +497,11 @@ import java.util.IdentityHashMap;
  * </tr>
  * </table>
  * <br>&nbsp;<br>
- * However the resulted <i>norm</i> value is {@link #encodeNorm(float) encoded} as a single byte
+ * However the resulted <i>norm</i> value is {@link #encodeNormValue(float) encoded} as a single byte
  * before being stored.
  * At search time, the norm byte value is read from the index
  * {@link org.apache.lucene.store.Directory directory} and
- * {@link #decodeNorm(byte) decoded} back to a float <i>norm</i> value.
+ * {@link #decodeNormValue(byte) decoded} back to a float <i>norm</i> value.
 * This encoding/decoding, while reducing index size, comes with the price of
 * precision loss - it is not guaranteed that <i>decode(encode(x)) = x</i>.
 * For instance, <i>decode(encode(0.89)) = 0.75</i>.
@@ -563,16 +562,30 @@ public abstract class Similarity implements Serializable {
       NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
   }
-  /** Decodes a normalization factor stored in an index.
-   * @see #encodeNorm(float)
+  /**
+   * Decodes a normalization factor stored in an index.
+   * @see #decodeNormValue(byte)
+   * @deprecated Use {@link #decodeNormValue} instead.
    */
+  @Deprecated
   public static float decodeNorm(byte b) {
     return NORM_TABLE[b & 0xFF];  // & 0xFF maps negative bytes to positive above 127
   }
-  /** Returns a table for decoding normalization bytes.
-   * @see #encodeNorm(float)
+  /** Decodes a normalization factor stored in an index.
+   * @see #encodeNormValue(float)
+   */
+  public float decodeNormValue(byte b) {
+    return NORM_TABLE[b & 0xFF];  // & 0xFF maps negative bytes to positive above 127
+  }
+  /** Returns a table for decoding normalization bytes.
+   * @see #encodeNormValue(float)
+   * @see #decodeNormValue(byte)
+   *
+   * @deprecated Use instance methods for encoding/decoding norm values to enable customization.
    */
+  @Deprecated
   public static float[] getNormDecoder() {
     return NORM_TABLE;
   }
@@ -612,7 +625,7 @@ public abstract class Similarity implements Serializable {
    * <p>Note that the return values are computed under
    * {@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document)}
    * and then stored using
-   * {@link #encodeNorm(float)}.
+   * {@link #encodeNormValue(float)}.
    * Thus they have limited precision, and documents
    * must be re-indexed if this method is altered.
    *
@@ -654,6 +667,19 @@ public abstract class Similarity implements Serializable {
    * @see org.apache.lucene.document.Field#setBoost(float)
    * @see org.apache.lucene.util.SmallFloat
    */
+  public byte encodeNormValue(float f) {
+    return SmallFloat.floatToByte315(f);
+  }
+  /**
+   * Static accessor kept for backwards compability reason, use encodeNormValue instead.
+   * @param f norm-value to encode
+   * @return byte representing the given float
+   * @deprecated Use {@link #encodeNormValue} instead.
+   *
+   * @see #encodeNormValue(float)
+   */
+  @Deprecated
   public static byte encodeNorm(float f) {
     return SmallFloat.floatToByte315(f);
   }
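The javadoc hunk above keeps the caveat that the one-byte encoding is lossy, so decode(encode(x)) is generally not x. A quick round-trip sketch using the new instance methods makes the granularity visible; the class name and sample values are illustrative:

import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;

public class NormRoundTrip {
  public static void main(String[] args) {
    Similarity sim = new DefaultSimilarity();
    for (float f : new float[] {0.25f, 0.89f, 1.0f, 3.7f}) {
      byte b = sim.encodeNormValue(f);  // the single byte actually stored in the index
      System.out.println(f + " -> " + sim.decodeNormValue(b));
    }
  }
}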

View File

@@ -33,7 +33,7 @@ public class TermQuery extends Query {
   private Term term;
   private class TermWeight extends Weight {
-    private Similarity similarity;
+    private final Similarity similarity;
     private float value;
     private float idf;
     private float queryNorm;
@@ -135,7 +135,7 @@ public class TermQuery extends Query {
       Explanation fieldNormExpl = new Explanation();
       byte[] fieldNorms = reader.norms(field);
       float fieldNorm =
-        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f;
+        fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f;
       fieldNormExpl.setValue(fieldNorm);
       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
       fieldExpl.addDetail(fieldNormExpl);

View File

@@ -25,8 +25,6 @@ import org.apache.lucene.index.TermDocs;
  */
 final class TermScorer extends Scorer {
-  private static final float[] SIM_NORM_DECODER = Similarity.getNormDecoder();
   private Weight weight;
   private TermDocs termDocs;
   private byte[] norms;
@@ -56,6 +54,7 @@ final class TermScorer extends Scorer {
    */
   TermScorer(Weight weight, TermDocs td, Similarity similarity, byte[] norms) {
     super(similarity);
     this.weight = weight;
     this.termDocs = td;
     this.norms = norms;
@@ -127,7 +126,7 @@ final class TermScorer extends Scorer {
       ? scoreCache[f]                             // cache hit
       : getSimilarity().tf(f)*weightValue;        // cache miss
-    return norms == null ? raw : raw * SIM_NORM_DECODER[norms[doc] & 0xFF]; // normalize for field
+    return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[doc]); // normalize for field
   }
   /**

View File

@@ -95,7 +95,7 @@ public class SpanScorer extends Scorer {
   @Override
   public float score() throws IOException {
     float raw = getSimilarity().tf(freq) * value; // raw score
-    return norms == null? raw : raw * Similarity.decodeNorm(norms[doc]); // normalize
+    return norms == null? raw : raw * getSimilarity().decodeNormValue(norms[doc]); // normalize
   }
   /** This method is no longer an official member of {@link Scorer},

View File

@@ -118,7 +118,7 @@ public class SpanWeight extends Weight {
     Explanation fieldNormExpl = new Explanation();
     byte[] fieldNorms = reader.norms(field);
     float fieldNorm =
-      fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f;
+      fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f;
     fieldNormExpl.setValue(fieldNorm);
     fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
     fieldExpl.addDetail(fieldNormExpl);

View File

@@ -511,7 +511,7 @@ public class TestIndexReader extends LuceneTestCase
     // is open against the index:
     public void testWritingNorms() throws IOException
     {
-        String tempDir = System.getProperty("tempDir");
+        String tempDir = "target/test";
         if (tempDir == null)
             throw new IOException("tempDir undefined, cannot run test");

View File

@@ -17,9 +17,6 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
-import java.io.File;
-import java.io.IOException;
 import org.apache.lucene.index.SegmentReader.Norm;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.analysis.SimpleAnalyzer;
@@ -28,9 +25,7 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.store.MockRAMDirectory;
-import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.store.AlreadyClosedException;
 /**
  * Tests cloning multiple types of readers, modifying the deletedDocs and norms
@@ -273,13 +268,13 @@ public class TestIndexReaderClone extends LuceneTestCase {
    * @throws Exception
    */
   private void performDefaultTests(IndexReader r1) throws Exception {
-    float norm1 = Similarity.decodeNorm(r1.norms("field1")[4]);
+    float norm1 = Similarity.getDefault().decodeNormValue(r1.norms("field1")[4]);
     IndexReader pr1Clone = (IndexReader) r1.clone();
     pr1Clone.deleteDocument(10);
     pr1Clone.setNorm(4, "field1", 0.5f);
-    assertTrue(Similarity.decodeNorm(r1.norms("field1")[4]) == norm1);
-    assertTrue(Similarity.decodeNorm(pr1Clone.norms("field1")[4]) != norm1);
+    assertTrue(Similarity.getDefault().decodeNormValue(r1.norms("field1")[4]) == norm1);
+    assertTrue(Similarity.getDefault().decodeNormValue(pr1Clone.norms("field1")[4]) != norm1);
     assertTrue(!r1.isDeleted(10));
     assertTrue(pr1Clone.isDeleted(10));
@@ -426,7 +421,7 @@ public class TestIndexReaderClone extends LuceneTestCase {
     TestIndexReaderReopen.createIndex(dir1, false);
     IndexReader orig = IndexReader.open(dir1, false);
     orig.setNorm(1, "field1", 17.0f);
-    final byte encoded = Similarity.encodeNorm(17.0f);
+    final byte encoded = Similarity.getDefault().encodeNormValue(17.0f);
     assertEquals(encoded, orig.norms("field1")[1]);
     // the cloned segmentreader should have 2 references, 1 to itself, and 1 to

View File

@@ -216,7 +216,8 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
     }
     // norm values should be different
-    assertTrue(Similarity.decodeNorm(segmentReader3C.norms("field1")[5]) != Similarity.decodeNorm(segmentReader4C.norms("field1")[5]));
+    assertTrue(Similarity.getDefault().decodeNormValue(segmentReader3C.norms("field1")[5])
+               != Similarity.getDefault().decodeNormValue(segmentReader4C.norms("field1")[5]));
     Norm reader4CCNorm = segmentReader4C.norms.get("field1");
     assertEquals(3, reader3CCNorm.bytesRef().refCount());
     assertEquals(1, reader4CCNorm.bytesRef().refCount());
@@ -283,7 +284,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
     assertEquals("number of norms mismatches", numDocNorms, b.length);
     ArrayList storedNorms = (i == 1 ? modifiedNorms : norms);
     for (int j = 0; j < b.length; j++) {
-      float norm = Similarity.decodeNorm(b[j]);
+      float norm = Similarity.getDefault().decodeNormValue(b[j]);
       float norm1 = ((Float) storedNorms.get(j)).floatValue();
       assertEquals("stored norm value of " + field + " for doc " + j + " is "
           + norm + " - a mismatch!", norm, norm1, 0.000001);
@@ -321,7 +322,8 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
   private float nextNorm() {
     float norm = lastNorm + normDelta;
     do {
-      float norm1 = Similarity.decodeNorm(Similarity.encodeNorm(norm));
+      float norm1 = Similarity.getDefault().decodeNormValue(
+          Similarity.getDefault().encodeNormValue(norm));
       if (norm1 > lastNorm) {
         // System.out.println(norm1+" > "+lastNorm);
         norm = norm1;

View File

@@ -189,7 +189,7 @@ public class TestNorms extends LuceneTestCase {
     assertEquals("number of norms mismatches",numDocNorms,b.length);
     ArrayList storedNorms = (i==1 ? modifiedNorms : norms);
     for (int j = 0; j < b.length; j++) {
-      float norm = Similarity.decodeNorm(b[j]);
+      float norm = similarityOne.decodeNormValue(b[j]);
       float norm1 = ((Float)storedNorms.get(j)).floatValue();
       assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm, norm1, 0.000001);
     }
@@ -224,7 +224,7 @@ public class TestNorms extends LuceneTestCase {
   private float nextNorm() {
     float norm = lastNorm + normDelta;
     do {
-      float norm1 = Similarity.decodeNorm(Similarity.encodeNorm(norm));
+      float norm1 = similarityOne.decodeNormValue(similarityOne.encodeNormValue(norm));
       if (norm1 > lastNorm) {
         //System.out.println(norm1+" > "+lastNorm);
         norm = norm1;

View File

@@ -26,7 +26,7 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.RAMDirectory;
 public class TestSegmentReader extends LuceneTestCase {
@@ -167,7 +167,7 @@ public class TestSegmentReader extends LuceneTestCase {
     if (!reader.hasNorms(f.name())) {
       // test for fake norms of 1.0 or null depending on the flag
       byte [] norms = reader.norms(f.name());
-      byte norm1 = DefaultSimilarity.encodeNorm(1.0f);
+      byte norm1 = Similarity.getDefault().encodeNormValue(1.0f);
       assertNull(norms);
       norms = new byte[reader.maxDoc()];
       reader.norms(f.name(),norms, 0);