From 7e3700e1c8d3d089fc25c596c3dd8ba5cf958608 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 24 Nov 2009 20:26:07 +0000 Subject: [PATCH] LUCENE-1260: allow Similarity instance to customize how norms are encoded/decoded git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@883852 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 6 +++ .../instantiated/InstantiatedIndexWriter.java | 9 ++-- .../lucene/index/memory/MemoryIndex.java | 4 +- .../lucene/index/FieldNormModifier.java | 2 +- .../lucene/index/TestFieldNormModifier.java | 4 +- .../lucene/misc/TestLengthNormModifier.java | 2 - .../apache/lucene/document/AbstractField.java | 6 +-- .../org/apache/lucene/document/Fieldable.java | 8 ++-- .../apache/lucene/index/DirectoryReader.java | 4 +- .../org/apache/lucene/index/IndexReader.java | 6 +-- .../org/apache/lucene/index/MultiReader.java | 4 +- .../org/apache/lucene/index/NormsWriter.java | 2 +- .../lucene/index/NormsWriterPerField.java | 2 +- .../apache/lucene/index/SegmentReader.java | 4 +- .../lucene/search/MatchAllDocsQuery.java | 2 +- .../lucene/search/MultiPhraseQuery.java | 2 +- .../org/apache/lucene/search/PhraseQuery.java | 4 +- .../apache/lucene/search/PhraseScorer.java | 2 +- src/java/org/apache/lucene/search/Scorer.java | 2 +- .../org/apache/lucene/search/Similarity.java | 42 +++++++++++++++---- .../org/apache/lucene/search/TermQuery.java | 4 +- .../org/apache/lucene/search/TermScorer.java | 5 +-- .../lucene/search/spans/SpanScorer.java | 2 +- .../lucene/search/spans/SpanWeight.java | 2 +- .../apache/lucene/index/TestIndexReader.java | 2 +- .../lucene/index/TestIndexReaderClone.java | 13 ++---- .../index/TestIndexReaderCloneNorms.java | 8 ++-- .../org/apache/lucene/index/TestNorms.java | 4 +- .../lucene/index/TestSegmentReader.java | 4 +- 29 files changed, 92 insertions(+), 69 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index ce335ec8531..416a68054aa 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -11,6 +11,12 @@ API Changes * LUCENE-2076: Rename FSDirectory.getFile -> getDirectory. (George Aroush via Mike McCandless) +* LUCENE-1260: Change norm encode (float->byte) and decode + (byte->float) to be instance methods not static methods. This way a + custom Similarity can alter how norms are encoded, though they must + still be encoded as a single byte (Johan Kindgren via Mike + McCandless) + Bug fixes * LUCENE-2092: BooleanQuery was ignoring disableCoord in its hashCode diff --git a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java index 14ab38efb8b..9a937cfd0b8 100644 --- a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java +++ b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java @@ -43,7 +43,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermVectorOffsetInfo; -import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.AttributeImpl; @@ -202,9 +201,9 @@ public class InstantiatedIndexWriter implements Closeable { byte[] oldNorms = index.getNormsByFieldNameAndDocumentNumber().get(field); if (oldNorms != null) { System.arraycopy(oldNorms, 0, norms, 0, oldNorms.length); - Arrays.fill(norms, oldNorms.length, norms.length, DefaultSimilarity.encodeNorm(1.0f)); + Arrays.fill(norms, oldNorms.length, norms.length, similarity.encodeNormValue(1.0f)); } else { - Arrays.fill(norms, 0, norms.length, DefaultSimilarity.encodeNorm(1.0f)); + Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f)); } normsByFieldNameAndDocumentNumber.put(field, norms); fieldNames.remove(field); @@ -212,7 +211,7 @@ public class InstantiatedIndexWriter implements Closeable { for (String field : fieldNames) { //System.out.println(field); byte[] norms = new byte[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()]; - Arrays.fill(norms, 0, norms.length, DefaultSimilarity.encodeNorm(1.0f)); + Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f)); normsByFieldNameAndDocumentNumber.put(field, norms); } fieldNames.clear(); @@ -240,7 +239,7 @@ public class InstantiatedIndexWriter implements Closeable { float norm = eFieldTermDocInfoFactoriesByTermText.getKey().boost; norm *= document.getDocument().getBoost(); norm *= similarity.lengthNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength); - normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = Similarity.encodeNorm(norm); + normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = similarity.encodeNormValue(norm); } else { System.currentTimeMillis(); } diff --git a/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 6926c394cc0..505a5fd3696 100644 --- a/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -50,7 +50,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Similarity; -import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.store.RAMDirectory; // for javadocs /** * High-performance single-document main memory Apache Lucene fulltext search index. @@ -1102,7 +1102,7 @@ public class MemoryIndex implements Serializable { float boost = info != null ? info.getBoost() : 1.0f; FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost); float n = sim.computeNorm(fieldName, invertState); - byte norm = Similarity.encodeNorm(n); + byte norm = sim.encodeNormValue(n); norms = new byte[] {norm}; // cache it for future reuse diff --git a/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java b/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java index ce326ce7fea..39ebc6972fc 100644 --- a/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java +++ b/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java @@ -143,7 +143,7 @@ public class FieldNormModifier { if (sim == null) reader.setNorm(d, fieldName, Similarity.encodeNorm(1.0f)); else - reader.setNorm(d, fieldName, Similarity.encodeNorm(sim.lengthNorm(fieldName, termCounts[d]))); + reader.setNorm(d, fieldName, sim.encodeNormValue(sim.lengthNorm(fieldName, termCounts[d]))); } } diff --git a/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java b/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java index db64956eb6f..a6e605d84e1 100644 --- a/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java +++ b/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java @@ -44,9 +44,7 @@ public class TestFieldNormModifier extends TestCase { public TestFieldNormModifier(String name) { super(name); } - - public static byte DEFAULT_NORM = Similarity.encodeNorm(1.0f); - + public static int NUM_DOCS = 5; public Directory store = new RAMDirectory(); diff --git a/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java b/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java index 1a3a08293a9..dad12444579 100644 --- a/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java +++ b/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java @@ -47,8 +47,6 @@ public class TestLengthNormModifier extends TestCase { public TestLengthNormModifier(String name) { super(name); } - - public static byte DEFAULT_NORM = Similarity.encodeNorm(1.0f); public static int NUM_DOCS = 5; diff --git a/src/java/org/apache/lucene/document/AbstractField.java b/src/java/org/apache/lucene/document/AbstractField.java index d8a90675a92..a8249bc226f 100755 --- a/src/java/org/apache/lucene/document/AbstractField.java +++ b/src/java/org/apache/lucene/document/AbstractField.java @@ -16,7 +16,7 @@ package org.apache.lucene.document; */ import org.apache.lucene.search.PhraseQuery; // for javadocs -import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanQuery; // for javadocs import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.util.StringHelper; // for javadocs @@ -80,13 +80,13 @@ public abstract class AbstractField implements Fieldable { * by the {@link * org.apache.lucene.search.Similarity#lengthNorm(String, * int)} and then - * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the + * rounded by {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before it is stored in the * index. One should attempt to ensure that this product does not overflow * the range of that encoding. * * @see org.apache.lucene.document.Document#setBoost(float) * @see org.apache.lucene.search.Similarity#computeNorm(String, org.apache.lucene.index.FieldInvertState) - * @see org.apache.lucene.search.Similarity#encodeNorm(float) + * @see org.apache.lucene.search.Similarity#encodeNormValue(float) */ public void setBoost(float boost) { this.boost = boost; diff --git a/src/java/org/apache/lucene/document/Fieldable.java b/src/java/org/apache/lucene/document/Fieldable.java index ee1dff9784b..3b48ad53350 100755 --- a/src/java/org/apache/lucene/document/Fieldable.java +++ b/src/java/org/apache/lucene/document/Fieldable.java @@ -18,8 +18,8 @@ package org.apache.lucene.document; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.FieldInvertState; // for javadocs -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.PhraseQuery; // for javadocs +import org.apache.lucene.search.spans.SpanQuery; // for javadocs import java.io.Reader; import java.io.Serializable; @@ -48,13 +48,13 @@ public interface Fieldable extends Serializable { * FieldInvertState)} method, the boost value is multiplied * by the {@link * org.apache.lucene.search.Similarity#lengthNorm(String, - * int)} and then rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the + * int)} and then rounded by {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before it is stored in the * index. One should attempt to ensure that this product does not overflow * the range of that encoding. * * @see org.apache.lucene.document.Document#setBoost(float) * @see org.apache.lucene.search.Similarity#computeNorm(String, FieldInvertState) - * @see org.apache.lucene.search.Similarity#encodeNorm(float) + * @see org.apache.lucene.search.Similarity#encodeNormValue(float) */ void setBoost(float boost); diff --git a/src/java/org/apache/lucene/index/DirectoryReader.java b/src/java/org/apache/lucene/index/DirectoryReader.java index ae429e35106..eb29e87d30c 100644 --- a/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/src/java/org/apache/lucene/index/DirectoryReader.java @@ -31,7 +31,7 @@ import java.util.Set; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.Similarity; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.Lock; @@ -604,7 +604,7 @@ class DirectoryReader extends IndexReader implements Cloneable { ensureOpen(); byte[] bytes = normsCache.get(field); if (bytes==null && !hasNorms(field)) { - Arrays.fill(result, offset, result.length, DefaultSimilarity.encodeNorm(1.0f)); + Arrays.fill(result, offset, result.length, Similarity.getDefault().encodeNormValue(1.0f)); } else if (bytes != null) { // cache hit System.arraycopy(bytes, 0, result, offset, maxDoc()); } else { diff --git a/src/java/org/apache/lucene/index/IndexReader.java b/src/java/org/apache/lucene/index/IndexReader.java index 48a6a763a8a..734bf06f12c 100644 --- a/src/java/org/apache/lucene/index/IndexReader.java +++ b/src/java/org/apache/lucene/index/IndexReader.java @@ -730,7 +730,7 @@ public abstract class IndexReader implements Cloneable,Closeable { * this method call will silently do nothing. * * @see #norms(String) - * @see Similarity#decodeNorm(byte) + * @see Similarity#decodeNormValue(byte) * @throws StaleReaderException if the index has changed * since this reader was opened * @throws CorruptIndexException if the index is corrupt @@ -755,7 +755,7 @@ public abstract class IndexReader implements Cloneable,Closeable { * document. * * @see #norms(String) - * @see Similarity#decodeNorm(byte) + * @see Similarity#decodeNormValue(byte) * * @throws StaleReaderException if the index has changed * since this reader was opened @@ -768,7 +768,7 @@ public abstract class IndexReader implements Cloneable,Closeable { public void setNorm(int doc, String field, float value) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { ensureOpen(); - setNorm(doc, field, Similarity.encodeNorm(value)); + setNorm(doc, field, Similarity.getDefault().encodeNormValue(value)); } /** Returns an enumeration of all the terms in the index. The diff --git a/src/java/org/apache/lucene/index/MultiReader.java b/src/java/org/apache/lucene/index/MultiReader.java index 132fdf04e84..4f4814c951b 100644 --- a/src/java/org/apache/lucene/index/MultiReader.java +++ b/src/java/org/apache/lucene/index/MultiReader.java @@ -28,7 +28,7 @@ import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.DirectoryReader.MultiTermDocs; import org.apache.lucene.index.DirectoryReader.MultiTermEnum; import org.apache.lucene.index.DirectoryReader.MultiTermPositions; -import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.Similarity; /** An IndexReader which reads multiple indexes, appending * their content. */ @@ -316,7 +316,7 @@ public class MultiReader extends IndexReader implements Cloneable { subReaders[i].norms(field, result, offset + starts[i]); if (bytes==null && !hasNorms(field)) { - Arrays.fill(result, offset, result.length, DefaultSimilarity.encodeNorm(1.0f)); + Arrays.fill(result, offset, result.length, Similarity.getDefault().encodeNormValue(1.0f)); } else if (bytes != null) { // cache hit System.arraycopy(bytes, 0, result, offset, maxDoc()); } else { diff --git a/src/java/org/apache/lucene/index/NormsWriter.java b/src/java/org/apache/lucene/index/NormsWriter.java index d9e4e3f3fdc..d4754ec1125 100644 --- a/src/java/org/apache/lucene/index/NormsWriter.java +++ b/src/java/org/apache/lucene/index/NormsWriter.java @@ -37,7 +37,7 @@ import org.apache.lucene.search.Similarity; final class NormsWriter extends InvertedDocEndConsumer { - private static final byte defaultNorm = Similarity.encodeNorm(1.0f); + private static final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f); private FieldInfos fieldInfos; @Override public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) { diff --git a/src/java/org/apache/lucene/index/NormsWriterPerField.java b/src/java/org/apache/lucene/index/NormsWriterPerField.java index b604884b632..959e7278a97 100644 --- a/src/java/org/apache/lucene/index/NormsWriterPerField.java +++ b/src/java/org/apache/lucene/index/NormsWriterPerField.java @@ -71,7 +71,7 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement norms = ArrayUtil.grow(norms, 1+upto); } final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState); - norms[upto] = Similarity.encodeNorm(norm); + norms[upto] = Similarity.getDefault().encodeNormValue(norm); docIDs[upto] = docState.docID; upto++; } diff --git a/src/java/org/apache/lucene/index/SegmentReader.java b/src/java/org/apache/lucene/index/SegmentReader.java index 5aec0156868..fe7a864b43f 100644 --- a/src/java/org/apache/lucene/index/SegmentReader.java +++ b/src/java/org/apache/lucene/index/SegmentReader.java @@ -30,7 +30,7 @@ import java.util.Set; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.Similarity; import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; @@ -1022,7 +1022,7 @@ public class SegmentReader extends IndexReader implements Cloneable { ensureOpen(); Norm norm = norms.get(field); if (norm == null) { - Arrays.fill(bytes, offset, bytes.length, DefaultSimilarity.encodeNorm(1.0f)); + Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f)); return; } diff --git a/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index 25323bce1fe..6e447afc0d1 100644 --- a/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -70,7 +70,7 @@ public class MatchAllDocsQuery extends Query { @Override public float score() { - return norms == null ? score : score * Similarity.decodeNorm(norms[docID()]); + return norms == null ? score : score * getSimilarity().decodeNormValue(norms[docID()]); } @Override diff --git a/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 34ace55ab02..fbb5cc1d029 100644 --- a/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -238,7 +238,7 @@ public class MultiPhraseQuery extends Query { Explanation fieldNormExpl = new Explanation(); byte[] fieldNorms = reader.norms(field); float fieldNorm = - fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f; + fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f; fieldNormExpl.setValue(fieldNorm); fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")"); fieldExpl.addDetail(fieldNormExpl); diff --git a/src/java/org/apache/lucene/search/PhraseQuery.java b/src/java/org/apache/lucene/search/PhraseQuery.java index c25cc9e177b..08a01c9e1ce 100644 --- a/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/src/java/org/apache/lucene/search/PhraseQuery.java @@ -108,7 +108,7 @@ public class PhraseQuery extends Query { } private class PhraseWeight extends Weight { - private Similarity similarity; + private final Similarity similarity; private float value; private float idf; private float queryNorm; @@ -232,7 +232,7 @@ public class PhraseQuery extends Query { Explanation fieldNormExpl = new Explanation(); byte[] fieldNorms = reader.norms(field); float fieldNorm = - fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f; + fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f; fieldNormExpl.setValue(fieldNorm); fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")"); fieldExpl.addDetail(fieldNormExpl); diff --git a/src/java/org/apache/lucene/search/PhraseScorer.java b/src/java/org/apache/lucene/search/PhraseScorer.java index 303e2d19415..dab6d896302 100644 --- a/src/java/org/apache/lucene/search/PhraseScorer.java +++ b/src/java/org/apache/lucene/search/PhraseScorer.java @@ -110,7 +110,7 @@ abstract class PhraseScorer extends Scorer { public float score() throws IOException { //System.out.println("scoring " + first.doc); float raw = getSimilarity().tf(freq) * value; // raw score - return norms == null ? raw : raw * Similarity.decodeNorm(norms[first.doc]); // normalize + return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[first.doc]); // normalize } @Override diff --git a/src/java/org/apache/lucene/search/Scorer.java b/src/java/org/apache/lucene/search/Scorer.java index 3e04fe66117..27a545432f8 100644 --- a/src/java/org/apache/lucene/search/Scorer.java +++ b/src/java/org/apache/lucene/search/Scorer.java @@ -38,7 +38,7 @@ import java.io.IOException; * with these scores. */ public abstract class Scorer extends DocIdSetIterator { - private Similarity similarity; + private final Similarity similarity; /** Constructs a Scorer. * @param similarity The Similarity implementation used by this scorer. diff --git a/src/java/org/apache/lucene/search/Similarity.java b/src/java/org/apache/lucene/search/Similarity.java index 376e0370c1b..f2195e8f269 100644 --- a/src/java/org/apache/lucene/search/Similarity.java +++ b/src/java/org/apache/lucene/search/Similarity.java @@ -26,7 +26,6 @@ import org.apache.lucene.util.SmallFloat; import java.io.IOException; import java.io.Serializable; import java.util.Collection; -import java.util.IdentityHashMap; /** @@ -498,11 +497,11 @@ import java.util.IdentityHashMap; * * *
 
- * However the resulted norm value is {@link #encodeNorm(float) encoded} as a single byte + * However the resulted norm value is {@link #encodeNormValue(float) encoded} as a single byte * before being stored. * At search time, the norm byte value is read from the index * {@link org.apache.lucene.store.Directory directory} and - * {@link #decodeNorm(byte) decoded} back to a float norm value. + * {@link #decodeNormValue(byte) decoded} back to a float norm value. * This encoding/decoding, while reducing index size, comes with the price of * precision loss - it is not guaranteed that decode(encode(x)) = x. * For instance, decode(encode(0.89)) = 0.75. @@ -563,16 +562,30 @@ public abstract class Similarity implements Serializable { NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i); } - /** Decodes a normalization factor stored in an index. - * @see #encodeNorm(float) + /** + * Decodes a normalization factor stored in an index. + * @see #decodeNormValue(byte) + * @deprecated Use {@link #decodeNormValue} instead. */ + @Deprecated public static float decodeNorm(byte b) { return NORM_TABLE[b & 0xFF]; // & 0xFF maps negative bytes to positive above 127 } - /** Returns a table for decoding normalization bytes. - * @see #encodeNorm(float) + /** Decodes a normalization factor stored in an index. + * @see #encodeNormValue(float) */ + public float decodeNormValue(byte b) { + return NORM_TABLE[b & 0xFF]; // & 0xFF maps negative bytes to positive above 127 + } + + /** Returns a table for decoding normalization bytes. + * @see #encodeNormValue(float) + * @see #decodeNormValue(byte) + * + * @deprecated Use instance methods for encoding/decoding norm values to enable customization. + */ + @Deprecated public static float[] getNormDecoder() { return NORM_TABLE; } @@ -612,7 +625,7 @@ public abstract class Similarity implements Serializable { *

Note that the return values are computed under * {@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document)} * and then stored using - * {@link #encodeNorm(float)}. + * {@link #encodeNormValue(float)}. * Thus they have limited precision, and documents * must be re-indexed if this method is altered. * @@ -654,6 +667,19 @@ public abstract class Similarity implements Serializable { * @see org.apache.lucene.document.Field#setBoost(float) * @see org.apache.lucene.util.SmallFloat */ + public byte encodeNormValue(float f) { + return SmallFloat.floatToByte315(f); + } + + /** + * Static accessor kept for backwards compability reason, use encodeNormValue instead. + * @param f norm-value to encode + * @return byte representing the given float + * @deprecated Use {@link #encodeNormValue} instead. + * + * @see #encodeNormValue(float) + */ + @Deprecated public static byte encodeNorm(float f) { return SmallFloat.floatToByte315(f); } diff --git a/src/java/org/apache/lucene/search/TermQuery.java b/src/java/org/apache/lucene/search/TermQuery.java index 0ab95178032..bdc85af939d 100644 --- a/src/java/org/apache/lucene/search/TermQuery.java +++ b/src/java/org/apache/lucene/search/TermQuery.java @@ -33,7 +33,7 @@ public class TermQuery extends Query { private Term term; private class TermWeight extends Weight { - private Similarity similarity; + private final Similarity similarity; private float value; private float idf; private float queryNorm; @@ -135,7 +135,7 @@ public class TermQuery extends Query { Explanation fieldNormExpl = new Explanation(); byte[] fieldNorms = reader.norms(field); float fieldNorm = - fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f; + fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f; fieldNormExpl.setValue(fieldNorm); fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")"); fieldExpl.addDetail(fieldNormExpl); diff --git a/src/java/org/apache/lucene/search/TermScorer.java b/src/java/org/apache/lucene/search/TermScorer.java index b690e31a9cd..d450295afc2 100644 --- a/src/java/org/apache/lucene/search/TermScorer.java +++ b/src/java/org/apache/lucene/search/TermScorer.java @@ -25,8 +25,6 @@ import org.apache.lucene.index.TermDocs; */ final class TermScorer extends Scorer { - private static final float[] SIM_NORM_DECODER = Similarity.getNormDecoder(); - private Weight weight; private TermDocs termDocs; private byte[] norms; @@ -56,6 +54,7 @@ final class TermScorer extends Scorer { */ TermScorer(Weight weight, TermDocs td, Similarity similarity, byte[] norms) { super(similarity); + this.weight = weight; this.termDocs = td; this.norms = norms; @@ -127,7 +126,7 @@ final class TermScorer extends Scorer { ? scoreCache[f] // cache hit : getSimilarity().tf(f)*weightValue; // cache miss - return norms == null ? raw : raw * SIM_NORM_DECODER[norms[doc] & 0xFF]; // normalize for field + return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[doc]); // normalize for field } /** diff --git a/src/java/org/apache/lucene/search/spans/SpanScorer.java b/src/java/org/apache/lucene/search/spans/SpanScorer.java index 6ac9a188ed8..e44fcbc836c 100644 --- a/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -95,7 +95,7 @@ public class SpanScorer extends Scorer { @Override public float score() throws IOException { float raw = getSimilarity().tf(freq) * value; // raw score - return norms == null? raw : raw * Similarity.decodeNorm(norms[doc]); // normalize + return norms == null? raw : raw * getSimilarity().decodeNormValue(norms[doc]); // normalize } /** This method is no longer an official member of {@link Scorer}, diff --git a/src/java/org/apache/lucene/search/spans/SpanWeight.java b/src/java/org/apache/lucene/search/spans/SpanWeight.java index 3a3992b449c..28fd905a208 100644 --- a/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -118,7 +118,7 @@ public class SpanWeight extends Weight { Explanation fieldNormExpl = new Explanation(); byte[] fieldNorms = reader.norms(field); float fieldNorm = - fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f; + fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f; fieldNormExpl.setValue(fieldNorm); fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")"); fieldExpl.addDetail(fieldNormExpl); diff --git a/src/test/org/apache/lucene/index/TestIndexReader.java b/src/test/org/apache/lucene/index/TestIndexReader.java index 3c6160ec255..0fe0bfdb123 100644 --- a/src/test/org/apache/lucene/index/TestIndexReader.java +++ b/src/test/org/apache/lucene/index/TestIndexReader.java @@ -511,7 +511,7 @@ public class TestIndexReader extends LuceneTestCase // is open against the index: public void testWritingNorms() throws IOException { - String tempDir = System.getProperty("tempDir"); + String tempDir = "target/test"; if (tempDir == null) throw new IOException("tempDir undefined, cannot run test"); diff --git a/src/test/org/apache/lucene/index/TestIndexReaderClone.java b/src/test/org/apache/lucene/index/TestIndexReaderClone.java index d977f7c29b0..5d1cc9d0b25 100644 --- a/src/test/org/apache/lucene/index/TestIndexReaderClone.java +++ b/src/test/org/apache/lucene/index/TestIndexReaderClone.java @@ -17,9 +17,6 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.io.File; -import java.io.IOException; - import org.apache.lucene.index.SegmentReader.Norm; import org.apache.lucene.search.Similarity; import org.apache.lucene.analysis.SimpleAnalyzer; @@ -28,9 +25,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.MockRAMDirectory; -import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.store.AlreadyClosedException; /** * Tests cloning multiple types of readers, modifying the deletedDocs and norms @@ -273,13 +268,13 @@ public class TestIndexReaderClone extends LuceneTestCase { * @throws Exception */ private void performDefaultTests(IndexReader r1) throws Exception { - float norm1 = Similarity.decodeNorm(r1.norms("field1")[4]); + float norm1 = Similarity.getDefault().decodeNormValue(r1.norms("field1")[4]); IndexReader pr1Clone = (IndexReader) r1.clone(); pr1Clone.deleteDocument(10); pr1Clone.setNorm(4, "field1", 0.5f); - assertTrue(Similarity.decodeNorm(r1.norms("field1")[4]) == norm1); - assertTrue(Similarity.decodeNorm(pr1Clone.norms("field1")[4]) != norm1); + assertTrue(Similarity.getDefault().decodeNormValue(r1.norms("field1")[4]) == norm1); + assertTrue(Similarity.getDefault().decodeNormValue(pr1Clone.norms("field1")[4]) != norm1); assertTrue(!r1.isDeleted(10)); assertTrue(pr1Clone.isDeleted(10)); @@ -426,7 +421,7 @@ public class TestIndexReaderClone extends LuceneTestCase { TestIndexReaderReopen.createIndex(dir1, false); IndexReader orig = IndexReader.open(dir1, false); orig.setNorm(1, "field1", 17.0f); - final byte encoded = Similarity.encodeNorm(17.0f); + final byte encoded = Similarity.getDefault().encodeNormValue(17.0f); assertEquals(encoded, orig.norms("field1")[1]); // the cloned segmentreader should have 2 references, 1 to itself, and 1 to diff --git a/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java b/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java index e82c72bd125..577d4925a60 100644 --- a/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java +++ b/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java @@ -216,7 +216,8 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { } // norm values should be different - assertTrue(Similarity.decodeNorm(segmentReader3C.norms("field1")[5]) != Similarity.decodeNorm(segmentReader4C.norms("field1")[5])); + assertTrue(Similarity.getDefault().decodeNormValue(segmentReader3C.norms("field1")[5]) + != Similarity.getDefault().decodeNormValue(segmentReader4C.norms("field1")[5])); Norm reader4CCNorm = segmentReader4C.norms.get("field1"); assertEquals(3, reader3CCNorm.bytesRef().refCount()); assertEquals(1, reader4CCNorm.bytesRef().refCount()); @@ -283,7 +284,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { assertEquals("number of norms mismatches", numDocNorms, b.length); ArrayList storedNorms = (i == 1 ? modifiedNorms : norms); for (int j = 0; j < b.length; j++) { - float norm = Similarity.decodeNorm(b[j]); + float norm = Similarity.getDefault().decodeNormValue(b[j]); float norm1 = ((Float) storedNorms.get(j)).floatValue(); assertEquals("stored norm value of " + field + " for doc " + j + " is " + norm + " - a mismatch!", norm, norm1, 0.000001); @@ -321,7 +322,8 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { private float nextNorm() { float norm = lastNorm + normDelta; do { - float norm1 = Similarity.decodeNorm(Similarity.encodeNorm(norm)); + float norm1 = Similarity.getDefault().decodeNormValue( + Similarity.getDefault().encodeNormValue(norm)); if (norm1 > lastNorm) { // System.out.println(norm1+" > "+lastNorm); norm = norm1; diff --git a/src/test/org/apache/lucene/index/TestNorms.java b/src/test/org/apache/lucene/index/TestNorms.java index 78b17d959ce..fa544fb5eb4 100755 --- a/src/test/org/apache/lucene/index/TestNorms.java +++ b/src/test/org/apache/lucene/index/TestNorms.java @@ -189,7 +189,7 @@ public class TestNorms extends LuceneTestCase { assertEquals("number of norms mismatches",numDocNorms,b.length); ArrayList storedNorms = (i==1 ? modifiedNorms : norms); for (int j = 0; j < b.length; j++) { - float norm = Similarity.decodeNorm(b[j]); + float norm = similarityOne.decodeNormValue(b[j]); float norm1 = ((Float)storedNorms.get(j)).floatValue(); assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm, norm1, 0.000001); } @@ -224,7 +224,7 @@ public class TestNorms extends LuceneTestCase { private float nextNorm() { float norm = lastNorm + normDelta; do { - float norm1 = Similarity.decodeNorm(Similarity.encodeNorm(norm)); + float norm1 = similarityOne.decodeNormValue(similarityOne.encodeNormValue(norm)); if (norm1 > lastNorm) { //System.out.println(norm1+" > "+lastNorm); norm = norm1; diff --git a/src/test/org/apache/lucene/index/TestSegmentReader.java b/src/test/org/apache/lucene/index/TestSegmentReader.java index 58e219d960b..f9cc0ef53d1 100644 --- a/src/test/org/apache/lucene/index/TestSegmentReader.java +++ b/src/test/org/apache/lucene/index/TestSegmentReader.java @@ -26,7 +26,7 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.Similarity; import org.apache.lucene.store.RAMDirectory; public class TestSegmentReader extends LuceneTestCase { @@ -167,7 +167,7 @@ public class TestSegmentReader extends LuceneTestCase { if (!reader.hasNorms(f.name())) { // test for fake norms of 1.0 or null depending on the flag byte [] norms = reader.norms(f.name()); - byte norm1 = DefaultSimilarity.encodeNorm(1.0f); + byte norm1 = Similarity.getDefault().encodeNormValue(1.0f); assertNull(norms); norms = new byte[reader.maxDoc()]; reader.norms(f.name(),norms, 0);