From 24d76157a06bb370308fa94ff698d7c104826df3 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 3 Nov 2008 18:03:58 +0000 Subject: [PATCH] LUCENE-1420: let Similarity.computeNorm compute the norm; add option to discount overlap tokens when computing lengthNorm git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@710117 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 10 +- .../lucene/index/memory/MemoryIndex.java | 20 +++- .../lucene/misc/SweetSpotSimilarity.java | 53 +++++++++- .../apache/lucene/document/AbstractField.java | 10 +- .../org/apache/lucene/document/Fieldable.java | 12 ++- .../org/apache/lucene/index/DocInverter.java | 14 --- .../lucene/index/DocInverterPerField.java | 8 +- .../lucene/index/DocInverterPerThread.java | 2 +- .../apache/lucene/index/FieldInvertState.java | 100 ++++++++++++++++++ .../index/FreqProxTermsWriterPerField.java | 2 +- .../lucene/index/NormsWriterPerField.java | 4 +- .../index/TermVectorsTermsWriterPerField.java | 2 +- .../lucene/index/TermsHashPerField.java | 2 +- .../lucene/search/DefaultSimilarity.java | 43 ++++++++ .../org/apache/lucene/search/Similarity.java | 30 +++++- .../lucene/search/SimilarityDelegator.java | 6 ++ 16 files changed, 281 insertions(+), 37 deletions(-) create mode 100644 src/java/org/apache/lucene/index/FieldInvertState.java diff --git a/CHANGES.txt b/CHANGES.txt index 5729e7b6962..db4158f077a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -36,9 +36,17 @@ New features then retrievable via IndexReader.getCommitUserData instance and static methods. (Shalin Shekhar Mangar via Mike McCandless) - 3. LUCENE-1406: Added Arabic analyzer. (Robert Muir via Grant Ingersoll) + 4. LUCENE-1420: Similarity now has a computeNorm method that allows + custom Similarity classes to override how norm is computed. It's + provided a FieldInvertState instance that contains details from + inverting the field. The default impl is boost * + lengthNorm(numTerms), to be backwards compatible. Also added + {set/get}DiscountOverlaps to DefaultSimilarity, to control whether + overlapping tokens (tokens with 0 position increment) should be + counted in lengthNorm. (Andrzej Bialecki via Mike McCandless) + Optimizations 1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing diff --git a/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 60aa39a3fe4..bd5ab608dad 100644 --- a/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -41,6 +41,7 @@ import org.apache.lucene.index.TermFreqVector; import org.apache.lucene.index.TermPositionVector; import org.apache.lucene.index.TermPositions; import org.apache.lucene.index.TermVectorMapper; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.search.HitCollector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -348,6 +349,7 @@ public class MemoryIndex implements Serializable { HashMap terms = new HashMap(); int numTokens = 0; + int numOverlapTokens = 0; int pos = -1; final Token reusableToken = new Token(); for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) { @@ -355,7 +357,10 @@ public class MemoryIndex implements Serializable { if (term.length() == 0) continue; // nothing to do // if (DEBUG) System.err.println("token='" + term + "'"); numTokens++; - pos += nextToken.getPositionIncrement(); + final int posIncr = nextToken.getPositionIncrement(); + if (posIncr == 0) + numOverlapTokens++; + pos += posIncr; ArrayIntList positions = (ArrayIntList) terms.get(term); if (positions == null) { // term not seen before @@ -372,7 +377,7 @@ public class MemoryIndex implements Serializable { // ensure infos.numTokens > 0 invariant; needed for correct operation of terms() if (numTokens > 0) { boost = boost * docBoost; // see DocumentWriter.addDocument(...) - fields.put(fieldName, new Info(terms, numTokens, boost)); + fields.put(fieldName, new Info(terms, numTokens, numOverlapTokens, boost)); sortedFields = null; // invalidate sorted view, if any } } catch (IOException e) { // can never happen @@ -574,6 +579,9 @@ public class MemoryIndex implements Serializable { /** Number of added tokens for this field */ private final int numTokens; + /** Number of overlapping tokens for this field */ + private final int numOverlapTokens; + /** Boost factor for hits for this field */ private final float boost; @@ -582,9 +590,10 @@ public class MemoryIndex implements Serializable { private static final long serialVersionUID = 2882195016849084649L; - public Info(HashMap terms, int numTokens, float boost) { + public Info(HashMap terms, int numTokens, int numOverlapTokens, float boost) { this.terms = terms; this.numTokens = numTokens; + this.numOverlapTokens = numOverlapTokens; this.boost = boost; } @@ -1067,9 +1076,10 @@ public class MemoryIndex implements Serializable { if (fieldName != cachedFieldName || sim != cachedSimilarity) { // not cached? Info info = getInfo(fieldName); int numTokens = info != null ? info.numTokens : 0; - float n = sim.lengthNorm(fieldName, numTokens); + int numOverlapTokens = info != null ? info.numOverlapTokens : 0; float boost = info != null ? info.getBoost() : 1.0f; - n = n * boost; // see DocumentWriter.writeNorms(String segment) + FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost); + float n = sim.computeNorm(fieldName, invertState); byte norm = Similarity.encodeNorm(n); norms = new byte[] {norm}; diff --git a/contrib/miscellaneous/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java b/contrib/miscellaneous/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java index 8913efea7d1..862b1241489 100644 --- a/contrib/miscellaneous/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java +++ b/contrib/miscellaneous/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java @@ -19,6 +19,7 @@ package org.apache.lucene.misc; import org.apache.lucene.search.Similarity; import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.index.FieldInvertState; import java.util.Map; import java.util.HashMap; @@ -53,6 +54,7 @@ public class SweetSpotSimilarity extends DefaultSimilarity { private Map ln_mins = new HashMap(7); private Map ln_maxs = new HashMap(7); private Map ln_steeps = new HashMap(7); + private Map ln_overlaps = new HashMap(7); private float tf_base = 0.0f; private float tf_min = 0.0f; @@ -106,17 +108,66 @@ public class SweetSpotSimilarity extends DefaultSimilarity { } /** - * Sets the function variables used by lengthNorm for a specific named field + * Sets the function variables used by lengthNorm for a + * specific named field. + * + * @deprecated Please call {@link #setLengthNormFactors(String, + * int, int, float, boolean)} instead. + * + * @param field field name + * @param min minimum value + * @param max maximum value + * @param steepness steepness of the curve * * @see #lengthNorm */ public void setLengthNormFactors(String field, int min, int max, float steepness) { + setLengthNormFactors(field, min, max, steepness, false); + } + + /** + * Sets the function variables used by lengthNorm for a specific named field. + * + * @param field field name + * @param min minimum value + * @param max maximum value + * @param steepness steepness of the curve + * @param discountOverlaps if true, numOverlapTokens will be + * subtracted from numTokens; if false then + * numOverlapTokens will be assumed to be 0 (see + * {@link DefaultSimilarity#computeNorm(String, FieldInvertState)} for details). + * + * @see #lengthNorm + */ + public void setLengthNormFactors(String field, int min, int max, + float steepness, boolean discountOverlaps) { ln_mins.put(field, new Integer(min)); ln_maxs.put(field, new Integer(max)); ln_steeps.put(field, new Float(steepness)); + ln_overlaps.put(field, new Boolean(discountOverlaps)); } + /** + * Implemented as state.getBoost() * + * lengthNorm(fieldName, numTokens) where + * numTokens does not count overlap tokens if + * discountOverlaps is true by default or true for this + * specific field. */ + public float computeNorm(String fieldName, FieldInvertState state) { + final int numTokens; + boolean overlaps = discountOverlaps; + if (ln_overlaps.containsKey(fieldName)) { + overlaps = ((Boolean)ln_overlaps.get(fieldName)).booleanValue(); + } + if (overlaps) + numTokens = state.getLength() - state.getNumOverlap(); + else + numTokens = state.getLength(); + + return state.getBoost() * lengthNorm(fieldName, numTokens); + } + /** * Implemented as: * diff --git a/src/java/org/apache/lucene/document/AbstractField.java b/src/java/org/apache/lucene/document/AbstractField.java index c930b94fc59..3a218124f64 100755 --- a/src/java/org/apache/lucene/document/AbstractField.java +++ b/src/java/org/apache/lucene/document/AbstractField.java @@ -98,13 +98,19 @@ public abstract class AbstractField implements Fieldable { *

The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document * containing this field. If a document has multiple fields with the same * name, all such values are multiplied together. This product is then - * multipled by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and + * used to compute the norm factor for the field. By + * default, in the {@link + * org.apache.lucene.search.Similarity#computeNorm(String, + * FieldInvertState)} method, the boost value is multipled + * by the {@link + * org.apache.lucene.search.Similarity#lengthNorm(String, + * int)} and then * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the * index. One should attempt to ensure that this product does not overflow * the range of that encoding. * * @see org.apache.lucene.document.Document#setBoost(float) - * @see org.apache.lucene.search.Similarity#lengthNorm(String, int) + * @see org.apache.lucene.search.Similarity#computeNorm(String, org.apache.lucene.index.FieldInvertState) * @see org.apache.lucene.search.Similarity#encodeNorm(float) */ public void setBoost(float boost) { diff --git a/src/java/org/apache/lucene/document/Fieldable.java b/src/java/org/apache/lucene/document/Fieldable.java index 8fc11b48b91..363fff6c9cd 100755 --- a/src/java/org/apache/lucene/document/Fieldable.java +++ b/src/java/org/apache/lucene/document/Fieldable.java @@ -17,6 +17,7 @@ package org.apache.lucene.document; */ import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.FieldInvertState; import java.io.Reader; import java.io.Serializable; @@ -39,13 +40,18 @@ public interface Fieldable extends Serializable { *

The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document * containing this field. If a document has multiple fields with the same * name, all such values are multiplied together. This product is then - * multipled by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and - * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the + * used to compute the norm factor for the field. By + * default, in the {@link + * org.apache.lucene.search.Similarity#computeNorm(String, + * FieldInvertState)} method, the boost value is multipled + * by the {@link + * org.apache.lucene.search.Similarity#lengthNorm(String, + * int)} and then rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the * index. One should attempt to ensure that this product does not overflow * the range of that encoding. * * @see org.apache.lucene.document.Document#setBoost(float) - * @see org.apache.lucene.search.Similarity#lengthNorm(String, int) + * @see org.apache.lucene.search.Similarity#computeNorm(String, FieldInvertState) * @see org.apache.lucene.search.Similarity#encodeNorm(float) */ void setBoost(float boost); diff --git a/src/java/org/apache/lucene/index/DocInverter.java b/src/java/org/apache/lucene/index/DocInverter.java index 9a7631ce854..f5e57e47123 100644 --- a/src/java/org/apache/lucene/index/DocInverter.java +++ b/src/java/org/apache/lucene/index/DocInverter.java @@ -92,18 +92,4 @@ final class DocInverter extends DocFieldConsumer { public DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) { return new DocInverterPerThread(docFieldProcessorPerThread, this); } - - final static class FieldInvertState { - int position; - int length; - int offset; - float boost; - - void reset(float docBoost) { - position = 0; - length = 0; - offset = 0; - boost = docBoost; - } - } } diff --git a/src/java/org/apache/lucene/index/DocInverterPerField.java b/src/java/org/apache/lucene/index/DocInverterPerField.java index 799d45816aa..a07c982273b 100644 --- a/src/java/org/apache/lucene/index/DocInverterPerField.java +++ b/src/java/org/apache/lucene/index/DocInverterPerField.java @@ -39,7 +39,7 @@ final class DocInverterPerField extends DocFieldConsumerPerField { final InvertedDocConsumerPerField consumer; final InvertedDocEndConsumerPerField endConsumer; final DocumentsWriter.DocState docState; - final DocInverter.FieldInvertState fieldState; + final FieldInvertState fieldState; public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) { this.perThread = perThread; @@ -134,7 +134,11 @@ final class DocInverterPerField extends DocFieldConsumerPerField { Token token = stream.next(localToken); if (token == null) break; - fieldState.position += (token.getPositionIncrement() - 1); + final int posIncr = token.getPositionIncrement(); + fieldState.position += posIncr - 1; + if (posIncr == 0) + fieldState.numOverlap++; + boolean success = false; try { // If we hit an exception in here, we abort diff --git a/src/java/org/apache/lucene/index/DocInverterPerThread.java b/src/java/org/apache/lucene/index/DocInverterPerThread.java index e5b8f566699..1b80286c66c 100644 --- a/src/java/org/apache/lucene/index/DocInverterPerThread.java +++ b/src/java/org/apache/lucene/index/DocInverterPerThread.java @@ -32,7 +32,7 @@ final class DocInverterPerThread extends DocFieldConsumerPerThread { final Token localToken = new Token(); final DocumentsWriter.DocState docState; - final DocInverter.FieldInvertState fieldState = new DocInverter.FieldInvertState(); + final FieldInvertState fieldState = new FieldInvertState(); // Used to read a string value for a field final ReusableStringReader stringReader = new ReusableStringReader(); diff --git a/src/java/org/apache/lucene/index/FieldInvertState.java b/src/java/org/apache/lucene/index/FieldInvertState.java new file mode 100644 index 00000000000..c10455d59d2 --- /dev/null +++ b/src/java/org/apache/lucene/index/FieldInvertState.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.index; + +import org.apache.lucene.search.Similarity; + +/** + * This class tracks the number and position / offset parameters of terms + * being added to the index. The information collected in this class is + * also used to calculate the normalization factor for a field. + * + *

WARNING: This API is new and experimental, and may suddenly + * change.

+ */ +public final class FieldInvertState { + int position; + int length; + int numOverlap; + int offset; + float boost; + + public FieldInvertState() { + } + + public FieldInvertState(int position, int length, int numOverlap, int offset, float boost) { + this.position = position; + this.length = length; + this.numOverlap = numOverlap; + this.offset = offset; + this.boost = boost; + } + + /** + * Re-initialize the state, using this boost value. + * @param docBoost boost value to use. + */ + void reset(float docBoost) { + position = 0; + length = 0; + numOverlap = 0; + offset = 0; + boost = docBoost; + } + + /** + * Get the last processed term position. + * @return the position + */ + public int getPosition() { + return position; + } + + /** + * Get total number of terms in this field. + * @return the length + */ + public int getLength() { + return length; + } + + /** + * Get the number of terms with positionIncrement == 0. + * @return the numOverlap + */ + public int getNumOverlap() { + return numOverlap; + } + + /** + * Get end offset of the last processed term. + * @return the offset + */ + public int getOffset() { + return offset; + } + + /** + * Get boost value. This is the cumulative product of + * document boost and field boost for all field instances + * sharing the same field name. + * @return the boost + */ + public float getBoost() { + return boost; + } +} diff --git a/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java b/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java index 25261943811..151338b138e 100644 --- a/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java +++ b/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java @@ -30,7 +30,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem final TermsHashPerField termsHashPerField; final FieldInfo fieldInfo; final DocumentsWriter.DocState docState; - final DocInverter.FieldInvertState fieldState; + final FieldInvertState fieldState; boolean omitTf; public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo) { diff --git a/src/java/org/apache/lucene/index/NormsWriterPerField.java b/src/java/org/apache/lucene/index/NormsWriterPerField.java index b0096056c23..34364de5af7 100644 --- a/src/java/org/apache/lucene/index/NormsWriterPerField.java +++ b/src/java/org/apache/lucene/index/NormsWriterPerField.java @@ -36,7 +36,7 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement byte[] norms = new byte[1]; int upto; - final DocInverter.FieldInvertState fieldState; + final FieldInvertState fieldState; public void reset() { // Shrink back if we are overallocated now: @@ -68,7 +68,7 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement docIDs = ArrayUtil.grow(docIDs, 1+upto); norms = ArrayUtil.grow(norms, 1+upto); } - final float norm = fieldState.boost * docState.similarity.lengthNorm(fieldInfo.name, fieldState.length); + final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState); norms[upto] = Similarity.encodeNorm(norm); docIDs[upto] = docState.docID; upto++; diff --git a/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java b/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java index 39c77e58ec4..9b61f5ff6ad 100644 --- a/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java +++ b/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java @@ -30,7 +30,7 @@ final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField { final TermVectorsTermsWriter termsWriter; final FieldInfo fieldInfo; final DocumentsWriter.DocState docState; - final DocInverter.FieldInvertState fieldState; + final FieldInvertState fieldState; boolean doVectors; boolean doVectorPositions; diff --git a/src/java/org/apache/lucene/index/TermsHashPerField.java b/src/java/org/apache/lucene/index/TermsHashPerField.java index 3a073a0b7c8..bd87b05f354 100644 --- a/src/java/org/apache/lucene/index/TermsHashPerField.java +++ b/src/java/org/apache/lucene/index/TermsHashPerField.java @@ -30,7 +30,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { final TermsHashPerField nextPerField; final TermsHashPerThread perThread; final DocumentsWriter.DocState docState; - final DocInverter.FieldInvertState fieldState; + final FieldInvertState fieldState; // Copied from our perThread final CharBlockPool charPool; diff --git a/src/java/org/apache/lucene/search/DefaultSimilarity.java b/src/java/org/apache/lucene/search/DefaultSimilarity.java index aaa0631ca59..3612836d427 100644 --- a/src/java/org/apache/lucene/search/DefaultSimilarity.java +++ b/src/java/org/apache/lucene/search/DefaultSimilarity.java @@ -1,5 +1,7 @@ package org.apache.lucene.search; +import org.apache.lucene.index.FieldInvertState; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -19,6 +21,25 @@ package org.apache.lucene.search; /** Expert: Default scoring implementation. */ public class DefaultSimilarity extends Similarity { + + /** Implemented as + * state.getBoost()*lengthNorm(numTerms), where + * numTerms is {@link FieldInvertState#getLength()} if {@link + * #setDiscountOverlaps} is false, else it's {@link + * FieldInvertState#getLength()} - {@link + * FieldInvertState#getNumOverlap()}. + * + *

WARNING: This API is new and experimental, and may suddenly + * change.

*/ + public float computeNorm(String field, FieldInvertState state) { + final int numTerms; + if (discountOverlaps) + numTerms = state.getLength() - state.getNumOverlap(); + else + numTerms = state.getLength(); + return (float) (state.getBoost() * lengthNorm(field, numTerms)); + } + /** Implemented as 1/sqrt(numTerms). */ public float lengthNorm(String fieldName, int numTerms) { return (float)(1.0 / Math.sqrt(numTerms)); @@ -48,4 +69,26 @@ public class DefaultSimilarity extends Similarity { public float coord(int overlap, int maxOverlap) { return overlap / (float)maxOverlap; } + + // Default false + protected boolean discountOverlaps; + + /** Determines whether overlap tokens (Tokens with + * 0 position increment) are ignored when computing + * norm. By default this is false, meaning overlap + * tokens are counted just like non-overlap tokens. + * + *

WARNING: This API is new and experimental, and may suddenly + * change.

+ * + * @see #computeNorm + */ + public void setDiscountOverlaps(boolean v) { + discountOverlaps = v; + } + + /** @see #setDiscountOverlaps */ + public boolean getDiscountOverlaps() { + return discountOverlaps; + } } diff --git a/src/java/org/apache/lucene/search/Similarity.java b/src/java/org/apache/lucene/search/Similarity.java index b9dd63e9d34..cf527d7e891 100644 --- a/src/java/org/apache/lucene/search/Similarity.java +++ b/src/java/org/apache/lucene/search/Similarity.java @@ -17,6 +17,7 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.Term; import org.apache.lucene.util.SmallFloat; @@ -333,6 +334,29 @@ public abstract class Similarity implements Serializable { return NORM_TABLE; } + /** + * Compute the normalization value for a field, given the accumulated + * state of term processing for this field (see {@link FieldInvertState}). + * + *

Implementations should calculate a float value based on the field + * state and then return that value. + * + *

For backward compatibility this method by default calls + * {@link #lengthNorm(String, int)} passing + * {@link FieldInvertState#getLength()} as the second argument, and + * then multiplies this value by {@link FieldInvertState#getBoost()}.

+ * + *

WARNING: This API is new and experimental and may + * suddenly change.

+ * + * @param field field name + * @param state current processing state for this field + * @return the calculated float norm + */ + public float computeNorm(String field, FieldInvertState state) { + return (float) (state.getBoost() * lengthNorm(field, state.getLength())); + } + /** Computes the normalization value for a field given the total number of * terms contained in a field. These values, together with field boosts, are * stored in an index and multipled into scores for hits on each field by the @@ -341,10 +365,10 @@ public abstract class Similarity implements Serializable { *

Matches in longer fields are less precise, so implementations of this * method usually return smaller values when numTokens is large, * and larger values when numTokens is small. - * - *

That these values are computed under + * + *

Note that the return values are computed under * {@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document)} - * and stored then using + * and then stored using * {@link #encodeNorm(float)}. * Thus they have limited precision, and documents * must be re-indexed if this method is altered. diff --git a/src/java/org/apache/lucene/search/SimilarityDelegator.java b/src/java/org/apache/lucene/search/SimilarityDelegator.java index 4fc26efb39e..48a1d2724b0 100644 --- a/src/java/org/apache/lucene/search/SimilarityDelegator.java +++ b/src/java/org/apache/lucene/search/SimilarityDelegator.java @@ -1,5 +1,7 @@ package org.apache.lucene.search; +import org.apache.lucene.index.FieldInvertState; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -32,6 +34,10 @@ public class SimilarityDelegator extends Similarity { this.delegee = delegee; } + public float computeNorm(String fieldName, FieldInvertState state) { + return delegee.computeNorm(fieldName, state); + } + public float lengthNorm(String fieldName, int numTerms) { return delegee.lengthNorm(fieldName, numTerms); }