mirror of https://github.com/apache/lucene.git
LUCENE-1420: let Similarity.computeNorm compute the norm; add option to discount overlap tokens when computing lengthNorm
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@710117 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4990152243
commit
24d76157a0
10
CHANGES.txt
10
CHANGES.txt
|
@ -36,9 +36,17 @@ New features
|
|||
then retrievable via IndexReader.getCommitUserData instance and
|
||||
static methods. (Shalin Shekhar Mangar via Mike McCandless)
|
||||
|
||||
|
||||
3. LUCENE-1406: Added Arabic analyzer. (Robert Muir via Grant Ingersoll)
|
||||
|
||||
4. LUCENE-1420: Similarity now has a computeNorm method that allows
|
||||
custom Similarity classes to override how norm is computed. It's
|
||||
provided a FieldInvertState instance that contains details from
|
||||
inverting the field. The default impl is boost *
|
||||
lengthNorm(numTerms), to be backwards compatible. Also added
|
||||
{set/get}DiscountOverlaps to DefaultSimilarity, to control whether
|
||||
overlapping tokens (tokens with 0 position increment) should be
|
||||
counted in lengthNorm. (Andrzej Bialecki via Mike McCandless)
|
||||
|
||||
Optimizations
|
||||
|
||||
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
|
||||
|
|
|
@ -41,6 +41,7 @@ import org.apache.lucene.index.TermFreqVector;
|
|||
import org.apache.lucene.index.TermPositionVector;
|
||||
import org.apache.lucene.index.TermPositions;
|
||||
import org.apache.lucene.index.TermVectorMapper;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.search.HitCollector;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
@ -348,6 +349,7 @@ public class MemoryIndex implements Serializable {
|
|||
|
||||
HashMap terms = new HashMap();
|
||||
int numTokens = 0;
|
||||
int numOverlapTokens = 0;
|
||||
int pos = -1;
|
||||
final Token reusableToken = new Token();
|
||||
for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
|
||||
|
@ -355,7 +357,10 @@ public class MemoryIndex implements Serializable {
|
|||
if (term.length() == 0) continue; // nothing to do
|
||||
// if (DEBUG) System.err.println("token='" + term + "'");
|
||||
numTokens++;
|
||||
pos += nextToken.getPositionIncrement();
|
||||
final int posIncr = nextToken.getPositionIncrement();
|
||||
if (posIncr == 0)
|
||||
numOverlapTokens++;
|
||||
pos += posIncr;
|
||||
|
||||
ArrayIntList positions = (ArrayIntList) terms.get(term);
|
||||
if (positions == null) { // term not seen before
|
||||
|
@ -372,7 +377,7 @@ public class MemoryIndex implements Serializable {
|
|||
// ensure infos.numTokens > 0 invariant; needed for correct operation of terms()
|
||||
if (numTokens > 0) {
|
||||
boost = boost * docBoost; // see DocumentWriter.addDocument(...)
|
||||
fields.put(fieldName, new Info(terms, numTokens, boost));
|
||||
fields.put(fieldName, new Info(terms, numTokens, numOverlapTokens, boost));
|
||||
sortedFields = null; // invalidate sorted view, if any
|
||||
}
|
||||
} catch (IOException e) { // can never happen
|
||||
|
@ -574,6 +579,9 @@ public class MemoryIndex implements Serializable {
|
|||
/** Number of added tokens for this field */
|
||||
private final int numTokens;
|
||||
|
||||
/** Number of overlapping tokens for this field */
|
||||
private final int numOverlapTokens;
|
||||
|
||||
/** Boost factor for hits for this field */
|
||||
private final float boost;
|
||||
|
||||
|
@ -582,9 +590,10 @@ public class MemoryIndex implements Serializable {
|
|||
|
||||
private static final long serialVersionUID = 2882195016849084649L;
|
||||
|
||||
public Info(HashMap terms, int numTokens, float boost) {
|
||||
public Info(HashMap terms, int numTokens, int numOverlapTokens, float boost) {
|
||||
this.terms = terms;
|
||||
this.numTokens = numTokens;
|
||||
this.numOverlapTokens = numOverlapTokens;
|
||||
this.boost = boost;
|
||||
}
|
||||
|
||||
|
@ -1067,9 +1076,10 @@ public class MemoryIndex implements Serializable {
|
|||
if (fieldName != cachedFieldName || sim != cachedSimilarity) { // not cached?
|
||||
Info info = getInfo(fieldName);
|
||||
int numTokens = info != null ? info.numTokens : 0;
|
||||
float n = sim.lengthNorm(fieldName, numTokens);
|
||||
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
|
||||
float boost = info != null ? info.getBoost() : 1.0f;
|
||||
n = n * boost; // see DocumentWriter.writeNorms(String segment)
|
||||
FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
|
||||
float n = sim.computeNorm(fieldName, invertState);
|
||||
byte norm = Similarity.encodeNorm(n);
|
||||
norms = new byte[] {norm};
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.misc;
|
|||
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
@ -53,6 +54,7 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
|
|||
private Map ln_mins = new HashMap(7);
|
||||
private Map ln_maxs = new HashMap(7);
|
||||
private Map ln_steeps = new HashMap(7);
|
||||
private Map ln_overlaps = new HashMap(7);
|
||||
|
||||
private float tf_base = 0.0f;
|
||||
private float tf_min = 0.0f;
|
||||
|
@ -106,15 +108,64 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
|
|||
}
|
||||
|
||||
/**
|
||||
* Sets the function variables used by lengthNorm for a specific named field
|
||||
* Sets the function variables used by lengthNorm for a
|
||||
* specific named field.
|
||||
*
|
||||
* @deprecated Please call {@link #setLengthNormFactors(String,
|
||||
* int, int, float, boolean)} instead.
|
||||
*
|
||||
* @param field field name
|
||||
* @param min minimum value
|
||||
* @param max maximum value
|
||||
* @param steepness steepness of the curve
|
||||
*
|
||||
* @see #lengthNorm
|
||||
*/
|
||||
public void setLengthNormFactors(String field, int min, int max,
|
||||
float steepness) {
|
||||
setLengthNormFactors(field, min, max, steepness, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the function variables used by lengthNorm for a specific named field.
|
||||
*
|
||||
* @param field field name
|
||||
* @param min minimum value
|
||||
* @param max maximum value
|
||||
* @param steepness steepness of the curve
|
||||
* @param discountOverlaps if true, <code>numOverlapTokens</code> will be
|
||||
* subtracted from <code>numTokens</code>; if false then
|
||||
* <code>numOverlapTokens</code> will be assumed to be 0 (see
|
||||
* {@link DefaultSimilarity#computeNorm(String, FieldInvertState)} for details).
|
||||
*
|
||||
* @see #lengthNorm
|
||||
*/
|
||||
public void setLengthNormFactors(String field, int min, int max,
|
||||
float steepness, boolean discountOverlaps) {
|
||||
ln_mins.put(field, new Integer(min));
|
||||
ln_maxs.put(field, new Integer(max));
|
||||
ln_steeps.put(field, new Float(steepness));
|
||||
ln_overlaps.put(field, new Boolean(discountOverlaps));
|
||||
}
|
||||
|
||||
/**
|
||||
* Implemented as <code> state.getBoost() *
|
||||
* lengthNorm(fieldName, numTokens) </code> where
|
||||
* numTokens does not count overlap tokens if
|
||||
* discountOverlaps is true by default or true for this
|
||||
* specific field. */
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
final int numTokens;
|
||||
boolean overlaps = discountOverlaps;
|
||||
if (ln_overlaps.containsKey(fieldName)) {
|
||||
overlaps = ((Boolean)ln_overlaps.get(fieldName)).booleanValue();
|
||||
}
|
||||
if (overlaps)
|
||||
numTokens = state.getLength() - state.getNumOverlap();
|
||||
else
|
||||
numTokens = state.getLength();
|
||||
|
||||
return state.getBoost() * lengthNorm(fieldName, numTokens);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -98,13 +98,19 @@ public abstract class AbstractField implements Fieldable {
|
|||
* <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
|
||||
* containing this field. If a document has multiple fields with the same
|
||||
* name, all such values are multiplied together. This product is then
|
||||
* multipled by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and
|
||||
* used to compute the norm factor for the field. By
|
||||
* default, in the {@link
|
||||
* org.apache.lucene.search.Similarity#computeNorm(String,
|
||||
* FieldInvertState)} method, the boost value is multipled
|
||||
* by the {@link
|
||||
* org.apache.lucene.search.Similarity#lengthNorm(String,
|
||||
* int)} and then
|
||||
* rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
|
||||
* index. One should attempt to ensure that this product does not overflow
|
||||
* the range of that encoding.
|
||||
*
|
||||
* @see org.apache.lucene.document.Document#setBoost(float)
|
||||
* @see org.apache.lucene.search.Similarity#lengthNorm(String, int)
|
||||
* @see org.apache.lucene.search.Similarity#computeNorm(String, org.apache.lucene.index.FieldInvertState)
|
||||
* @see org.apache.lucene.search.Similarity#encodeNorm(float)
|
||||
*/
|
||||
public void setBoost(float boost) {
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.document;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.io.Serializable;
|
||||
|
@ -39,13 +40,18 @@ public interface Fieldable extends Serializable {
|
|||
* <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
|
||||
* containing this field. If a document has multiple fields with the same
|
||||
* name, all such values are multiplied together. This product is then
|
||||
* multipled by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and
|
||||
* rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
|
||||
* used to compute the norm factor for the field. By
|
||||
* default, in the {@link
|
||||
* org.apache.lucene.search.Similarity#computeNorm(String,
|
||||
* FieldInvertState)} method, the boost value is multipled
|
||||
* by the {@link
|
||||
* org.apache.lucene.search.Similarity#lengthNorm(String,
|
||||
* int)} and then rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
|
||||
* index. One should attempt to ensure that this product does not overflow
|
||||
* the range of that encoding.
|
||||
*
|
||||
* @see org.apache.lucene.document.Document#setBoost(float)
|
||||
* @see org.apache.lucene.search.Similarity#lengthNorm(String, int)
|
||||
* @see org.apache.lucene.search.Similarity#computeNorm(String, FieldInvertState)
|
||||
* @see org.apache.lucene.search.Similarity#encodeNorm(float)
|
||||
*/
|
||||
void setBoost(float boost);
|
||||
|
|
|
@ -92,18 +92,4 @@ final class DocInverter extends DocFieldConsumer {
|
|||
public DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) {
|
||||
return new DocInverterPerThread(docFieldProcessorPerThread, this);
|
||||
}
|
||||
|
||||
final static class FieldInvertState {
|
||||
int position;
|
||||
int length;
|
||||
int offset;
|
||||
float boost;
|
||||
|
||||
void reset(float docBoost) {
|
||||
position = 0;
|
||||
length = 0;
|
||||
offset = 0;
|
||||
boost = docBoost;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
|||
final InvertedDocConsumerPerField consumer;
|
||||
final InvertedDocEndConsumerPerField endConsumer;
|
||||
final DocumentsWriter.DocState docState;
|
||||
final DocInverter.FieldInvertState fieldState;
|
||||
final FieldInvertState fieldState;
|
||||
|
||||
public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) {
|
||||
this.perThread = perThread;
|
||||
|
@ -134,7 +134,11 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
|||
Token token = stream.next(localToken);
|
||||
|
||||
if (token == null) break;
|
||||
fieldState.position += (token.getPositionIncrement() - 1);
|
||||
final int posIncr = token.getPositionIncrement();
|
||||
fieldState.position += posIncr - 1;
|
||||
if (posIncr == 0)
|
||||
fieldState.numOverlap++;
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
// If we hit an exception in here, we abort
|
||||
|
|
|
@ -32,7 +32,7 @@ final class DocInverterPerThread extends DocFieldConsumerPerThread {
|
|||
final Token localToken = new Token();
|
||||
final DocumentsWriter.DocState docState;
|
||||
|
||||
final DocInverter.FieldInvertState fieldState = new DocInverter.FieldInvertState();
|
||||
final FieldInvertState fieldState = new FieldInvertState();
|
||||
|
||||
// Used to read a string value for a field
|
||||
final ReusableStringReader stringReader = new ReusableStringReader();
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.index;
|
||||
|
||||
import org.apache.lucene.search.Similarity;
|
||||
|
||||
/**
|
||||
* This class tracks the number and position / offset parameters of terms
|
||||
* being added to the index. The information collected in this class is
|
||||
* also used to calculate the normalization factor for a field.
|
||||
*
|
||||
* <p><b>WARNING</b>: This API is new and experimental, and may suddenly
|
||||
* change.</p>
|
||||
*/
|
||||
public final class FieldInvertState {
|
||||
int position;
|
||||
int length;
|
||||
int numOverlap;
|
||||
int offset;
|
||||
float boost;
|
||||
|
||||
public FieldInvertState() {
|
||||
}
|
||||
|
||||
public FieldInvertState(int position, int length, int numOverlap, int offset, float boost) {
|
||||
this.position = position;
|
||||
this.length = length;
|
||||
this.numOverlap = numOverlap;
|
||||
this.offset = offset;
|
||||
this.boost = boost;
|
||||
}
|
||||
|
||||
/**
|
||||
* Re-initialize the state, using this boost value.
|
||||
* @param docBoost boost value to use.
|
||||
*/
|
||||
void reset(float docBoost) {
|
||||
position = 0;
|
||||
length = 0;
|
||||
numOverlap = 0;
|
||||
offset = 0;
|
||||
boost = docBoost;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the last processed term position.
|
||||
* @return the position
|
||||
*/
|
||||
public int getPosition() {
|
||||
return position;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get total number of terms in this field.
|
||||
* @return the length
|
||||
*/
|
||||
public int getLength() {
|
||||
return length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of terms with <code>positionIncrement == 0</code>.
|
||||
* @return the numOverlap
|
||||
*/
|
||||
public int getNumOverlap() {
|
||||
return numOverlap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get end offset of the last processed term.
|
||||
* @return the offset
|
||||
*/
|
||||
public int getOffset() {
|
||||
return offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get boost value. This is the cumulative product of
|
||||
* document boost and field boost for all field instances
|
||||
* sharing the same field name.
|
||||
* @return the boost
|
||||
*/
|
||||
public float getBoost() {
|
||||
return boost;
|
||||
}
|
||||
}
|
|
@ -30,7 +30,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
|||
final TermsHashPerField termsHashPerField;
|
||||
final FieldInfo fieldInfo;
|
||||
final DocumentsWriter.DocState docState;
|
||||
final DocInverter.FieldInvertState fieldState;
|
||||
final FieldInvertState fieldState;
|
||||
boolean omitTf;
|
||||
|
||||
public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo) {
|
||||
|
|
|
@ -36,7 +36,7 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement
|
|||
byte[] norms = new byte[1];
|
||||
int upto;
|
||||
|
||||
final DocInverter.FieldInvertState fieldState;
|
||||
final FieldInvertState fieldState;
|
||||
|
||||
public void reset() {
|
||||
// Shrink back if we are overallocated now:
|
||||
|
@ -68,7 +68,7 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement
|
|||
docIDs = ArrayUtil.grow(docIDs, 1+upto);
|
||||
norms = ArrayUtil.grow(norms, 1+upto);
|
||||
}
|
||||
final float norm = fieldState.boost * docState.similarity.lengthNorm(fieldInfo.name, fieldState.length);
|
||||
final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);
|
||||
norms[upto] = Similarity.encodeNorm(norm);
|
||||
docIDs[upto] = docState.docID;
|
||||
upto++;
|
||||
|
|
|
@ -30,7 +30,7 @@ final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField {
|
|||
final TermVectorsTermsWriter termsWriter;
|
||||
final FieldInfo fieldInfo;
|
||||
final DocumentsWriter.DocState docState;
|
||||
final DocInverter.FieldInvertState fieldState;
|
||||
final FieldInvertState fieldState;
|
||||
|
||||
boolean doVectors;
|
||||
boolean doVectorPositions;
|
||||
|
|
|
@ -30,7 +30,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
|
|||
final TermsHashPerField nextPerField;
|
||||
final TermsHashPerThread perThread;
|
||||
final DocumentsWriter.DocState docState;
|
||||
final DocInverter.FieldInvertState fieldState;
|
||||
final FieldInvertState fieldState;
|
||||
|
||||
// Copied from our perThread
|
||||
final CharBlockPool charPool;
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -19,6 +21,25 @@ package org.apache.lucene.search;
|
|||
|
||||
/** Expert: Default scoring implementation. */
|
||||
public class DefaultSimilarity extends Similarity {
|
||||
|
||||
/** Implemented as
|
||||
* <code>state.getBoost()*lengthNorm(numTerms)</code>, where
|
||||
* <code>numTerms</code> is {@link FieldInvertState#getLength()} if {@link
|
||||
* #setDiscountOverlaps} is false, else it's {@link
|
||||
* FieldInvertState#getLength()} - {@link
|
||||
* FieldInvertState#getNumOverlap()}.
|
||||
*
|
||||
* <p><b>WARNING</b>: This API is new and experimental, and may suddenly
|
||||
* change.</p> */
|
||||
public float computeNorm(String field, FieldInvertState state) {
|
||||
final int numTerms;
|
||||
if (discountOverlaps)
|
||||
numTerms = state.getLength() - state.getNumOverlap();
|
||||
else
|
||||
numTerms = state.getLength();
|
||||
return (float) (state.getBoost() * lengthNorm(field, numTerms));
|
||||
}
|
||||
|
||||
/** Implemented as <code>1/sqrt(numTerms)</code>. */
|
||||
public float lengthNorm(String fieldName, int numTerms) {
|
||||
return (float)(1.0 / Math.sqrt(numTerms));
|
||||
|
@ -48,4 +69,26 @@ public class DefaultSimilarity extends Similarity {
|
|||
public float coord(int overlap, int maxOverlap) {
|
||||
return overlap / (float)maxOverlap;
|
||||
}
|
||||
|
||||
// Default false
|
||||
protected boolean discountOverlaps;
|
||||
|
||||
/** Determines whether overlap tokens (Tokens with
|
||||
* 0 position increment) are ignored when computing
|
||||
* norm. By default this is false, meaning overlap
|
||||
* tokens are counted just like non-overlap tokens.
|
||||
*
|
||||
* <p><b>WARNING</b>: This API is new and experimental, and may suddenly
|
||||
* change.</p>
|
||||
*
|
||||
* @see #computeNorm
|
||||
*/
|
||||
public void setDiscountOverlaps(boolean v) {
|
||||
discountOverlaps = v;
|
||||
}
|
||||
|
||||
/** @see #setDiscountOverlaps */
|
||||
public boolean getDiscountOverlaps() {
|
||||
return discountOverlaps;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.SmallFloat;
|
||||
|
||||
|
@ -333,6 +334,29 @@ public abstract class Similarity implements Serializable {
|
|||
return NORM_TABLE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the normalization value for a field, given the accumulated
|
||||
* state of term processing for this field (see {@link FieldInvertState}).
|
||||
*
|
||||
* <p>Implementations should calculate a float value based on the field
|
||||
* state and then return that value.
|
||||
*
|
||||
* <p>For backward compatibility this method by default calls
|
||||
* {@link #lengthNorm(String, int)} passing
|
||||
* {@link FieldInvertState#getLength()} as the second argument, and
|
||||
* then multiplies this value by {@link FieldInvertState#getBoost()}.</p>
|
||||
*
|
||||
* <p><b>WARNING</b>: This API is new and experimental and may
|
||||
* suddenly change.</p>
|
||||
*
|
||||
* @param field field name
|
||||
* @param state current processing state for this field
|
||||
* @return the calculated float norm
|
||||
*/
|
||||
public float computeNorm(String field, FieldInvertState state) {
|
||||
return (float) (state.getBoost() * lengthNorm(field, state.getLength()));
|
||||
}
|
||||
|
||||
/** Computes the normalization value for a field given the total number of
|
||||
* terms contained in a field. These values, together with field boosts, are
|
||||
* stored in an index and multipled into scores for hits on each field by the
|
||||
|
@ -342,9 +366,9 @@ public abstract class Similarity implements Serializable {
|
|||
* method usually return smaller values when <code>numTokens</code> is large,
|
||||
* and larger values when <code>numTokens</code> is small.
|
||||
*
|
||||
* <p>That these values are computed under
|
||||
* <p>Note that the return values are computed under
|
||||
* {@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document)}
|
||||
* and stored then using
|
||||
* and then stored using
|
||||
* {@link #encodeNorm(float)}.
|
||||
* Thus they have limited precision, and documents
|
||||
* must be re-indexed if this method is altered.
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -32,6 +34,10 @@ public class SimilarityDelegator extends Similarity {
|
|||
this.delegee = delegee;
|
||||
}
|
||||
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
return delegee.computeNorm(fieldName, state);
|
||||
}
|
||||
|
||||
public float lengthNorm(String fieldName, int numTerms) {
|
||||
return delegee.lengthNorm(fieldName, numTerms);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue