mirror of https://github.com/apache/lucene.git
LUCENE-2912: remove field param from computeNorm, scorePayload ; remove UOE'd lengthNorm, switch SweetSpot to per-field
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1069980 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4953202fba
commit
a01e9cbb86
|
@ -332,8 +332,12 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
|
|||
toString(), port your customization over to reflectWith(). reflectAsString() would
|
||||
then return what toString() did before.
|
||||
|
||||
* LUCENE-2236: DefaultSimilarity can no longer be set statically (and dangerously) for the entire JVM.
|
||||
* LUCENE-2236, LUCENE-2912: DefaultSimilarity can no longer be set statically
|
||||
(and dangerously) for the entire JVM.
|
||||
Instead, IndexWriterConfig and IndexSearcher now take a SimilarityProvider.
|
||||
Similarity can now be configured on a per-field basis.
|
||||
Similarity retains only the field-specific relevance methods such as tf() and idf().
|
||||
Previously some (but not all) of these methods, such as computeNorm and scorePayload took
|
||||
field as a parameter, this is removed due to the fact the entire Similarity (all methods)
|
||||
can now be configured per-field.
|
||||
Methods that apply to the entire query such as coord() and queryNorm() exist in SimilarityProvider.
|
||||
|
|
|
@ -38,6 +38,11 @@ API Changes
|
|||
* LUCENE-2638 MakeHighFreqTerms.TermStats public to make it more useful
|
||||
for API use. (Andrzej Bialecki)
|
||||
|
||||
* LUCENE-2912: The field-specific hashmaps in SweetSpotSimilarity were removed.
|
||||
Instead, use SimilarityProvider to return different SweetSpotSimilaritys
|
||||
for different fields, this way all parameters (such as TF factors) can be
|
||||
customized on a per-field basis. (Robert Muir)
|
||||
|
||||
======================= Lucene 3.x (not yet released) =======================
|
||||
|
||||
Changes in backwards compatibility policy
|
||||
|
|
|
@ -241,7 +241,7 @@ public class InstantiatedIndexWriter implements Closeable {
|
|||
final FieldInvertState invertState = new FieldInvertState();
|
||||
invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost * document.getDocument().getBoost());
|
||||
invertState.setLength(eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
|
||||
final float norm = similarityProvider.get(fieldName).computeNorm(fieldName, invertState);
|
||||
final float norm = similarityProvider.get(fieldName).computeNorm(invertState);
|
||||
normsByFieldNameAndDocumentNumber.get(fieldName)[document.getDocumentNumber()] = similarityProvider.get(fieldName).encodeNormValue(norm);
|
||||
} else {
|
||||
System.currentTimeMillis();
|
||||
|
|
|
@ -1190,7 +1190,7 @@ public class MemoryIndex {
|
|||
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
|
||||
float boost = info != null ? info.getBoost() : 1.0f;
|
||||
FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
|
||||
float n = fieldSim.computeNorm(fieldName, invertState);
|
||||
float n = fieldSim.computeNorm(invertState);
|
||||
byte norm = fieldSim.encodeNormValue(n);
|
||||
norms = new byte[] {norm};
|
||||
|
||||
|
|
|
@ -149,7 +149,7 @@ public class FieldNormModifier {
|
|||
for (int d = 0; d < termCounts.length; d++) {
|
||||
if (delDocs == null || !delDocs.get(d)) {
|
||||
invertState.setLength(termCounts[d]);
|
||||
subReader.setNorm(d, fieldName, fieldSim.encodeNormValue(fieldSim.computeNorm(fieldName, invertState)));
|
||||
subReader.setNorm(d, fieldName, fieldSim.encodeNormValue(fieldSim.computeNorm(invertState)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,9 +20,6 @@ package org.apache.lucene.misc;
|
|||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* A similarity with a lengthNorm that provides for a "plateau" of
|
||||
* equally good lengths, and tf helper functions.
|
||||
|
@ -50,11 +47,6 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
|
|||
private int ln_max = 1;
|
||||
private float ln_steep = 0.5f;
|
||||
|
||||
private Map<String,Number> ln_maxs = new HashMap<String,Number>(7);
|
||||
private Map<String,Number> ln_mins = new HashMap<String,Number>(7);
|
||||
private Map<String,Float> ln_steeps = new HashMap<String,Float>(7);
|
||||
private Map<String,Boolean> ln_overlaps = new HashMap<String,Boolean>(7);
|
||||
|
||||
private float tf_base = 0.0f;
|
||||
private float tf_min = 0.0f;
|
||||
|
||||
|
@ -98,55 +90,31 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
|
|||
* Sets the default function variables used by lengthNorm when no field
|
||||
* specific variables have been set.
|
||||
*
|
||||
* @see #lengthNorm
|
||||
* @see #computeLengthNorm
|
||||
*/
|
||||
public void setLengthNormFactors(int min, int max, float steepness) {
|
||||
public void setLengthNormFactors(int min, int max, float steepness, boolean discountOverlaps) {
|
||||
this.ln_min = min;
|
||||
this.ln_max = max;
|
||||
this.ln_steep = steepness;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the function variables used by lengthNorm for a specific named field.
|
||||
*
|
||||
* @param field field name
|
||||
* @param min minimum value
|
||||
* @param max maximum value
|
||||
* @param steepness steepness of the curve
|
||||
* @param discountOverlaps if true, <code>numOverlapTokens</code> will be
|
||||
* subtracted from <code>numTokens</code>; if false then
|
||||
* <code>numOverlapTokens</code> will be assumed to be 0 (see
|
||||
* {@link DefaultSimilarity#computeNorm(String, FieldInvertState)} for details).
|
||||
*
|
||||
* @see #lengthNorm
|
||||
*/
|
||||
public void setLengthNormFactors(String field, int min, int max,
|
||||
float steepness, boolean discountOverlaps) {
|
||||
ln_mins.put(field, Integer.valueOf(min));
|
||||
ln_maxs.put(field, Integer.valueOf(max));
|
||||
ln_steeps.put(field, Float.valueOf(steepness));
|
||||
ln_overlaps.put(field, new Boolean(discountOverlaps));
|
||||
this.discountOverlaps = discountOverlaps;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implemented as <code> state.getBoost() *
|
||||
* lengthNorm(fieldName, numTokens) </code> where
|
||||
* computeLengthNorm(numTokens) </code> where
|
||||
* numTokens does not count overlap tokens if
|
||||
* discountOverlaps is true by default or true for this
|
||||
* specific field. */
|
||||
@Override
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
final int numTokens;
|
||||
boolean overlaps = discountOverlaps;
|
||||
if (ln_overlaps.containsKey(fieldName)) {
|
||||
overlaps = ln_overlaps.get(fieldName).booleanValue();
|
||||
}
|
||||
if (overlaps)
|
||||
|
||||
if (discountOverlaps)
|
||||
numTokens = state.getLength() - state.getNumOverlap();
|
||||
else
|
||||
numTokens = state.getLength();
|
||||
|
||||
return state.getBoost() * computeLengthNorm(fieldName, numTokens);
|
||||
return state.getBoost() * computeLengthNorm(numTokens);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -167,20 +135,10 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
|
|||
*
|
||||
* @see #setLengthNormFactors
|
||||
*/
|
||||
public float computeLengthNorm(String fieldName, int numTerms) {
|
||||
int l = ln_min;
|
||||
int h = ln_max;
|
||||
float s = ln_steep;
|
||||
|
||||
if (ln_mins.containsKey(fieldName)) {
|
||||
l = ln_mins.get(fieldName).intValue();
|
||||
}
|
||||
if (ln_maxs.containsKey(fieldName)) {
|
||||
h = ln_maxs.get(fieldName).intValue();
|
||||
}
|
||||
if (ln_steeps.containsKey(fieldName)) {
|
||||
s = ln_steeps.get(fieldName).floatValue();
|
||||
}
|
||||
public float computeLengthNorm(int numTerms) {
|
||||
final int l = ln_min;
|
||||
final int h = ln_max;
|
||||
final float s = ln_steep;
|
||||
|
||||
return (float)
|
||||
(1.0f /
|
||||
|
|
|
@ -44,7 +44,7 @@ public class TestFieldNormModifier extends LuceneTestCase {
|
|||
/** inverts the normal notion of lengthNorm */
|
||||
public static SimilarityProvider s = new DefaultSimilarity() {
|
||||
@Override
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
|
||||
}
|
||||
};
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.misc;
|
|||
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
|
||||
|
@ -30,8 +31,8 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
|
|||
|
||||
public void testSweetSpotComputeNorm() {
|
||||
|
||||
SweetSpotSimilarity ss = new SweetSpotSimilarity();
|
||||
ss.setLengthNormFactors(1,1,0.5f);
|
||||
final SweetSpotSimilarity ss = new SweetSpotSimilarity();
|
||||
ss.setLengthNormFactors(1,1,0.5f,true);
|
||||
|
||||
Similarity d = new DefaultSimilarity();
|
||||
Similarity s = ss;
|
||||
|
@ -43,28 +44,28 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
|
|||
for (int i = 1; i < 1000; i++) {
|
||||
invertState.setLength(i);
|
||||
assertEquals("base case: i="+i,
|
||||
d.computeNorm("foo", invertState),
|
||||
s.computeNorm("foo", invertState),
|
||||
d.computeNorm(invertState),
|
||||
s.computeNorm(invertState),
|
||||
0.0f);
|
||||
}
|
||||
|
||||
// make a sweet spot
|
||||
|
||||
ss.setLengthNormFactors(3,10,0.5f);
|
||||
ss.setLengthNormFactors(3,10,0.5f,true);
|
||||
|
||||
for (int i = 3; i <=10; i++) {
|
||||
invertState.setLength(i);
|
||||
assertEquals("3,10: spot i="+i,
|
||||
1.0f,
|
||||
s.computeNorm("foo", invertState),
|
||||
s.computeNorm(invertState),
|
||||
0.0f);
|
||||
}
|
||||
|
||||
for (int i = 10; i < 1000; i++) {
|
||||
invertState.setLength(i-9);
|
||||
final float normD = d.computeNorm("foo", invertState);
|
||||
final float normD = d.computeNorm(invertState);
|
||||
invertState.setLength(i);
|
||||
final float normS = s.computeNorm("foo", invertState);
|
||||
final float normS = s.computeNorm(invertState);
|
||||
assertEquals("3,10: 10<x : i="+i,
|
||||
normD,
|
||||
normS,
|
||||
|
@ -74,22 +75,42 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
|
|||
|
||||
// seperate sweet spot for certain fields
|
||||
|
||||
ss.setLengthNormFactors("bar",8,13, 0.5f, false);
|
||||
ss.setLengthNormFactors("yak",6,9, 0.5f, false);
|
||||
|
||||
final SweetSpotSimilarity ssBar = new SweetSpotSimilarity();
|
||||
ssBar.setLengthNormFactors(8,13, 0.5f, false);
|
||||
final SweetSpotSimilarity ssYak = new SweetSpotSimilarity();
|
||||
ssYak.setLengthNormFactors(6,9, 0.5f, false);
|
||||
final SweetSpotSimilarity ssA = new SweetSpotSimilarity();
|
||||
ssA.setLengthNormFactors(5,8,0.5f, false);
|
||||
final SweetSpotSimilarity ssB = new SweetSpotSimilarity();
|
||||
ssB.setLengthNormFactors(5,8,0.1f, false);
|
||||
|
||||
SimilarityProvider sp = new SweetSpotSimilarity() {
|
||||
public Similarity get(String field) {
|
||||
if (field.equals("bar"))
|
||||
return ssBar;
|
||||
else if (field.equals("yak"))
|
||||
return ssYak;
|
||||
else if (field.equals("a"))
|
||||
return ssA;
|
||||
else if (field.equals("b"))
|
||||
return ssB;
|
||||
else
|
||||
return ss;
|
||||
}
|
||||
};
|
||||
|
||||
for (int i = 3; i <=10; i++) {
|
||||
invertState.setLength(i);
|
||||
assertEquals("f: 3,10: spot i="+i,
|
||||
1.0f,
|
||||
s.computeNorm("foo", invertState),
|
||||
sp.get("foo").computeNorm(invertState),
|
||||
0.0f);
|
||||
}
|
||||
for (int i = 10; i < 1000; i++) {
|
||||
invertState.setLength(i-9);
|
||||
final float normD = d.computeNorm("foo", invertState);
|
||||
final float normD = d.computeNorm(invertState);
|
||||
invertState.setLength(i);
|
||||
final float normS = s.computeNorm("foo", invertState);
|
||||
final float normS = sp.get("foo").computeNorm(invertState);
|
||||
assertEquals("f: 3,10: 10<x : i="+i,
|
||||
normD,
|
||||
normS,
|
||||
|
@ -99,21 +120,21 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
|
|||
invertState.setLength(i);
|
||||
assertEquals("f: 8,13: spot i="+i,
|
||||
1.0f,
|
||||
s.computeNorm("bar", invertState),
|
||||
sp.get("bar").computeNorm(invertState),
|
||||
0.0f);
|
||||
}
|
||||
for (int i = 6; i <=9; i++) {
|
||||
invertState.setLength(i);
|
||||
assertEquals("f: 6,9: spot i="+i,
|
||||
1.0f,
|
||||
s.computeNorm("yak", invertState),
|
||||
sp.get("yak").computeNorm(invertState),
|
||||
0.0f);
|
||||
}
|
||||
for (int i = 13; i < 1000; i++) {
|
||||
invertState.setLength(i-12);
|
||||
final float normD = d.computeNorm("foo", invertState);
|
||||
final float normD = d.computeNorm(invertState);
|
||||
invertState.setLength(i);
|
||||
final float normS = s.computeNorm("bar", invertState);
|
||||
final float normS = sp.get("bar").computeNorm(invertState);
|
||||
assertEquals("f: 8,13: 13<x : i="+i,
|
||||
normD,
|
||||
normS,
|
||||
|
@ -121,9 +142,9 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
|
|||
}
|
||||
for (int i = 9; i < 1000; i++) {
|
||||
invertState.setLength(i-8);
|
||||
final float normD = d.computeNorm("foo", invertState);
|
||||
final float normD = d.computeNorm(invertState);
|
||||
invertState.setLength(i);
|
||||
final float normS = s.computeNorm("yak", invertState);
|
||||
final float normS = sp.get("yak").computeNorm(invertState);
|
||||
assertEquals("f: 6,9: 9<x : i="+i,
|
||||
normD,
|
||||
normS,
|
||||
|
@ -133,13 +154,10 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
|
|||
|
||||
// steepness
|
||||
|
||||
ss.setLengthNormFactors("a",5,8,0.5f, false);
|
||||
ss.setLengthNormFactors("b",5,8,0.1f, false);
|
||||
|
||||
for (int i = 9; i < 1000; i++) {
|
||||
invertState.setLength(i);
|
||||
final float normSS = ss.computeNorm("a", invertState);
|
||||
final float normS = s.computeNorm("b", invertState);
|
||||
final float normSS = sp.get("a").computeNorm(invertState);
|
||||
final float normS = sp.get("b").computeNorm(invertState);
|
||||
assertTrue("s: i="+i+" : a="+normSS+
|
||||
" < b="+normS,
|
||||
normSS < normS);
|
||||
|
|
|
@ -49,7 +49,7 @@ public class TestLengthNormModifier extends LuceneTestCase {
|
|||
/** inverts the normal notion of lengthNorm */
|
||||
public static SimilarityProvider s = new DefaultSimilarity() {
|
||||
@Override
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
|
||||
}
|
||||
};
|
||||
|
@ -165,7 +165,7 @@ public class TestLengthNormModifier extends LuceneTestCase {
|
|||
// override the norms to be inverted
|
||||
SimilarityProvider s = new DefaultSimilarity() {
|
||||
@Override
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
|
||||
}
|
||||
};
|
||||
|
|
|
@ -76,17 +76,14 @@ public abstract class AbstractField implements Fieldable {
|
|||
* name, all such values are multiplied together. This product is then
|
||||
* used to compute the norm factor for the field. By
|
||||
* default, in the {@link
|
||||
* org.apache.lucene.search.Similarity#computeNorm(String,
|
||||
* FieldInvertState)} method, the boost value is multiplied
|
||||
* by the {@link
|
||||
* org.apache.lucene.search.Similarity#lengthNorm(String,
|
||||
* int)} and then
|
||||
* org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method, the boost value is multiplied
|
||||
* by the length normalization factor and then
|
||||
* rounded by {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before it is stored in the
|
||||
* index. One should attempt to ensure that this product does not overflow
|
||||
* the range of that encoding.
|
||||
*
|
||||
* @see org.apache.lucene.document.Document#setBoost(float)
|
||||
* @see org.apache.lucene.search.Similarity#computeNorm(String, FieldInvertState)
|
||||
* @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
|
||||
* @see org.apache.lucene.search.Similarity#encodeNormValue(float)
|
||||
*/
|
||||
public void setBoost(float boost) {
|
||||
|
|
|
@ -43,16 +43,14 @@ public interface Fieldable {
|
|||
* name, all such values are multiplied together. This product is then
|
||||
* used to compute the norm factor for the field. By
|
||||
* default, in the {@link
|
||||
* org.apache.lucene.search.Similarity#computeNorm(String,
|
||||
* FieldInvertState)} method, the boost value is multiplied
|
||||
* by the {@link
|
||||
* org.apache.lucene.search.Similarity#lengthNorm(String,
|
||||
* int)} and then rounded by {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before it is stored in the
|
||||
* org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method, the boost value is multiplied
|
||||
* by the length normalization factor
|
||||
* and then rounded by {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before it is stored in the
|
||||
* index. One should attempt to ensure that this product does not overflow
|
||||
* the range of that encoding.
|
||||
*
|
||||
* @see org.apache.lucene.document.Document#setBoost(float)
|
||||
* @see org.apache.lucene.search.Similarity#computeNorm(String, FieldInvertState)
|
||||
* @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
|
||||
* @see org.apache.lucene.search.Similarity#encodeNormValue(float)
|
||||
*/
|
||||
void setBoost(float boost);
|
||||
|
|
|
@ -1004,8 +1004,8 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
|||
|
||||
/** Expert: Resets the normalization factor for the named field of the named
|
||||
* document. The norm represents the product of the field's {@link
|
||||
* org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
|
||||
* int) length normalization}. Thus, to preserve the length normalization
|
||||
* org.apache.lucene.document.Fieldable#setBoost(float) boost} and its
|
||||
* length normalization}. Thus, to preserve the length normalization
|
||||
* values when resetting this, one should base the new value upon the old.
|
||||
*
|
||||
* <b>NOTE:</b> If this field does not store norms, then
|
||||
|
|
|
@ -74,7 +74,7 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement
|
|||
assert norms.length == upto;
|
||||
norms = ArrayUtil.grow(norms, 1+upto);
|
||||
}
|
||||
final float norm = similarity.computeNorm(fieldInfo.name, fieldState);
|
||||
final float norm = similarity.computeNorm(fieldState);
|
||||
norms[upto] = similarity.encodeNormValue(norm);
|
||||
docIDs[upto] = docState.docID;
|
||||
upto++;
|
||||
|
|
|
@ -31,7 +31,7 @@ public class DefaultSimilarity extends Similarity implements SimilarityProvider
|
|||
*
|
||||
* @lucene.experimental */
|
||||
@Override
|
||||
public float computeNorm(String field, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
final int numTerms;
|
||||
if (discountOverlaps)
|
||||
numTerms = state.getLength() - state.getNumOverlap();
|
||||
|
|
|
@ -561,49 +561,13 @@ public abstract class Similarity {
|
|||
* Thus they have limited precision, and documents
|
||||
* must be re-indexed if this method is altered.
|
||||
*
|
||||
* <p>For backward compatibility this method by default calls
|
||||
* {@link #lengthNorm(String, int)} passing
|
||||
* {@link FieldInvertState#getLength()} as the second argument, and
|
||||
* then multiplies this value by {@link FieldInvertState#getBoost()}.</p>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
* @param field field name
|
||||
* @param state current processing state for this field
|
||||
* @return the calculated float norm
|
||||
*/
|
||||
public abstract float computeNorm(String field, FieldInvertState state);
|
||||
public abstract float computeNorm(FieldInvertState state);
|
||||
|
||||
/** Computes the normalization value for a field given the total number of
|
||||
* terms contained in a field. These values, together with field boosts, are
|
||||
* stored in an index and multipled into scores for hits on each field by the
|
||||
* search code.
|
||||
*
|
||||
* <p>Matches in longer fields are less precise, so implementations of this
|
||||
* method usually return smaller values when <code>numTokens</code> is large,
|
||||
* and larger values when <code>numTokens</code> is small.
|
||||
*
|
||||
* <p>Note that the return values are computed under
|
||||
* {@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document)}
|
||||
* and then stored using
|
||||
* {@link #encodeNormValue(float)}.
|
||||
* Thus they have limited precision, and documents
|
||||
* must be re-indexed if this method is altered.
|
||||
*
|
||||
* @param fieldName the name of the field
|
||||
* @param numTokens the total number of tokens contained in fields named
|
||||
* <i>fieldName</i> of <i>doc</i>.
|
||||
* @return a normalization factor for hits on this field of this document
|
||||
*
|
||||
* @see org.apache.lucene.document.Field#setBoost(float)
|
||||
*
|
||||
* @deprecated Please override computeNorm instead
|
||||
*/
|
||||
@Deprecated
|
||||
public final float lengthNorm(String fieldName, int numTokens) {
|
||||
throw new UnsupportedOperationException("please use computeNorm instead");
|
||||
}
|
||||
|
||||
/** Encodes a normalization factor for storage in an index.
|
||||
*
|
||||
* <p>The encoding uses a three-bit mantissa, a five-bit exponent, and
|
||||
|
@ -781,7 +745,6 @@ public abstract class Similarity {
|
|||
* The default implementation returns 1.
|
||||
*
|
||||
* @param docId The docId currently being scored. If this value is {@link #NO_DOC_ID_PROVIDED}, then it should be assumed that the PayloadQuery implementation does not provide document information
|
||||
* @param fieldName The fieldName of the term this payload belongs to
|
||||
* @param start The start position of the payload
|
||||
* @param end The end position of the payload
|
||||
* @param payload The payload byte array to be scored
|
||||
|
@ -791,7 +754,7 @@ public abstract class Similarity {
|
|||
*
|
||||
*/
|
||||
// TODO: maybe switch this API to BytesRef?
|
||||
public float scorePayload(int docId, String fieldName, int start, int end, byte [] payload, int offset, int length)
|
||||
public float scorePayload(int docId, int start, int end, byte [] payload, int offset, int length)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -192,7 +192,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
protected void processPayloads(Collection<byte[]> payLoads, int start, int end) {
|
||||
for (final byte[] thePayload : payLoads) {
|
||||
payloadScore = function.currentScore(doc, fieldName, start, end,
|
||||
payloadsSeen, payloadScore, similarity.scorePayload(doc, fieldName,
|
||||
payloadsSeen, payloadScore, similarity.scorePayload(doc,
|
||||
spans.start(), spans.end(), thePayload, 0, thePayload.length));
|
||||
++payloadsSeen;
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ import java.io.IOException;
|
|||
* {@link org.apache.lucene.index.Term} occurs.
|
||||
* <p>
|
||||
* In order to take advantage of this, you must override
|
||||
* {@link org.apache.lucene.search.Similarity#scorePayload(int, String, int, int, byte[],int,int)}
|
||||
* {@link org.apache.lucene.search.Similarity#scorePayload(int, int, int, byte[],int,int)}
|
||||
* which returns 1 by default.
|
||||
* <p>
|
||||
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
|
||||
|
@ -119,14 +119,14 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
if (payload != null) {
|
||||
payloadScore = function.currentScore(doc, term.field(),
|
||||
spans.start(), spans.end(), payloadsSeen, payloadScore,
|
||||
similarity.scorePayload(doc, term.field(), spans.start(),
|
||||
similarity.scorePayload(doc, spans.start(),
|
||||
spans.end(), payload.bytes,
|
||||
payload.offset,
|
||||
payload.length));
|
||||
} else {
|
||||
payloadScore = function.currentScore(doc, term.field(),
|
||||
spans.start(), spans.end(), payloadsSeen, payloadScore,
|
||||
similarity.scorePayload(doc, term.field(), spans.start(),
|
||||
similarity.scorePayload(doc, spans.start(),
|
||||
spans.end(), null,
|
||||
0,
|
||||
0));
|
||||
|
|
|
@ -43,7 +43,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
|
|||
|
||||
private class SimilarityOne extends DefaultSimilarity {
|
||||
@Override
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
// diable length norm
|
||||
return state.getBoost();
|
||||
}
|
||||
|
|
|
@ -109,7 +109,7 @@ public class TestMaxTermFrequency extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public float computeNorm(String field, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
return (float) state.getMaxTermFrequency();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ public class TestNorms extends LuceneTestCase {
|
|||
|
||||
private class SimilarityOne extends DefaultSimilarity {
|
||||
@Override
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
// Disable length norm
|
||||
return state.getBoost();
|
||||
}
|
||||
|
@ -252,7 +252,7 @@ public class TestNorms extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public float computeNorm(String field, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
return (float) state.getLength();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ import org.apache.lucene.search.Explanation.IDFExplanation;
|
|||
public class TestOmitTf extends LuceneTestCase {
|
||||
|
||||
public static class SimpleSimilarity extends Similarity implements SimilarityProvider {
|
||||
@Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); }
|
||||
@Override public float computeNorm(FieldInvertState state) { return state.getBoost(); }
|
||||
@Override public float tf(float freq) { return freq; }
|
||||
@Override public float sloppyFreq(int distance) { return 2.0f; }
|
||||
@Override public float idf(int docFreq, int numDocs) { return 1.0f; }
|
||||
|
|
|
@ -248,7 +248,7 @@ final class JustCompileSearch {
|
|||
}
|
||||
|
||||
@Override
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
|
|
|
@ -62,7 +62,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
// Disable length norm
|
||||
return state.getBoost();
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@ import org.apache.lucene.search.Explanation.IDFExplanation;
|
|||
public class TestSimilarity extends LuceneTestCase {
|
||||
|
||||
public static class SimpleSimilarity extends Similarity implements SimilarityProvider {
|
||||
@Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); }
|
||||
@Override public float computeNorm(FieldInvertState state) { return state.getBoost(); }
|
||||
@Override public float tf(float freq) { return freq; }
|
||||
@Override public float sloppyFreq(int distance) { return 2.0f; }
|
||||
@Override public float idf(int docFreq, int numDocs) { return 1.0f; }
|
||||
|
|
|
@ -107,7 +107,7 @@ public class TestSimilarityProvider extends LuceneTestCase {
|
|||
|
||||
private class Sim1 extends Similarity {
|
||||
@Override
|
||||
public float computeNorm(String field, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
return 1f;
|
||||
}
|
||||
|
||||
|
@ -129,7 +129,7 @@ public class TestSimilarityProvider extends LuceneTestCase {
|
|||
|
||||
private class Sim2 extends Similarity {
|
||||
@Override
|
||||
public float computeNorm(String field, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
return 10f;
|
||||
}
|
||||
|
||||
|
|
|
@ -299,14 +299,14 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
// must be static for weight serialization tests
|
||||
static class BoostingSimilarity extends DefaultSimilarity {
|
||||
|
||||
@Override public float scorePayload(int docId, String fieldName, int start, int end, byte[] payload, int offset, int length) {
|
||||
@Override public float scorePayload(int docId, int start, int end, byte[] payload, int offset, int length) {
|
||||
//we know it is size 4 here, so ignore the offset/length
|
||||
return payload[offset];
|
||||
}
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
//Make everything else 1 so we see the effect of the payload
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
@Override public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
@Override public float computeNorm(FieldInvertState state) {
|
||||
return state.getBoost();
|
||||
}
|
||||
|
||||
|
|
|
@ -287,7 +287,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
|||
|
||||
// TODO: Remove warning after API has been finalized
|
||||
@Override
|
||||
public float scorePayload(int docId, String fieldName, int start, int end, byte[] payload, int offset, int length) {
|
||||
public float scorePayload(int docId, int start, int end, byte[] payload, int offset, int length) {
|
||||
//we know it is size 4 here, so ignore the offset/length
|
||||
return payload[offset];
|
||||
}
|
||||
|
@ -296,7 +296,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
|||
//Make everything else 1 so we see the effect of the payload
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
@Override
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
public float computeNorm(FieldInvertState state) {
|
||||
return state.getBoost();
|
||||
}
|
||||
|
||||
|
|
|
@ -299,7 +299,7 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
|
|||
state.setBoost(1.0f);
|
||||
state.setLength(4);
|
||||
assertQ(req("fl","*,score","q", "{!func}norm(a_t)", "fq","id:2"),
|
||||
"//float[@name='score']='" + similarity.computeNorm("a_t",state) + "'"); // sqrt(4)==2 and is exactly representable when quantized to a byte
|
||||
"//float[@name='score']='" + similarity.computeNorm(state) + "'"); // sqrt(4)==2 and is exactly representable when quantized to a byte
|
||||
|
||||
// test that ord and rord are working on a global index basis, not just
|
||||
// at the segment level (since Lucene 2.9 has switched to per-segment searching)
|
||||
|
|
Loading…
Reference in New Issue