From 4f7eba849a001ae27d879482e9ee06313874be3b Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sat, 8 Jan 2011 19:20:05 +0000 Subject: [PATCH] LUCENE-2854: remove SimilarityDelegator; force cutover of Similarity.lengthNorm -> Similarity.computeNorm git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1056771 13f79535-47bb-0310-9956-ffa450edef68 --- .../instantiated/InstantiatedIndexWriter.java | 8 +- .../lucene/index/FieldNormModifier.java | 7 +- .../lucene/misc/SweetSpotSimilarity.java | 5 +- .../lucene/index/TestFieldNormModifier.java | 4 +- .../lucene/misc/SweetSpotSimilarityTest.java | 63 +++++++++++---- .../lucene/misc/TestLengthNormModifier.java | 21 ++--- .../lucene/search/FuzzyLikeThisQuery.java | 44 +---------- .../apache/lucene/index/FieldInvertState.java | 12 +++ .../apache/lucene/search/BooleanQuery.java | 38 ++++------ .../apache/lucene/search/BooleanScorer.java | 7 +- .../apache/lucene/search/BooleanScorer2.java | 52 +++++++------ .../lucene/search/ConjunctionScorer.java | 10 +-- .../lucene/search/DefaultSimilarity.java | 8 +- .../org/apache/lucene/search/Similarity.java | 30 ++++++-- .../lucene/search/SimilarityDelegator.java | 76 ------------------- .../index/TestIndexReaderCloneNorms.java | 5 +- .../org/apache/lucene/index/TestNorms.java | 5 +- .../org/apache/lucene/index/TestOmitTf.java | 2 +- .../lucene/search/JustCompileSearch.java | 3 +- .../lucene/search/TestBooleanScorer.java | 2 +- .../search/TestDisjunctionMaxQuery.java | 6 +- .../apache/lucene/search/TestSimilarity.java | 3 +- .../search/payloads/TestPayloadNearQuery.java | 5 +- .../search/payloads/TestPayloadTermQuery.java | 5 +- .../apache/lucene/search/spans/TestSpans.java | 2 +- .../apache/solr/search/SolrSimilarity.java | 37 --------- .../search/function/TestFunctionQuery.java | 6 +- 27 files changed, 191 insertions(+), 275 deletions(-) delete mode 100644 lucene/src/java/org/apache/lucene/search/SimilarityDelegator.java delete mode 100644 
solr/src/java/org/apache/solr/search/SolrSimilarity.java diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java index 093b907193b..0bfa8bd6e26 100644 --- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java +++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java @@ -38,6 +38,7 @@ import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermVectorOffsetInfo; @@ -235,9 +236,10 @@ public class InstantiatedIndexWriter implements Closeable { termsInDocument += eFieldTermDocInfoFactoriesByTermText.getValue().size(); if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) { - float norm = eFieldTermDocInfoFactoriesByTermText.getKey().boost; - norm *= document.getDocument().getBoost(); - norm *= similarity.lengthNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength); + final FieldInvertState invertState = new FieldInvertState(); + invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost * document.getDocument().getBoost()); + invertState.setLength(eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength); + final float norm = similarity.computeNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, invertState); normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = similarity.encodeNormValue(norm); } else { 
System.currentTimeMillis(); diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java index 9cfd56803ca..14fa0970fd6 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java @@ -33,7 +33,7 @@ import org.apache.lucene.util.ReaderUtil; /** * Given a directory and a list of fields, updates the fieldNorms in place for every document. * - * If Similarity class is specified, uses its lengthNorm method to set norms. + * If Similarity class is specified, uses its computeNorm method to set norms. * If -n command line argument is used, removed field norms, as if * {@link org.apache.lucene.document.Field.Index}.NO_NORMS was used. * @@ -119,6 +119,7 @@ public class FieldNormModifier { final List subReaders = new ArrayList(); ReaderUtil.gatherSubReaders(subReaders, reader); + final FieldInvertState invertState = new FieldInvertState(); for(IndexReader subReader : subReaders) { final Bits delDocs = subReader.getDeletedDocs(); @@ -143,9 +144,11 @@ public class FieldNormModifier { } } + invertState.setBoost(1.0f); for (int d = 0; d < termCounts.length; d++) { if (delDocs == null || !delDocs.get(d)) { - subReader.setNorm(d, fieldName, sim.encodeNormValue(sim.lengthNorm(fieldName, termCounts[d]))); + invertState.setLength(termCounts[d]); + subReader.setNorm(d, fieldName, sim.encodeNormValue(sim.computeNorm(fieldName, invertState))); } } } diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java b/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java index 00a60eb1a82..cda2f0790bf 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java @@ -146,7 +146,7 @@ public class SweetSpotSimilarity extends 
DefaultSimilarity { else numTokens = state.getLength(); - return state.getBoost() * lengthNorm(fieldName, numTokens); + return state.getBoost() * computeLengthNorm(fieldName, numTokens); } /** @@ -167,8 +167,7 @@ public class SweetSpotSimilarity extends DefaultSimilarity { * * @see #setLengthNormFactors */ - @Override - public float lengthNorm(String fieldName, int numTerms) { + public float computeLengthNorm(String fieldName, int numTerms) { int l = ln_min; int h = ln_max; float s = ln_steep; diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java index 078ba2c3f26..8066ea4e399 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java @@ -43,8 +43,8 @@ public class TestFieldNormModifier extends LuceneTestCase { /** inverts the normal notion of lengthNorm */ public static Similarity s = new DefaultSimilarity() { @Override - public float lengthNorm(String fieldName, int numTokens) { - return numTokens; + public float computeNorm(String fieldName, FieldInvertState state) { + return state.getBoost() * (discountOverlaps ? 
state.getLength() - state.getNumOverlap() : state.getLength()); } }; diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java b/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java index 36450e23ae3..170ef247842 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java @@ -21,13 +21,14 @@ package org.apache.lucene.misc; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.index.FieldInvertState; /** * Test of the SweetSpotSimilarity */ public class SweetSpotSimilarityTest extends LuceneTestCase { - public void testSweetSpotLengthNorm() { + public void testSweetSpotComputeNorm() { SweetSpotSimilarity ss = new SweetSpotSimilarity(); ss.setLengthNormFactors(1,1,0.5f); @@ -37,10 +38,13 @@ public class SweetSpotSimilarityTest extends LuceneTestCase { // base case, should degrade - + final FieldInvertState invertState = new FieldInvertState(); + invertState.setBoost(1.0f); for (int i = 1; i < 1000; i++) { + invertState.setLength(i); assertEquals("base case: i="+i, - d.lengthNorm("foo",i), s.lengthNorm("foo",i), + d.computeNorm("foo", invertState), + s.computeNorm("foo", invertState), 0.0f); } @@ -49,14 +53,21 @@ public class SweetSpotSimilarityTest extends LuceneTestCase { ss.setLengthNormFactors(3,10,0.5f); for (int i = 3; i <=10; i++) { + invertState.setLength(i); assertEquals("3,10: spot i="+i, - 1.0f, s.lengthNorm("foo",i), + 1.0f, + s.computeNorm("foo", invertState), 0.0f); } for (int i = 10; i < 1000; i++) { + invertState.setLength(i-9); + final float normD = d.computeNorm("foo", invertState); + invertState.setLength(i); + final float normS = s.computeNorm("foo", invertState); assertEquals("3,10: 10positionIncrement == 0. 
* @return the numOverlap @@ -81,6 +85,10 @@ public final class FieldInvertState { return numOverlap; } + public void setNumOverlap(int numOverlap) { + this.numOverlap = numOverlap; + } + /** * Get end offset of the last processed term. * @return the offset @@ -99,6 +107,10 @@ public final class FieldInvertState { return boost; } + public void setBoost(float boost) { + this.boost = boost; + } + public AttributeSource getAttributeSource() { return attributeSource; } diff --git a/lucene/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/src/java/org/apache/lucene/search/BooleanQuery.java index d756eff3bad..65523a6da7d 100644 --- a/lucene/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/src/java/org/apache/lucene/search/BooleanQuery.java @@ -63,10 +63,12 @@ public class BooleanQuery extends Query implements Iterable { } private ArrayList clauses = new ArrayList(); - private boolean disableCoord; + private final boolean disableCoord; /** Constructs an empty boolean query. */ - public BooleanQuery() {} + public BooleanQuery() { + disableCoord = false; + } /** Constructs an empty boolean query. * @@ -87,22 +89,6 @@ public class BooleanQuery extends Query implements Iterable { */ public boolean isCoordDisabled() { return disableCoord; } - // Implement coord disabling. - // Inherit javadoc. - @Override - public Similarity getSimilarity(IndexSearcher searcher) { - Similarity result = super.getSimilarity(searcher); - if (disableCoord) { // disable coord as requested - result = new SimilarityDelegator(result) { - @Override - public float coord(int overlap, int maxOverlap) { - return 1.0f; - } - }; - } - return result; - } - /** * Specifies a minimum number of the optional BooleanClauses * which must be satisfied. 
@@ -179,10 +165,12 @@ public class BooleanQuery extends Query implements Iterable { protected Similarity similarity; protected ArrayList weights; protected int maxCoord; // num optional + num required + private final boolean disableCoord; - public BooleanWeight(IndexSearcher searcher) + public BooleanWeight(IndexSearcher searcher, boolean disableCoord) throws IOException { this.similarity = getSimilarity(searcher); + this.disableCoord = disableCoord; weights = new ArrayList(clauses.size()); for (int i = 0 ; i < clauses.size(); i++) { BooleanClause c = clauses.get(i); @@ -285,10 +273,10 @@ public class BooleanQuery extends Query implements Iterable { sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE); sumExpl.setValue(sum); - float coordFactor = similarity.coord(coord, maxCoord); - if (coordFactor == 1.0f) // coord is no-op + final float coordFactor = disableCoord ? 1.0f : similarity.coord(coord, maxCoord); + if (coordFactor == 1.0f) { return sumExpl; // eliminate wrapper - else { + } else { ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(), sum*coordFactor, "product of:"); @@ -324,7 +312,7 @@ public class BooleanQuery extends Query implements Iterable { // Check if we can return a BooleanScorer if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) { - return new BooleanScorer(this, similarity, minNrShouldMatch, optional, prohibited, maxCoord); + return new BooleanScorer(this, disableCoord, similarity, minNrShouldMatch, optional, prohibited, maxCoord); } if (required.size() == 0 && optional.size() == 0) { @@ -338,7 +326,7 @@ public class BooleanQuery extends Query implements Iterable { } // Return a BooleanScorer2 - return new BooleanScorer2(this, similarity, minNrShouldMatch, required, prohibited, optional, maxCoord); + return new BooleanScorer2(this, disableCoord, similarity, minNrShouldMatch, required, prohibited, optional, maxCoord); } @Override @@ -364,7 +352,7 @@ public class BooleanQuery extends Query 
implements Iterable { @Override public Weight createWeight(IndexSearcher searcher) throws IOException { - return new BooleanWeight(searcher); + return new BooleanWeight(searcher, disableCoord); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/BooleanScorer.java b/lucene/src/java/org/apache/lucene/search/BooleanScorer.java index 3a90fe023ce..a4db5bdb4de 100644 --- a/lucene/src/java/org/apache/lucene/search/BooleanScorer.java +++ b/lucene/src/java/org/apache/lucene/search/BooleanScorer.java @@ -197,9 +197,9 @@ final class BooleanScorer extends Scorer { private Bucket current; private int doc = -1; - BooleanScorer(Weight weight, Similarity similarity, int minNrShouldMatch, + BooleanScorer(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch, List optionalScorers, List prohibitedScorers, int maxCoord) throws IOException { - super(similarity, weight); + super(null, weight); // Similarity not used this.minNrShouldMatch = minNrShouldMatch; if (optionalScorers != null && optionalScorers.size() > 0) { @@ -222,9 +222,8 @@ final class BooleanScorer extends Scorer { } coordFactors = new float[optionalScorers.size() + 1]; - Similarity sim = getSimilarity(); for (int i = 0; i < coordFactors.length; i++) { - coordFactors[i] = sim.coord(i, maxCoord); + coordFactors[i] = disableCoord ? 1.0f : similarity.coord(i, maxCoord); } } diff --git a/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java b/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java index c8dcf2eba20..0d47da6ce58 100644 --- a/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java +++ b/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java @@ -42,14 +42,12 @@ class BooleanScorer2 extends Scorer { int maxCoord = 0; // to be increased for each non prohibited scorer int nrMatchers; // to be increased by score() of match counting scorers. - void init() { // use after all scorers have been added. 
+ void init(Similarity sim, boolean disableCoord) { // use after all scorers have been added. coordFactors = new float[optionalScorers.size() + requiredScorers.size() + 1]; - Similarity sim = getSimilarity(); for (int i = 0; i < coordFactors.length; i++) { - coordFactors[i] = sim.coord(i, maxCoord); + coordFactors[i] = disableCoord ? 1.0f : sim.coord(i, maxCoord); } } - } private final Coordinator coordinator; @@ -82,9 +80,9 @@ class BooleanScorer2 extends Scorer { * @param optional * the list of optional scorers. */ - public BooleanScorer2(Weight weight, Similarity similarity, int minNrShouldMatch, + public BooleanScorer2(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch, List required, List prohibited, List optional, int maxCoord) throws IOException { - super(similarity, weight); + super(null, weight); // Similarity not used if (minNrShouldMatch < 0) { throw new IllegalArgumentException("Minimum number of optional scorers should not be negative"); } @@ -96,8 +94,8 @@ class BooleanScorer2 extends Scorer { requiredScorers = required; prohibitedScorers = prohibited; - coordinator.init(); - countingSumScorer = makeCountingSumScorer(); + coordinator.init(similarity, disableCoord); + countingSumScorer = makeCountingSumScorer(disableCoord, similarity); } /** Count a scorer as a single match. */ @@ -109,7 +107,7 @@ class BooleanScorer2 extends Scorer { private float lastDocScore = Float.NaN; SingleMatchScorer(Scorer scorer) { - super(scorer.getSimilarity()); + super(null); // No similarity used. 
this.scorer = scorer; } @@ -164,12 +162,12 @@ class BooleanScorer2 extends Scorer { }; } - private static final Similarity defaultSimilarity = Similarity.getDefault(); - - private Scorer countingConjunctionSumScorer(List requiredScorers) throws IOException { + private Scorer countingConjunctionSumScorer(boolean disableCoord, + Similarity similarity, + List requiredScorers) throws IOException { // each scorer from the list counted as a single matcher final int requiredNrMatchers = requiredScorers.size(); - return new ConjunctionScorer(defaultSimilarity, requiredScorers) { + return new ConjunctionScorer(disableCoord ? 1.0f : similarity.coord(requiredScorers.size(), requiredScorers.size()), requiredScorers) { private int lastScoredDoc = -1; // Save the score of lastScoredDoc, so that we don't compute it more than // once in score(). @@ -192,8 +190,10 @@ class BooleanScorer2 extends Scorer { }; } - private Scorer dualConjunctionSumScorer(Scorer req1, Scorer req2) throws IOException { // non counting. - return new ConjunctionScorer(defaultSimilarity, req1, req2); + private Scorer dualConjunctionSumScorer(boolean disableCoord, + Similarity similarity, + Scorer req1, Scorer req2) throws IOException { // non counting. + return new ConjunctionScorer(disableCoord ? 1.0f : similarity.coord(2, 2), req1, req2); // All scorers match, so defaultSimilarity always has 1 as // the coordination factor. // Therefore the sum of the scores of two scorers @@ -203,13 +203,14 @@ class BooleanScorer2 extends Scorer { /** Returns the scorer to be used for match counting and score summing. * Uses requiredScorers, optionalScorers and prohibitedScorers. */ - private Scorer makeCountingSumScorer() throws IOException { // each scorer counted as a single matcher + private Scorer makeCountingSumScorer(boolean disableCoord, + Similarity similarity) throws IOException { // each scorer counted as a single matcher return (requiredScorers.size() == 0) - ? 
makeCountingSumScorerNoReq() - : makeCountingSumScorerSomeReq(); + ? makeCountingSumScorerNoReq(disableCoord, similarity) + : makeCountingSumScorerSomeReq(disableCoord, similarity); } - private Scorer makeCountingSumScorerNoReq() throws IOException { // No required scorers + private Scorer makeCountingSumScorerNoReq(boolean disableCoord, Similarity similarity) throws IOException { // No required scorers // minNrShouldMatch optional scorers are required, but at least 1 int nrOptRequired = (minNrShouldMatch < 1) ? 1 : minNrShouldMatch; Scorer requiredCountingSumScorer; @@ -217,24 +218,27 @@ class BooleanScorer2 extends Scorer { requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired); else if (optionalScorers.size() == 1) requiredCountingSumScorer = new SingleMatchScorer(optionalScorers.get(0)); - else - requiredCountingSumScorer = countingConjunctionSumScorer(optionalScorers); + else { + requiredCountingSumScorer = countingConjunctionSumScorer(disableCoord, similarity, optionalScorers); + } return addProhibitedScorers(requiredCountingSumScorer); } - private Scorer makeCountingSumScorerSomeReq() throws IOException { // At least one required scorer. + private Scorer makeCountingSumScorerSomeReq(boolean disableCoord, Similarity similarity) throws IOException { // At least one required scorer. if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required. ArrayList allReq = new ArrayList(requiredScorers); allReq.addAll(optionalScorers); - return addProhibitedScorers(countingConjunctionSumScorer(allReq)); + return addProhibitedScorers(countingConjunctionSumScorer(disableCoord, similarity, allReq)); } else { // optionalScorers.size() > minNrShouldMatch, and at least one required scorer Scorer requiredCountingSumScorer = requiredScorers.size() == 1 ? 
new SingleMatchScorer(requiredScorers.get(0)) - : countingConjunctionSumScorer(requiredScorers); + : countingConjunctionSumScorer(disableCoord, similarity, requiredScorers); if (minNrShouldMatch > 0) { // use a required disjunction scorer over the optional scorers return addProhibitedScorers( dualConjunctionSumScorer( // non counting + disableCoord, + similarity, requiredCountingSumScorer, countingDisjunctionSumScorer( optionalScorers, diff --git a/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java index dd254755a31..3429e3711c5 100644 --- a/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java +++ b/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java @@ -29,14 +29,14 @@ class ConjunctionScorer extends Scorer { private final float coord; private int lastDoc = -1; - public ConjunctionScorer(Similarity similarity, Collection scorers) throws IOException { - this(similarity, scorers.toArray(new Scorer[scorers.size()])); + public ConjunctionScorer(float coord, Collection scorers) throws IOException { + this(coord, scorers.toArray(new Scorer[scorers.size()])); } - public ConjunctionScorer(Similarity similarity, Scorer... scorers) throws IOException { - super(similarity); + public ConjunctionScorer(float coord, Scorer... 
scorers) throws IOException { + super(null); this.scorers = scorers; - coord = similarity.coord(scorers.length, scorers.length); + this.coord = coord; for (int i = 0; i < scorers.length; i++) { if (scorers[i].nextDoc() == NO_MORE_DOCS) { diff --git a/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java b/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java index 0ab551ad8b2..e321ff3662c 100644 --- a/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java +++ b/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java @@ -37,13 +37,7 @@ public class DefaultSimilarity extends Similarity { numTerms = state.getLength() - state.getNumOverlap(); else numTerms = state.getLength(); - return (state.getBoost() * lengthNorm(field, numTerms)); - } - - /** Implemented as 1/sqrt(numTerms). */ - @Override - public float lengthNorm(String fieldName, int numTerms) { - return (float)(1.0 / Math.sqrt(numTerms)); + return state.getBoost() * ((float) (1.0 / Math.sqrt(numTerms))); } /** Implemented as 1/sqrt(sumOfSquaredWeights). */ diff --git a/lucene/src/java/org/apache/lucene/search/Similarity.java b/lucene/src/java/org/apache/lucene/search/Similarity.java index 81b1a810047..041db1e8ffa 100644 --- a/lucene/src/java/org/apache/lucene/search/Similarity.java +++ b/lucene/src/java/org/apache/lucene/search/Similarity.java @@ -462,12 +462,14 @@ import org.apache.lucene.util.SmallFloat; * {@link org.apache.lucene.document.Fieldable#setBoost(float) field.setBoost()} * before adding the field to a document. * - *
  • {@link #lengthNorm(String, int) lengthNorm(field)} - computed + *
  • lengthNorm - computed * when the document is added to the index in accordance with the number of tokens * of this field in the document, so that shorter fields contribute more to the score. * LengthNorm is computed by the Similarity class in effect at indexing. *
  • * + * The {@link #computeNorm} method is responsible for + * combining all of these factors into a single float. * *

    * When a document is added to the index, all the above factors are multiplied. @@ -480,7 +482,7 @@ import org.apache.lucene.util.SmallFloat; * norm(t,d)   =   * {@link org.apache.lucene.document.Document#getBoost() doc.getBoost()} *  ·  - * {@link #lengthNorm(String, int) lengthNorm(field)} + * lengthNorm *  ·  * * @@ -570,12 +572,23 @@ public abstract class Similarity implements Serializable { } /** - * Compute the normalization value for a field, given the accumulated + * Computes the normalization value for a field, given the accumulated * state of term processing for this field (see {@link FieldInvertState}). * *

    Implementations should calculate a float value based on the field * state and then return that value. * + *

    Matches in longer fields are less precise, so implementations of this + * method usually return smaller values when state.getLength() is large, + * and larger values when state.getLength() is small. + * + *

    Note that the return values are computed under + * {@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document)} + * and then stored using + * {@link #encodeNormValue(float)}. + * Thus they have limited precision, and documents + * must be re-indexed if this method is altered. + * *

    For backward compatibility this method by default calls * {@link #lengthNorm(String, int)} passing * {@link FieldInvertState#getLength()} as the second argument, and @@ -587,9 +600,7 @@ public abstract class Similarity implements Serializable { * @param state current processing state for this field * @return the calculated float norm */ - public float computeNorm(String field, FieldInvertState state) { - return (state.getBoost() * lengthNorm(field, state.getLength())); - } + public abstract float computeNorm(String field, FieldInvertState state); /** Computes the normalization value for a field given the total number of * terms contained in a field. These values, together with field boosts, are @@ -613,8 +624,13 @@ public abstract class Similarity implements Serializable { * @return a normalization factor for hits on this field of this document * * @see org.apache.lucene.document.Field#setBoost(float) + * + * @deprecated Please override computeNorm instead */ - public abstract float lengthNorm(String fieldName, int numTokens); + @Deprecated + public final float lengthNorm(String fieldName, int numTokens) { + throw new UnsupportedOperationException("please use computeNorm instead"); + } /** Computes the normalization value for a query given the sum of the squared * weights of each of the query terms. This value is multiplied into the diff --git a/lucene/src/java/org/apache/lucene/search/SimilarityDelegator.java b/lucene/src/java/org/apache/lucene/search/SimilarityDelegator.java deleted file mode 100644 index d68e18aba36..00000000000 --- a/lucene/src/java/org/apache/lucene/search/SimilarityDelegator.java +++ /dev/null @@ -1,76 +0,0 @@ -package org.apache.lucene.search; - -import org.apache.lucene.index.FieldInvertState; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Expert: Delegating scoring implementation. Useful in {@link - * Query#getSimilarity(IndexSearcher)} implementations, to override only certain - * methods of a Searcher's Similarity implementation.. */ -public class SimilarityDelegator extends Similarity { - - private Similarity delegee; - - /** Construct a {@link Similarity} that delegates all methods to another. - * - * @param delegee the Similarity implementation to delegate to - */ - public SimilarityDelegator(Similarity delegee) { - this.delegee = delegee; - } - - @Override - public float computeNorm(String fieldName, FieldInvertState state) { - return delegee.computeNorm(fieldName, state); - } - - @Override - public float lengthNorm(String fieldName, int numTerms) { - return delegee.lengthNorm(fieldName, numTerms); - } - - @Override - public float queryNorm(float sumOfSquaredWeights) { - return delegee.queryNorm(sumOfSquaredWeights); - } - - @Override - public float tf(float freq) { - return delegee.tf(freq); - } - - @Override - public float sloppyFreq(int distance) { - return delegee.sloppyFreq(distance); - } - - @Override - public float idf(int docFreq, int numDocs) { - return delegee.idf(docFreq, numDocs); - } - - @Override - public float coord(int overlap, int maxOverlap) { - return delegee.coord(overlap, maxOverlap); - } - - @Override - public float scorePayload(int docId, String fieldName, int start, int end, byte [] 
payload, int offset, int length) { - return delegee.scorePayload(docId, fieldName, start, end, payload, offset, length); - } -} diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java index 4a0e6b7162c..157c08719eb 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java @@ -42,8 +42,9 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { private class SimilarityOne extends DefaultSimilarity { @Override - public float lengthNorm(String fieldName, int numTerms) { - return 1; + public float computeNorm(String fieldName, FieldInvertState state) { + // disable length norm + return state.getBoost(); } } diff --git a/lucene/src/test/org/apache/lucene/index/TestNorms.java b/lucene/src/test/org/apache/lucene/index/TestNorms.java index 460e61338c6..ceeae1552fb 100755 --- a/lucene/src/test/org/apache/lucene/index/TestNorms.java +++ b/lucene/src/test/org/apache/lucene/index/TestNorms.java @@ -41,8 +41,9 @@ public class TestNorms extends LuceneTestCase { private class SimilarityOne extends DefaultSimilarity { @Override - public float lengthNorm(String fieldName, int numTerms) { - return 1; + public float computeNorm(String fieldName, FieldInvertState state) { + // Disable length norm + return state.getBoost(); } } diff --git a/lucene/src/test/org/apache/lucene/index/TestOmitTf.java b/lucene/src/test/org/apache/lucene/index/TestOmitTf.java index 49c72e6c029..78b96dc5d32 100644 --- a/lucene/src/test/org/apache/lucene/index/TestOmitTf.java +++ b/lucene/src/test/org/apache/lucene/index/TestOmitTf.java @@ -35,7 +35,7 @@ import org.apache.lucene.search.Explanation.IDFExplanation; public class TestOmitTf extends LuceneTestCase { public static class SimpleSimilarity extends Similarity { - @Override public float lengthNorm(String field, int numTerms) { return 1.0f; } +
@Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); } @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } @Override public float tf(float freq) { return freq; } @Override public float sloppyFreq(int distance) { return 2.0f; } diff --git a/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java index 25c43b69921..43cd8370e5a 100644 --- a/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -22,6 +22,7 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.util.PriorityQueue; /** @@ -253,7 +254,7 @@ final class JustCompileSearch { } @Override - public float lengthNorm(String fieldName, int numTokens) { + public float computeNorm(String fieldName, FieldInvertState state) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } diff --git a/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java index 297aabe3f4c..4628e911773 100644 --- a/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java +++ b/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -83,7 +83,7 @@ public class TestBooleanScorer extends LuceneTestCase } }}; - BooleanScorer bs = new BooleanScorer(null, sim, 1, Arrays.asList(scorers), null, scorers.length); + BooleanScorer bs = new BooleanScorer(null, false, sim, 1, Arrays.asList(scorers), null, scorers.length); assertEquals("should have received 3000", 3000, bs.nextDoc()); assertEquals("should have received NO_MORE_DOCS", DocIdSetIterator.NO_MORE_DOCS, bs.nextDoc()); diff --git 
a/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java b/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java index 1b89d4dd7dc..aa7b9faaff5 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java @@ -23,6 +23,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.SlowMultiReaderWrapper; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader.ReaderContext; @@ -60,8 +61,9 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { } @Override - public float lengthNorm(String fieldName, int numTerms) { - return 1.0f; + public float computeNorm(String fieldName, FieldInvertState state) { + // Disable length norm + return state.getBoost(); } @Override diff --git a/lucene/src/test/org/apache/lucene/search/TestSimilarity.java b/lucene/src/test/org/apache/lucene/search/TestSimilarity.java index 9518a025ed9..fd459103777 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSimilarity.java +++ b/lucene/src/test/org/apache/lucene/search/TestSimilarity.java @@ -21,6 +21,7 @@ import org.apache.lucene.util.LuceneTestCase; import java.io.IOException; import java.util.Collection; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -38,7 +39,7 @@ import org.apache.lucene.search.Explanation.IDFExplanation; public class TestSimilarity extends LuceneTestCase { public static class SimpleSimilarity extends Similarity { - @Override public float lengthNorm(String field, int numTerms) { return 1.0f; } + @Override public float computeNorm(String field, FieldInvertState state) { return 
state.getBoost(); } @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } @Override public float tf(float freq) { return freq; } @Override public float sloppyFreq(int distance) { return 2.0f; } diff --git a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java index a006fb04e81..8863e783bb4 100644 --- a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java +++ b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java @@ -26,6 +26,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Payload; import org.apache.lucene.index.RandomIndexWriter; @@ -305,8 +306,8 @@ public class TestPayloadNearQuery extends LuceneTestCase { //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! //Make everything else 1 so we see the effect of the payload //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- @Override public float lengthNorm(String fieldName, int numTerms) { - return 1.0f; + @Override public float computeNorm(String fieldName, FieldInvertState state) { + return state.getBoost(); } @Override public float queryNorm(float sumOfSquaredWeights) { diff --git a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java index a5a8550f623..0a053584165 100644 --- a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java +++ b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java @@ -34,6 +34,7 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Payload; import org.apache.lucene.index.RandomIndexWriter; @@ -299,8 +300,8 @@ public class TestPayloadTermQuery extends LuceneTestCase { //Make everything else 1 so we see the effect of the payload //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
@Override - public float lengthNorm(String fieldName, int numTerms) { - return 1; + public float computeNorm(String fieldName, FieldInvertState state) { + return state.getBoost(); } @Override diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java index 9d3c83adbae..ca83e3a67e0 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java @@ -419,7 +419,7 @@ public class TestSpans extends LuceneTestCase { public Similarity getSimilarity(IndexSearcher s) { return sim; } - }; + }; Scorer spanScorer = snq.weight(searcher).scorer(new AtomicReaderContext(new SlowMultiReaderWrapper(searcher.getIndexReader())), true, false); diff --git a/solr/src/java/org/apache/solr/search/SolrSimilarity.java b/solr/src/java/org/apache/solr/search/SolrSimilarity.java deleted file mode 100644 index 60525376d57..00000000000 --- a/solr/src/java/org/apache/solr/search/SolrSimilarity.java +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.solr.search; - -import org.apache.lucene.search.DefaultSimilarity; - -import java.util.HashMap; - -/** - */ -// don't make it public for now... easier to change later. - -// This class is currently unused. -class SolrSimilarity extends DefaultSimilarity { - private final HashMap lengthNormConfig = new HashMap(); - - public float lengthNorm(String fieldName, int numTerms) { - // Float f = lengthNormConfig. - // if (lengthNormDisabled.) - return super.lengthNorm(fieldName, numTerms); - } -} diff --git a/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java b/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java index 4cfebb79c94..7ac1d8e50fa 100755 --- a/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java +++ b/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java @@ -17,6 +17,7 @@ package org.apache.solr.search.function; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.Similarity; @@ -294,8 +295,11 @@ public class TestFunctionQuery extends SolrTestCaseJ4 { "//float[@name='score']='" + similarity.idf(3,6) + "'"); assertQ(req("fl","*,score","q", "{!func}tf(a_t,cow)", "fq","id:6"), "//float[@name='score']='" + similarity.tf(5) + "'"); + FieldInvertState state = new FieldInvertState(); + state.setBoost(1.0f); + state.setLength(4); assertQ(req("fl","*,score","q", "{!func}norm(a_t)", "fq","id:2"), - "//float[@name='score']='" + similarity.lengthNorm("a_t",4) + "'"); // sqrt(4)==2 and is exactly representable when quantized to a byte + "//float[@name='score']='" + similarity.computeNorm("a_t",state) + "'"); // sqrt(4)==2 and is exactly representable when quantized to a byte // test that ord and rord are working on a global index basis, not just // at the segment level (since Lucene 2.9 has switched to per-segment searching)