From 3eb1e1ac5485fd86956ad925636bf0c5d5e38486 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 18 Sep 2013 15:29:11 +0000 Subject: [PATCH] LUCENE-5221: SimilarityBase.computeNorm is inconsistent with TFIDFSimilarity git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1524457 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 ++++ .../search/similarities/SimilarityBase.java | 2 +- .../search/similarities/TestSimilarityBase.java | 17 +++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index ef389b3e926..c40f3f0e3b8 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -193,6 +193,10 @@ Bug Fixes * LUCENE-5201: Fixed compression bug in LZ4.compressHC when the input is highly compressible and the start offset of the array to compress is > 0. (Adrien Grand) + +* LUCENE-5221: SimilarityBase did not write norms the same way as DefaultSimilarity + if discountOverlaps == false and index-time boosts are present for the field. + (Yubin Kim via Robert Muir) API Changes diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java b/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java index c1ccff49a9b..72806df506f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java @@ -233,7 +233,7 @@ public abstract class SimilarityBase extends Similarity { if (discountOverlaps) numTerms = state.getLength() - state.getNumOverlap(); else - numTerms = state.getLength() / state.getBoost(); + numTerms = state.getLength(); return encodeNormValue(state.getBoost(), numTerms); } diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java index dc3017e3519..f526c6b6399 100644 --- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java +++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java @@ -25,6 +25,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -572,4 +573,20 @@ public class TestSimilarityBase extends LuceneTestCase { dir.close(); super.tearDown(); } + + // LUCENE-5221 + public void testDiscountOverlapsBoost() throws IOException { + DefaultSimilarity expected = new DefaultSimilarity(); + SimilarityBase actual = new DFRSimilarity(new BasicModelIne(), new AfterEffectB(), new NormalizationH2()); + expected.setDiscountOverlaps(false); + actual.setDiscountOverlaps(false); + FieldInvertState state = new FieldInvertState("foo"); + state.setLength(5); + state.setNumOverlap(2); + state.setBoost(3); + assertEquals(expected.computeNorm(state), actual.computeNorm(state)); + expected.setDiscountOverlaps(true); + actual.setDiscountOverlaps(true); + assertEquals(expected.computeNorm(state), actual.computeNorm(state)); + } }