From 3f255f6ceaf8b6d7e9b8ca31a250207975a562cb Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 24 Jan 2011 19:13:31 +0000 Subject: [PATCH] LUCENE-2236: per-field similarity git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1062927 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + lucene/MIGRATE.txt | 6 + .../instantiated/InstantiatedIndexWriter.java | 18 ++- .../lucene/index/memory/MemoryIndex.java | 16 +- .../lucene/index/FieldNormModifier.java | 13 +- .../lucene/index/TestFieldNormModifier.java | 4 +- .../lucene/misc/TestLengthNormModifier.java | 6 +- .../StandardBooleanQueryNodeBuilder.java | 4 +- .../nodes/StandardBooleanQueryNode.java | 4 +- .../apache/lucene/index/DocumentsWriter.java | 14 +- .../index/DocumentsWriterThreadState.java | 2 +- .../lucene/index/IndexWriterConfig.java | 30 ++-- .../lucene/index/NormsWriterPerField.java | 9 +- .../apache/lucene/search/BooleanQuery.java | 12 +- .../lucene/search/DefaultSimilarity.java | 12 +- .../apache/lucene/search/IndexSearcher.java | 29 +++- .../lucene/search/MatchAllDocsQuery.java | 2 +- .../lucene/search/MultiPhraseQuery.java | 2 +- .../org/apache/lucene/search/PhraseQuery.java | 2 +- .../java/org/apache/lucene/search/Query.java | 2 +- .../org/apache/lucene/search/Similarity.java | 62 +------ .../lucene/search/SimilarityProvider.java | 66 ++++++++ .../org/apache/lucene/search/TermQuery.java | 2 +- .../java/org/apache/lucene/search/Weight.java | 2 +- .../lucene/search/spans/SpanWeight.java | 2 +- .../org/apache/lucene/index/DocHelper.java | 8 +- .../index/TestBackwardsCompatibility.java | 12 +- .../lucene/index/TestDeletionPolicy.java | 5 +- .../lucene/index/TestIndexFileDeleter.java | 5 +- .../apache/lucene/index/TestIndexReader.java | 26 +-- .../lucene/index/TestIndexReaderClone.java | 18 ++- .../index/TestIndexReaderCloneNorms.java | 35 ++-- .../lucene/index/TestIndexReaderReopen.java | 22 ++- .../lucene/index/TestIndexWriterConfig.java | 20 +-- 
.../lucene/index/TestMaxTermFrequency.java | 2 +- .../org/apache/lucene/index/TestNorms.java | 23 +-- .../org/apache/lucene/index/TestOmitTf.java | 13 +- .../lucene/index/TestParallelReader.java | 5 +- .../lucene/search/JustCompileSearch.java | 24 +-- .../org/apache/lucene/search/QueryUtils.java | 2 +- .../apache/lucene/search/TestBoolean2.java | 6 +- .../search/TestComplexExplanations.java | 2 +- .../lucene/search/TestConstantScoreQuery.java | 2 +- .../search/TestDisjunctionMaxQuery.java | 6 +- .../lucene/search/TestMatchAllDocsQuery.java | 2 +- .../lucene/search/TestMultiPhraseQuery.java | 2 +- .../org/apache/lucene/search/TestSetNorm.java | 9 +- .../apache/lucene/search/TestSimilarity.java | 13 +- .../lucene/search/TestSimilarityProvider.java | 151 ++++++++++++++++++ .../lucene/search/payloads/PayloadHelper.java | 8 +- .../search/payloads/TestPayloadNearQuery.java | 4 +- .../search/payloads/TestPayloadTermQuery.java | 6 +- .../lucene/search/spans/TestPayloadSpans.java | 10 +- .../apache/lucene/search/spans/TestSpans.java | 10 +- .../org/apache/solr/schema/IndexSchema.java | 15 +- .../apache/solr/schema/SimilarityFactory.java | 4 +- .../solr/search/SolrConstantScoreQuery.java | 2 - .../apache/solr/search/SolrIndexSearcher.java | 2 +- .../solr/search/function/IDFValueSource.java | 2 +- .../solr/search/function/NormValueSource.java | 2 +- .../solr/search/function/TFValueSource.java | 2 +- .../apache/solr/update/SolrIndexConfig.java | 2 +- .../solr/schema/CustomSimilarityFactory.java | 4 +- .../apache/solr/schema/IndexSchemaTest.java | 4 +- 64 files changed, 524 insertions(+), 290 deletions(-) create mode 100644 lucene/src/java/org/apache/lucene/search/SimilarityProvider.java create mode 100644 lucene/src/test/org/apache/lucene/search/TestSimilarityProvider.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 99733821100..90c498d9581 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -131,6 +131,9 @@ Changes in backwards compatibility policy * 
LUCENE-2882: Cut over SpanQuery#getSpans to AtomicReaderContext to enforce per segment semantics on SpanQuery & Spans. (Simon Willnauer) +* LUCENE-2236: Similarity can now be configured on a per-field basis. See the + migration notes in MIGRATE.txt for more details. (Robert Muir, Doron Cohen) + Changes in Runtime Behavior * LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index 0388d64ed15..c5e85969e03 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -331,3 +331,9 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing toString() is no longer implemented by AttributeImpl, so if you have overridden toString(), port your customization over to reflectWith(). reflectAsString() would then return what toString() did before. + +* LUCENE-2236: The default Similarity can no longer be set statically (and dangerously) for the entire JVM via Similarity.setDefault(). + Instead, IndexWriterConfig and IndexSearcher now take a SimilarityProvider. + Similarity can now be configured on a per-field basis. + Similarity retains only the field-specific relevance methods such as tf() and idf(). + Methods that apply to the entire query such as coord() and queryNorm() exist in SimilarityProvider. 
diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java index 95bb1f01cd4..d7b6de961da 100644 --- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java +++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java @@ -42,7 +42,8 @@ import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermVectorOffsetInfo; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.CollectionUtil; @@ -67,7 +68,7 @@ public class InstantiatedIndexWriter implements Closeable { private final InstantiatedIndex index; private final Analyzer analyzer; - private Similarity similarity = Similarity.getDefault(); // how to normalize; + private SimilarityProvider similarityProvider = IndexSearcher.getDefaultSimilarityProvider(); // how to normalize; private transient Set fieldNameBuffer; /** @@ -236,11 +237,12 @@ public class InstantiatedIndexWriter implements Closeable { termsInDocument += eFieldTermDocInfoFactoriesByTermText.getValue().size(); if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) { + final String fieldName = eFieldTermDocInfoFactoriesByTermText.getKey().fieldName; final FieldInvertState invertState = new FieldInvertState(); invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost * document.getDocument().getBoost()); invertState.setLength(eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength); - final float norm = 
similarity.computeNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, invertState); - normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = similarity.encodeNormValue(norm); + final float norm = similarityProvider.get(fieldName).computeNorm(fieldName, invertState); + normsByFieldNameAndDocumentNumber.get(fieldName)[document.getDocumentNumber()] = similarityProvider.get(fieldName).encodeNormValue(norm); } else { System.currentTimeMillis(); } @@ -659,12 +661,12 @@ public class InstantiatedIndexWriter implements Closeable { addDocument(doc, analyzer); } - public Similarity getSimilarity() { - return similarity; + public SimilarityProvider getSimilarityProvider() { + return similarityProvider; } - public void setSimilarity(Similarity similarity) { - this.similarity = similarity; + public void setSimilarityProvider(SimilarityProvider similarityProvider) { + this.similarityProvider = similarityProvider; } public Analyzer getAnalyzer() { diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 205cc60f7b6..3fc82b7651b 100644 --- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -57,6 +57,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.RAMDirectory; // for javadocs import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -1169,9 +1170,9 @@ public class MemoryIndex implements Serializable { }; } - private Similarity getSimilarity() { - if (searcher != null) return searcher.getSimilarity(); - return Similarity.getDefault(); 
+ private SimilarityProvider getSimilarityProvider() { + if (searcher != null) return searcher.getSimilarityProvider(); + return IndexSearcher.getDefaultSimilarityProvider(); } private void setSearcher(IndexSearcher searcher) { @@ -1181,20 +1182,21 @@ public class MemoryIndex implements Serializable { /** performance hack: cache norms to avoid repeated expensive calculations */ private byte[] cachedNorms; private String cachedFieldName; - private Similarity cachedSimilarity; + private SimilarityProvider cachedSimilarity; @Override public byte[] norms(String fieldName) { byte[] norms = cachedNorms; - Similarity sim = getSimilarity(); + SimilarityProvider sim = getSimilarityProvider(); if (fieldName != cachedFieldName || sim != cachedSimilarity) { // not cached? Info info = getInfo(fieldName); + Similarity fieldSim = sim.get(fieldName); int numTokens = info != null ? info.numTokens : 0; int numOverlapTokens = info != null ? info.numOverlapTokens : 0; float boost = info != null ? info.getBoost() : 1.0f; FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost); - float n = sim.computeNorm(fieldName, invertState); - byte norm = sim.encodeNormValue(n); + float n = fieldSim.computeNorm(fieldName, invertState); + byte norm = fieldSim.encodeNormValue(n); norms = new byte[] {norm}; // cache it for future reuse diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java index 14fa0970fd6..acebe221a9a 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.Directory; import 
org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.StringHelper; @@ -57,13 +58,13 @@ public class FieldNormModifier { System.exit(1); } - Similarity s = null; + SimilarityProvider s = null; if (args[1].equals("-d")) args[1] = DefaultSimilarity.class.getName(); try { - s = Class.forName(args[1]).asSubclass(Similarity.class).newInstance(); + s = Class.forName(args[1]).asSubclass(SimilarityProvider.class).newInstance(); } catch (Exception e) { System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]); e.printStackTrace(System.err); @@ -84,7 +85,7 @@ public class FieldNormModifier { private Directory dir; - private Similarity sim; + private SimilarityProvider sim; /** * Constructor for code that wishes to use this class programmatically @@ -93,7 +94,7 @@ public class FieldNormModifier { * @param d the Directory to modify * @param s the Similarity to use (can be null) */ - public FieldNormModifier(Directory d, Similarity s) { + public FieldNormModifier(Directory d, SimilarityProvider s) { dir = d; sim = s; } @@ -111,7 +112,7 @@ public class FieldNormModifier { */ public void reSetNorms(String field) throws IOException { String fieldName = StringHelper.intern(field); - + Similarity fieldSim = sim.get(field); IndexReader reader = null; try { reader = IndexReader.open(dir, false); @@ -148,7 +149,7 @@ public class FieldNormModifier { for (int d = 0; d < termCounts.length; d++) { if (delDocs == null || !delDocs.get(d)) { invertState.setLength(termCounts[d]); - subReader.setNorm(d, fieldName, sim.encodeNormValue(sim.computeNorm(fieldName, invertState))); + subReader.setNorm(d, fieldName, fieldSim.encodeNormValue(fieldSim.computeNorm(fieldName, invertState))); } } } diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java index 345e86c4092..8c83d449341 100644 --- 
a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java @@ -28,7 +28,7 @@ import org.apache.lucene.search.Collector; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -42,7 +42,7 @@ public class TestFieldNormModifier extends LuceneTestCase { public Directory store; /** inverts the normal notion of lengthNorm */ - public static Similarity s = new DefaultSimilarity() { + public static SimilarityProvider s = new DefaultSimilarity() { @Override public float computeNorm(String fieldName, FieldInvertState state) { return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength()); diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java b/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java index 06fb554c632..a856dd9fa58 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java @@ -33,7 +33,7 @@ import org.apache.lucene.search.Collector; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -47,7 +47,7 @@ public class TestLengthNormModifier extends LuceneTestCase { public Directory store; /** inverts the normal 
notion of lengthNorm */ - public static Similarity s = new DefaultSimilarity() { + public static SimilarityProvider s = new DefaultSimilarity() { @Override public float computeNorm(String fieldName, FieldInvertState state) { return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength()); @@ -163,7 +163,7 @@ public class TestLengthNormModifier extends LuceneTestCase { } // override the norms to be inverted - Similarity s = new DefaultSimilarity() { + SimilarityProvider s = new DefaultSimilarity() { @Override public float computeNorm(String fieldName, FieldInvertState state) { return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength()); diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardBooleanQueryNodeBuilder.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardBooleanQueryNodeBuilder.java index 312728690f2..b2e898dc870 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardBooleanQueryNodeBuilder.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardBooleanQueryNodeBuilder.java @@ -31,7 +31,7 @@ import org.apache.lucene.queryParser.standard.parser.EscapeQuerySyntaxImpl; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.BooleanQuery.TooManyClauses; /** @@ -41,7 +41,7 @@ import org.apache.lucene.search.BooleanQuery.TooManyClauses; * * @see BooleanQueryNodeBuilder * @see BooleanQuery - * @see Similarity#coord(int, int) + * @see SimilarityProvider#coord(int, int) */ public class StandardBooleanQueryNodeBuilder implements StandardQueryBuilder { diff --git 
a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/StandardBooleanQueryNode.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/StandardBooleanQueryNode.java index 3ddd6396d1e..d09d234a04d 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/StandardBooleanQueryNode.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/StandardBooleanQueryNode.java @@ -22,14 +22,14 @@ import java.util.List; import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode; import org.apache.lucene.queryParser.core.nodes.QueryNode; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; /** * A {@link StandardBooleanQueryNode} has the same behavior as * {@link BooleanQueryNode}. It only indicates if the coord should be enabled or * not for this boolean query.
* - * @see Similarity#coord(int, int) + * @see SimilarityProvider#coord(int, int) * @see BooleanQuery */ public class StandardBooleanQueryNode extends BooleanQueryNode { diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java index 617a70a802e..25cd8cac737 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java @@ -30,7 +30,7 @@ import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.search.Query; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMFile; @@ -127,7 +127,7 @@ final class DocumentsWriter { private boolean aborting; // True if an abort is pending PrintStream infoStream; - Similarity similarity; + SimilarityProvider similarityProvider; // max # simultaneous threads; if there are more than // this, they wait for others to finish first @@ -140,7 +140,7 @@ final class DocumentsWriter { DocumentsWriter docWriter; Analyzer analyzer; PrintStream infoStream; - Similarity similarity; + SimilarityProvider similarityProvider; int docID; Document doc; String maxTermPrefix; @@ -284,7 +284,7 @@ final class DocumentsWriter { DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos, BufferedDeletes bufferedDeletes) throws IOException { this.directory = directory; this.writer = writer; - this.similarity = writer.getConfig().getSimilarity(); + this.similarityProvider = writer.getConfig().getSimilarityProvider(); this.maxThreadStates = maxThreadStates; this.fieldInfos = fieldInfos; this.bufferedDeletes = bufferedDeletes; @@ -357,10 +357,10 @@ final class 
DocumentsWriter { } } - synchronized void setSimilarity(Similarity similarity) { - this.similarity = similarity; + synchronized void setSimilarityProvider(SimilarityProvider similarity) { + this.similarityProvider = similarity; for(int i=0;i - * NOTE: the similarity cannot be null. If null is passed, - * the similarity will be set to the default. - * - * @see Similarity#setDefault(Similarity) + * NOTE: the similarity provider cannot be null. If null is passed, + * the similarity provider will be set to the default implementation (unspecified). */ - public IndexWriterConfig setSimilarity(Similarity similarity) { - this.similarity = similarity == null ? Similarity.getDefault() : similarity; + public IndexWriterConfig setSimilarityProvider(SimilarityProvider similarityProvider) { + this.similarityProvider = similarityProvider == null ? IndexSearcher.getDefaultSimilarityProvider() : similarityProvider; return this; } /** - * Expert: returns the {@link Similarity} implementation used by this - * IndexWriter. This defaults to the current value of - * {@link Similarity#getDefault()}. + * Expert: returns the {@link SimilarityProvider} implementation used by this + * IndexWriter. */ - public Similarity getSimilarity() { - return similarity; + public SimilarityProvider getSimilarityProvider() { + return similarityProvider; } /** @@ -576,7 +574,7 @@ public final class IndexWriterConfig implements Cloneable { sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n"); sb.append("commit=").append(commit == null ? 
"null" : commit).append("\n"); sb.append("openMode=").append(openMode).append("\n"); - sb.append("similarity=").append(similarity.getClass().getName()).append("\n"); + sb.append("similarityProvider=").append(similarityProvider.getClass().getName()).append("\n"); sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n"); sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n"); diff --git a/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java b/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java index 3e2a577c6db..49a726e6a90 100644 --- a/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java @@ -17,6 +17,7 @@ package org.apache.lucene.index; * limitations under the License. */ +import org.apache.lucene.search.Similarity; import org.apache.lucene.util.ArrayUtil; /** Taps into DocInverter, as an InvertedDocEndConsumer, @@ -29,7 +30,8 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement final NormsWriterPerThread perThread; final FieldInfo fieldInfo; final DocumentsWriter.DocState docState; - + final Similarity similarity; + // Holds all docID/norm pairs we've seen int[] docIDs = new int[1]; byte[] norms = new byte[1]; @@ -49,6 +51,7 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement this.fieldInfo = fieldInfo; docState = perThread.docState; fieldState = docInverterPerField.fieldState; + similarity = docState.similarityProvider.get(fieldInfo.name); } @Override @@ -71,8 +74,8 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement assert norms.length == upto; norms = ArrayUtil.grow(norms, 1+upto); } - final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState); - 
norms[upto] = docState.similarity.encodeNormValue(norm); + final float norm = similarity.computeNorm(fieldInfo.name, fieldState); + norms[upto] = similarity.encodeNormValue(norm); docIDs[upto] = docState.docID; upto++; } diff --git a/lucene/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/src/java/org/apache/lucene/search/BooleanQuery.java index 07e2aeca1e3..8e2240cdea9 100644 --- a/lucene/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/src/java/org/apache/lucene/search/BooleanQuery.java @@ -72,18 +72,18 @@ public class BooleanQuery extends Query implements Iterable { /** Constructs an empty boolean query. * - * {@link Similarity#coord(int,int)} may be disabled in scoring, as + * {@link SimilarityProvider#coord(int,int)} may be disabled in scoring, as * appropriate. For example, this score factor does not make sense for most * automatically generated queries, like {@link WildcardQuery} and {@link * FuzzyQuery}. * - * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring. + * @param disableCoord disables {@link SimilarityProvider#coord(int,int)} in scoring. */ public BooleanQuery(boolean disableCoord) { this.disableCoord = disableCoord; } - /** Returns true iff {@link Similarity#coord(int,int)} is disabled in + /** Returns true iff {@link SimilarityProvider#coord(int,int)} is disabled in * scoring for this query instance. * @see #BooleanQuery(boolean) */ @@ -162,14 +162,14 @@ public class BooleanQuery extends Query implements Iterable { */ protected class BooleanWeight extends Weight { /** The Similarity implementation. 
*/ - protected Similarity similarity; + protected SimilarityProvider similarityProvider; protected ArrayList weights; protected int maxCoord; // num optional + num required private final boolean disableCoord; public BooleanWeight(IndexSearcher searcher, boolean disableCoord) throws IOException { - this.similarity = searcher.getSimilarity(); + this.similarityProvider = searcher.getSimilarityProvider(); this.disableCoord = disableCoord; weights = new ArrayList(clauses.size()); for (int i = 0 ; i < clauses.size(); i++) { @@ -202,7 +202,7 @@ public class BooleanQuery extends Query implements Iterable { } public float coord(int overlap, int maxOverlap) { - return similarity.coord(overlap, maxOverlap); + return similarityProvider.coord(overlap, maxOverlap); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java b/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java index e321ff3662c..71c8a229089 100644 --- a/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java +++ b/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java @@ -20,7 +20,7 @@ import org.apache.lucene.index.FieldInvertState; */ /** Expert: Default scoring implementation. */ -public class DefaultSimilarity extends Similarity { +public class DefaultSimilarity extends Similarity implements SimilarityProvider { /** Implemented as * state.getBoost()*lengthNorm(numTerms), where @@ -41,7 +41,6 @@ public class DefaultSimilarity extends Similarity { } /** Implemented as 1/sqrt(sumOfSquaredWeights). */ - @Override public float queryNorm(float sumOfSquaredWeights) { return (float)(1.0 / Math.sqrt(sumOfSquaredWeights)); } @@ -65,7 +64,6 @@ public class DefaultSimilarity extends Similarity { } /** Implemented as overlap / maxOverlap. 
*/ - @Override public float coord(int overlap, int maxOverlap) { return overlap / (float)maxOverlap; } @@ -90,4 +88,12 @@ public class DefaultSimilarity extends Similarity { public boolean getDiscountOverlaps() { return discountOverlaps; } + + /** + * Returns this default implementation for all fields. + * Override this method to customize scoring on a per-field basis. + */ + public Similarity get(String field) { + return this; + } } diff --git a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java index 9bf54bb53a0..cd1dd0a208e 100644 --- a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java @@ -70,8 +70,22 @@ public class IndexSearcher { private final ExecutorService executor; protected final IndexSearcher[] subSearchers; - /** The Similarity implementation used by this searcher. */ - private Similarity similarity = Similarity.getDefault(); + // the default SimilarityProvider + private static final SimilarityProvider defaultProvider = new DefaultSimilarity(); + + /** + * Expert: returns a default SimilarityProvider instance. + * In general, this method is only called to initialize searchers and writers. + * User code and query implementations should respect + * {@link IndexSearcher#getSimilarityProvider()}. + * @lucene.internal + */ + public static SimilarityProvider getDefaultSimilarityProvider() { + return defaultProvider; + } + + /** The SimilarityProvider implementation used by this searcher. */ + private SimilarityProvider similarityProvider = defaultProvider; /** Creates a searcher searching the index in the named * directory, with readOnly=true @@ -248,16 +262,15 @@ public class IndexSearcher { return reader.document(docID, fieldSelector); } - /** Expert: Set the Similarity implementation used by this Searcher. + /** Expert: Set the SimilarityProvider implementation used by this Searcher. 
* - * @see Similarity#setDefault(Similarity) */ - public void setSimilarity(Similarity similarity) { - this.similarity = similarity; + public void setSimilarityProvider(SimilarityProvider similarityProvider) { + this.similarityProvider = similarityProvider; } - public Similarity getSimilarity() { - return similarity; + public SimilarityProvider getSimilarityProvider() { + return similarityProvider; } /** diff --git a/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index 6ade92d4a5a..40cdc1877cd 100644 --- a/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -98,7 +98,7 @@ public class MatchAllDocsQuery extends Query { private float queryNorm; public MatchAllDocsWeight(IndexSearcher searcher) { - this.similarity = searcher.getSimilarity(); + this.similarity = normsField == null ? null : searcher.getSimilarityProvider().get(normsField); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 4a7624a78e0..c5c979cb904 100644 --- a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -139,7 +139,7 @@ public class MultiPhraseQuery extends Query { public MultiPhraseWeight(IndexSearcher searcher) throws IOException { - this.similarity = searcher.getSimilarity(); + this.similarity = searcher.getSimilarityProvider().get(field); // compute idf ArrayList allTerms = new ArrayList(); diff --git a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java index 313bf7331fa..8c71ad78bd5 100644 --- a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java @@ -146,7 +146,7 @@ public class PhraseQuery extends Query 
{ public PhraseWeight(IndexSearcher searcher) throws IOException { - this.similarity = searcher.getSimilarity(); + this.similarity = searcher.getSimilarityProvider().get(field); idfExp = similarity.idfExplain(terms, searcher); idf = idfExp.getIdf(); diff --git a/lucene/src/java/org/apache/lucene/search/Query.java b/lucene/src/java/org/apache/lucene/search/Query.java index 6c8f9b8af8c..8b937aa5b32 100644 --- a/lucene/src/java/org/apache/lucene/search/Query.java +++ b/lucene/src/java/org/apache/lucene/search/Query.java @@ -98,7 +98,7 @@ public abstract class Query implements java.io.Serializable, Cloneable { Query query = searcher.rewrite(this); Weight weight = query.createWeight(searcher); float sum = weight.sumOfSquaredWeights(); - float norm = searcher.getSimilarity().queryNorm(sum); + float norm = searcher.getSimilarityProvider().queryNorm(sum); if (Float.isInfinite(norm) || Float.isNaN(norm)) norm = 1.0f; weight.normalize(norm); diff --git a/lucene/src/java/org/apache/lucene/search/Similarity.java b/lucene/src/java/org/apache/lucene/search/Similarity.java index e989db72d75..306f904c270 100644 --- a/lucene/src/java/org/apache/lucene/search/Similarity.java +++ b/lucene/src/java/org/apache/lucene/search/Similarity.java @@ -362,7 +362,7 @@ import org.apache.lucene.util.SmallFloat; * Typically, a document that contains more of the query's terms will receive a higher score * than another document with fewer query terms. * This is a search time factor computed in - * {@link #coord(int, int) coord(q,d)} + * {@link SimilarityProvider#coord(int, int) coord(q,d)} * by the Similarity in effect at search time. *
 
* @@ -522,40 +522,13 @@ import org.apache.lucene.util.SmallFloat; * * * - * @see #setDefault(Similarity) - * @see org.apache.lucene.index.IndexWriterConfig#setSimilarity(Similarity) - * @see IndexSearcher#setSimilarity(Similarity) + * @see org.apache.lucene.index.IndexWriterConfig#setSimilarityProvider(SimilarityProvider) + * @see IndexSearcher#setSimilarityProvider(SimilarityProvider) */ public abstract class Similarity implements Serializable { - /** - * The Similarity implementation used by default. - **/ - private static Similarity defaultImpl = new DefaultSimilarity(); public static final int NO_DOC_ID_PROVIDED = -1; - /** Set the default Similarity implementation used by indexing and search - * code. - * - * @see IndexSearcher#setSimilarity(Similarity) - * @see org.apache.lucene.index.IndexWriterConfig#setSimilarity(Similarity) - */ - public static void setDefault(Similarity similarity) { - Similarity.defaultImpl = similarity; - } - - /** Return the default Similarity implementation used by indexing and search - * code. - * - *

This is initially an instance of {@link DefaultSimilarity}. - * - * @see IndexSearcher#setSimilarity(Similarity) - * @see org.apache.lucene.index.IndexWriterConfig#setSimilarity(Similarity) - */ - public static Similarity getDefault() { - return Similarity.defaultImpl; - } - /** Cache of decoded bytes. */ private static final float[] NORM_TABLE = new float[256]; @@ -632,21 +605,6 @@ public abstract class Similarity implements Serializable { throw new UnsupportedOperationException("please use computeNorm instead"); } - /** Computes the normalization value for a query given the sum of the squared - * weights of each of the query terms. This value is multiplied into the - * weight of each query term. While the classic query normalization factor is - * computed as 1/sqrt(sumOfSquaredWeights), other implementations might - * completely ignore sumOfSquaredWeights (ie return 1). - * - *

This does not affect ranking, but the default implementation does make scores - * from different queries more comparable than they would be by eliminating the - * magnitude of the Query vector as a factor in the score. - * - * @param sumOfSquaredWeights the sum of the squares of query term weights - * @return a normalization factor for query weights - */ - public abstract float queryNorm(float sumOfSquaredWeights); - /** Encodes a normalization factor for storage in an index. * *

The encoding uses a three-bit mantissa, a five-bit exponent, and @@ -816,20 +774,6 @@ public abstract class Similarity implements Serializable { */ public abstract float idf(int docFreq, int numDocs); - /** Computes a score factor based on the fraction of all query terms that a - * document contains. This value is multiplied into scores. - * - *

The presence of a large portion of the query terms indicates a better - * match with the query, so implementations of this method usually return - * larger values when the ratio between these parameters is large and smaller - * values when the ratio between them is small. - * - * @param overlap the number of query terms matched in the document - * @param maxOverlap the total number of terms in the query - * @return a score factor based on term overlap with the query - */ - public abstract float coord(int overlap, int maxOverlap); - /** * Calculate a scoring factor based on the data in the payload. Overriding implementations * are responsible for interpreting what is in the payload. Lucene makes no assumptions about diff --git a/lucene/src/java/org/apache/lucene/search/SimilarityProvider.java b/lucene/src/java/org/apache/lucene/search/SimilarityProvider.java new file mode 100644 index 00000000000..ef9a034e7eb --- /dev/null +++ b/lucene/src/java/org/apache/lucene/search/SimilarityProvider.java @@ -0,0 +1,66 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Expert: Scoring API. 
+ * + * Provides top-level scoring functions that aren't specific to a field, + * and work across multi-field queries (such as {@link BooleanQuery}). + * + * Field-specific scoring is accomplished through {@link Similarity}. + * + * @lucene.experimental + */ +public interface SimilarityProvider { + + /** Computes a score factor based on the fraction of all query terms that a + * document contains. This value is multiplied into scores. + * + *

The presence of a large portion of the query terms indicates a better + * match with the query, so implementations of this method usually return + * larger values when the ratio between these parameters is large and smaller + * values when the ratio between them is small. + * + * @param overlap the number of query terms matched in the document + * @param maxOverlap the total number of terms in the query + * @return a score factor based on term overlap with the query + */ + public abstract float coord(int overlap, int maxOverlap); + + /** Computes the normalization value for a query given the sum of the squared + * weights of each of the query terms. This value is multiplied into the + * weight of each query term. While the classic query normalization factor is + * computed as 1/sqrt(sumOfSquaredWeights), other implementations might + * completely ignore sumOfSquaredWeights (i.e., return 1). + * + *

This does not affect ranking, but the default implementation does make scores + * from different queries more comparable than they would be by eliminating the + * magnitude of the Query vector as a factor in the score. + * + * @param sumOfSquaredWeights the sum of the squares of query term weights + * @return a normalization factor for query weights + */ + public abstract float queryNorm(float sumOfSquaredWeights); + + /** Returns a {@link Similarity} for scoring a field + * @param field field name. + * @return a field-specific Similarity. + */ + public abstract Similarity get(String field); +} diff --git a/lucene/src/java/org/apache/lucene/search/TermQuery.java b/lucene/src/java/org/apache/lucene/search/TermQuery.java index 5dc9514e5a0..cb729ababc5 100644 --- a/lucene/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/src/java/org/apache/lucene/search/TermQuery.java @@ -54,7 +54,7 @@ public class TermQuery extends Query { throws IOException { assert termStates != null : "PerReaderTermState must not be null"; this.termStates = termStates; - this.similarity = searcher.getSimilarity(); + this.similarity = searcher.getSimilarityProvider().get(term.field()); if (docFreq != -1) { idfExp = similarity.idfExplain(term, searcher, docFreq); } else { diff --git a/lucene/src/java/org/apache/lucene/search/Weight.java b/lucene/src/java/org/apache/lucene/search/Weight.java index 65cc6ec61e2..7ea739b7e1e 100644 --- a/lucene/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/src/java/org/apache/lucene/search/Weight.java @@ -44,7 +44,7 @@ import org.apache.lucene.index.IndexReader.ReaderContext; * IndexSearcher ({@link Query#createWeight(IndexSearcher)}). *

  • The {@link #sumOfSquaredWeights()} method is called on the * Weight to compute the query normalization factor - * {@link Similarity#queryNorm(float)} of the query clauses contained in the + * {@link SimilarityProvider#queryNorm(float)} of the query clauses contained in the * query. *
  • The query normalization factor is passed to {@link #normalize(float)}. At * this point the weighting is complete. diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java index d9fd36db0c5..104bacf0a37 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -42,7 +42,7 @@ public class SpanWeight extends Weight { public SpanWeight(SpanQuery query, IndexSearcher searcher) throws IOException { - this.similarity = searcher.getSimilarity(); + this.similarity = searcher.getSimilarityProvider().get(query.getField()); this.query = query; terms=new HashSet(); diff --git a/lucene/src/test/org/apache/lucene/index/DocHelper.java b/lucene/src/test/org/apache/lucene/index/DocHelper.java index d6095f8a144..28bcdff4f7f 100644 --- a/lucene/src/test/org/apache/lucene/index/DocHelper.java +++ b/lucene/src/test/org/apache/lucene/index/DocHelper.java @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.Directory; import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT; @@ -220,7 +220,7 @@ class DocHelper { */ public static SegmentInfo writeDoc(Directory dir, Document doc) throws IOException { - return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), Similarity.getDefault(), doc); + return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), null, doc); } /** @@ -233,9 +233,9 @@ class DocHelper { * @param doc * @throws IOException */ - public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException { + public static SegmentInfo 
writeDoc(Directory dir, Analyzer analyzer, SimilarityProvider similarity, Document doc) throws IOException { IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity)); + TEST_VERSION_CURRENT, analyzer).setSimilarityProvider(similarity)); //writer.setUseCompoundFile(false); writer.addDocument(doc); writer.commit(); diff --git a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index 7324889411a..83bbc0b69be 100644 --- a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -38,12 +38,13 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; @@ -412,7 +413,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { Term searchTerm = new Term("id", "6"); int delCount = reader.deleteDocuments(searchTerm); assertEquals("wrong delete count", 1, delCount); - reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", Similarity.getDefault().encodeNormValue(2.0f)); + reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", 
searcher.getSimilarityProvider().get("content").encodeNormValue(2.0f)); reader.close(); searcher.close(); @@ -460,7 +461,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { Term searchTerm = new Term("id", "6"); int delCount = reader.deleteDocuments(searchTerm); assertEquals("wrong delete count", 1, delCount); - reader.setNorm(22, "content", Similarity.getDefault().encodeNormValue(2.0f)); + reader.setNorm(22, "content", searcher.getSimilarityProvider().get("content").encodeNormValue(2.0f)); reader.close(); // make sure they "took": @@ -519,7 +520,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { assertEquals("didn't delete the right number of documents", 1, delCount); // Set one norm so we get a .s0 file: - reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f)); + reader.setNorm(21, "content", conf.getSimilarityProvider().get("content").encodeNormValue(1.5f)); reader.close(); } @@ -556,7 +557,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase { assertEquals("didn't delete the right number of documents", 1, delCount); // Set one norm so we get a .s0 file: - reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f)); + SimilarityProvider sim = new DefaultSimilarity(); + reader.setNorm(21, "content", sim.get("content").encodeNormValue(1.5f)); reader.close(); // The numbering of fields can vary depending on which diff --git a/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java b/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java index e96bd86334a..55592662b0b 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java +++ b/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java @@ -30,7 +30,6 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Similarity; 
import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -608,7 +607,7 @@ public class TestDeletionPolicy extends LuceneTestCase { writer.close(); IndexReader reader = IndexReader.open(dir, policy, false); reader.deleteDocument(3*i+1); - reader.setNorm(4*i+1, "content", Similarity.getDefault().encodeNormValue(2.0F)); + reader.setNorm(4*i+1, "content", conf.getSimilarityProvider().get("content").encodeNormValue(2.0F)); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals(16*(1+i), hits.length); @@ -716,7 +715,7 @@ public class TestDeletionPolicy extends LuceneTestCase { writer.close(); IndexReader reader = IndexReader.open(dir, policy, false); reader.deleteDocument(3); - reader.setNorm(5, "content", Similarity.getDefault().encodeNormValue(2.0F)); + reader.setNorm(5, "content", conf.getSimilarityProvider().get("content").encodeNormValue(2.0F)); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals(16, hits.length); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java index e730335b62f..ea514a861ea 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java @@ -18,6 +18,7 @@ package org.apache.lucene.index; */ import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; @@ -67,9 +68,9 @@ public class TestIndexFileDeleter extends LuceneTestCase { Term searchTerm = new Term("id", "7"); int delCount = reader.deleteDocuments(searchTerm); assertEquals("didn't delete the right number of documents", 1, 
delCount); - + Similarity sim = new DefaultSimilarity().get("content"); // Set one norm so we get a .s0 file: - reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f)); + reader.setNorm(21, "content", sim.encodeNormValue(1.5f)); reader.close(); // Now, artificially create an extra .del file & extra diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java index 409141c6a37..41fb07fbf73 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java @@ -39,11 +39,12 @@ import org.apache.lucene.document.SetBasedFieldSelector; import org.apache.lucene.index.IndexReader.FieldOption; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.Similarity; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Similarity; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; @@ -464,8 +465,9 @@ public class TestIndexReader extends LuceneTestCase // expected } + Similarity sim = new DefaultSimilarity().get("aaa"); try { - reader.setNorm(5, "aaa", Similarity.getDefault().encodeNormValue(2.0f)); + reader.setNorm(5, "aaa", sim.encodeNormValue(2.0f)); fail("setNorm after close failed to throw IOException"); } catch (AlreadyClosedException e) { // expected @@ -504,8 +506,9 @@ public class TestIndexReader extends LuceneTestCase // expected } + Similarity sim = new DefaultSimilarity().get("aaa"); try { - reader.setNorm(5, "aaa", Similarity.getDefault().encodeNormValue(2.0f)); + reader.setNorm(5, "aaa", sim.encodeNormValue(2.0f)); 
fail("setNorm should have hit LockObtainFailedException"); } catch (LockObtainFailedException e) { // expected @@ -535,7 +538,8 @@ public class TestIndexReader extends LuceneTestCase // now open reader & set norm for doc 0 IndexReader reader = IndexReader.open(dir, false); - reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f)); + Similarity sim = new DefaultSimilarity().get("content"); + reader.setNorm(0, "content", sim.encodeNormValue(2.0f)); // we should be holding the write lock now: assertTrue("locked", IndexWriter.isLocked(dir)); @@ -549,7 +553,7 @@ public class TestIndexReader extends LuceneTestCase IndexReader reader2 = IndexReader.open(dir, false); // set norm again for doc 0 - reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(3.0f)); + reader.setNorm(0, "content", sim.encodeNormValue(3.0f)); assertTrue("locked", IndexWriter.isLocked(dir)); reader.close(); @@ -579,15 +583,16 @@ public class TestIndexReader extends LuceneTestCase addDoc(writer, searchTerm.text()); writer.close(); + Similarity sim = new DefaultSimilarity().get("content"); // now open reader & set norm for doc 0 (writes to // _0_1.s0) reader = IndexReader.open(dir, false); - reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f)); + reader.setNorm(0, "content", sim.encodeNormValue(2.0f)); reader.close(); // now open reader again & set norm for doc 0 (writes to _0_2.s0) reader = IndexReader.open(dir, false); - reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f)); + reader.setNorm(0, "content", sim.encodeNormValue(2.0f)); reader.close(); assertFalse("failed to remove first generation norms file on writing second generation", dir.fileExists("_0_1.s0")); @@ -966,13 +971,13 @@ public class TestIndexReader extends LuceneTestCase dir.setMaxSizeInBytes(thisDiskFree); dir.setRandomIOExceptionRate(rate); - + Similarity sim = new DefaultSimilarity().get("content"); try { if (0 == x) { int docId = 12; for(int 
i=0;i<13;i++) { reader.deleteDocument(docId); - reader.setNorm(docId, "content", Similarity.getDefault().encodeNormValue(2.0f)); + reader.setNorm(docId, "content", sim.encodeNormValue(2.0f)); docId += 12; } } @@ -1130,8 +1135,9 @@ public class TestIndexReader extends LuceneTestCase } reader = IndexReader.open(dir, false); + Similarity sim = new DefaultSimilarity().get("content"); try { - reader.setNorm(1, "content", Similarity.getDefault().encodeNormValue(2.0f)); + reader.setNorm(1, "content", sim.encodeNormValue(2.0f)); fail("did not hit exception when calling setNorm on an invalid doc number"); } catch (ArrayIndexOutOfBoundsException e) { // expected diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java index 3666b8d9055..7854aac9168 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java @@ -18,6 +18,7 @@ package org.apache.lucene.index; */ import org.apache.lucene.index.SegmentReader.Norm; +import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; @@ -272,13 +273,14 @@ public class TestIndexReaderClone extends LuceneTestCase { * @throws Exception */ private void performDefaultTests(IndexReader r1) throws Exception { - float norm1 = Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]); + Similarity sim = new DefaultSimilarity().get("field1"); + float norm1 = sim.decodeNormValue(MultiNorms.norms(r1, "field1")[4]); IndexReader pr1Clone = (IndexReader) r1.clone(); pr1Clone.deleteDocument(10); - pr1Clone.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(0.5f)); - assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == norm1); - 
assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1); + pr1Clone.setNorm(4, "field1", sim.encodeNormValue(0.5f)); + assertTrue(sim.decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == norm1); + assertTrue(sim.decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1); final Bits delDocs = MultiFields.getDeletedDocs(r1); assertTrue(delDocs == null || !delDocs.get(10)); @@ -327,7 +329,8 @@ public class TestIndexReaderClone extends LuceneTestCase { TestIndexReaderReopen.createIndex(random, dir1, false); SegmentReader origSegmentReader = getOnlySegmentReader(IndexReader.open(dir1, false)); origSegmentReader.deleteDocument(1); - origSegmentReader.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(0.5f)); + Similarity sim = new DefaultSimilarity().get("field1"); + origSegmentReader.setNorm(4, "field1", sim.encodeNormValue(0.5f)); SegmentReader clonedSegmentReader = (SegmentReader) origSegmentReader .clone(); @@ -426,8 +429,9 @@ public class TestIndexReaderClone extends LuceneTestCase { final Directory dir1 = newDirectory(); TestIndexReaderReopen.createIndex(random, dir1, false); IndexReader orig = IndexReader.open(dir1, false); - orig.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(17.0f)); - final byte encoded = Similarity.getDefault().encodeNormValue(17.0f); + Similarity sim = new DefaultSimilarity().get("field1"); + orig.setNorm(1, "field1", sim.encodeNormValue(17.0f)); + final byte encoded = sim.encodeNormValue(17.0f); assertEquals(encoded, MultiNorms.norms(orig, "field1")[1]); // the cloned segmentreader should have 2 references, 1 to itself, and 1 to diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java index fc97f6465e6..77c5d606cf7 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java +++ 
b/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java @@ -32,6 +32,7 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.SegmentReader.Norm; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -50,7 +51,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { private static final int NUM_FIELDS = 10; - private Similarity similarityOne; + private SimilarityProvider similarityOne; private Analyzer anlzr; @@ -203,19 +204,20 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { IndexReader reader4C = (IndexReader) reader3C.clone(); SegmentReader segmentReader4C = getOnlySegmentReader(reader4C); assertEquals(4, reader3CCNorm.bytesRef().get()); - reader4C.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(0.33f)); + Similarity sim = new DefaultSimilarity().get("field1"); + reader4C.setNorm(5, "field1", sim.encodeNormValue(0.33f)); // generate a cannot update exception in reader1 try { - reader3C.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(0.99f)); + reader3C.setNorm(1, "field1", sim.encodeNormValue(0.99f)); fail("did not hit expected exception"); } catch (Exception ex) { // expected } // norm values should be different - assertTrue(Similarity.getDefault().decodeNormValue(segmentReader3C.norms("field1")[5]) - != Similarity.getDefault().decodeNormValue(segmentReader4C.norms("field1")[5])); + assertTrue(sim.decodeNormValue(segmentReader3C.norms("field1")[5]) + != sim.decodeNormValue(segmentReader4C.norms("field1")[5])); Norm reader4CCNorm = segmentReader4C.norms.get("field1"); assertEquals(3, reader3CCNorm.bytesRef().get()); assertEquals(1, reader4CCNorm.bytesRef().get()); @@ -223,7 +225,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { IndexReader reader5C = 
(IndexReader) reader4C.clone(); SegmentReader segmentReader5C = getOnlySegmentReader(reader5C); Norm reader5CCNorm = segmentReader5C.norms.get("field1"); - reader5C.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(0.7f)); + reader5C.setNorm(5, "field1", sim.encodeNormValue(0.7f)); assertEquals(1, reader5CCNorm.bytesRef().get()); reader5C.close(); @@ -237,7 +239,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { private void createIndex(Random random, Directory dir) throws IOException { IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE) - .setMaxBufferedDocs(5).setSimilarity(similarityOne)); + .setMaxBufferedDocs(5).setSimilarityProvider(similarityOne)); LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy(); lmp.setMergeFactor(3); lmp.setUseCompoundFile(true); @@ -256,8 +258,9 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { // System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm); modifiedNorms.set(i, Float.valueOf(newNorm)); modifiedNorms.set(k, Float.valueOf(origNorm)); - ir.setNorm(i, "f" + 1, Similarity.getDefault().encodeNormValue(newNorm)); - ir.setNorm(k, "f" + 1, Similarity.getDefault().encodeNormValue(origNorm)); + Similarity sim = new DefaultSimilarity().get("f" + 1); + ir.setNorm(i, "f" + 1, sim.encodeNormValue(newNorm)); + ir.setNorm(k, "f" + 1, sim.encodeNormValue(origNorm)); // System.out.println("setNorm i: "+i); // break; } @@ -277,7 +280,8 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { assertEquals("number of norms mismatches", numDocNorms, b.length); ArrayList storedNorms = (i == 1 ? 
modifiedNorms : norms); for (int j = 0; j < b.length; j++) { - float norm = Similarity.getDefault().decodeNormValue(b[j]); + Similarity sim = new DefaultSimilarity().get(field); + float norm = sim.decodeNormValue(b[j]); float norm1 = storedNorms.get(j).floatValue(); assertEquals("stored norm value of " + field + " for doc " + j + " is " + norm + " - a mismatch!", norm, norm1, 0.000001); @@ -289,7 +293,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { throws IOException { IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND) - .setMaxBufferedDocs(5).setSimilarity(similarityOne); + .setMaxBufferedDocs(5).setSimilarityProvider(similarityOne); LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); lmp.setMergeFactor(3); lmp.setUseCompoundFile(compound); @@ -303,7 +307,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { // create the next document private Document newDoc() { Document d = new Document(); - float boost = nextNorm(); + float boost = nextNorm("anyfield"); // in this test the same similarity is used for all fields so it does not matter what field is passed for (int i = 0; i < 10; i++) { Field f = newField("f" + i, "v" + i, Store.NO, Index.NOT_ANALYZED); f.setBoost(boost); @@ -313,11 +317,12 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { } // return unique norm values that are unchanged by encoding/decoding - private float nextNorm() { + private float nextNorm(String fname) { float norm = lastNorm + normDelta; + Similarity sim = new DefaultSimilarity().get(fname); do { - float norm1 = Similarity.getDefault().decodeNormValue( - Similarity.getDefault().encodeNormValue(norm)); + float norm1 = sim.decodeNormValue( + sim.encodeNormValue(norm)); if (norm1 > lastNorm) { // System.out.println(norm1+" > "+lastNorm); norm = norm1; diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java 
b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java index e008f7fdea2..c6bdd8c380f 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java @@ -35,9 +35,11 @@ import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; @@ -615,8 +617,9 @@ public class TestIndexReaderReopen extends LuceneTestCase { IndexReader reader2 = reader1.reopen(); modifier = IndexReader.open(dir1, false); - modifier.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(50f)); - modifier.setNorm(1, "field2", Similarity.getDefault().encodeNormValue(50f)); + SimilarityProvider sim = new DefaultSimilarity(); + modifier.setNorm(1, "field1", sim.get("field1").encodeNormValue(50f)); + modifier.setNorm(1, "field2", sim.get("field2").encodeNormValue(50f)); modifier.close(); IndexReader reader3 = reader2.reopen(); @@ -709,7 +712,8 @@ public class TestIndexReaderReopen extends LuceneTestCase { protected void modifyIndex(int i) throws IOException { if (i % 3 == 0) { IndexReader modifier = IndexReader.open(dir, false); - modifier.setNorm(i, "field1", Similarity.getDefault().encodeNormValue(50f)); + Similarity sim = new DefaultSimilarity().get("field1"); + modifier.setNorm(i, "field1", sim.encodeNormValue(50f)); modifier.close(); } else if (i % 3 == 1) { IndexReader modifier = IndexReader.open(dir, false); @@ -989,9 +993,10 @@ public class TestIndexReaderReopen extends LuceneTestCase { 
} case 1: { IndexReader reader = IndexReader.open(dir, false); - reader.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(123f)); - reader.setNorm(44, "field2", Similarity.getDefault().encodeNormValue(222f)); - reader.setNorm(44, "field4", Similarity.getDefault().encodeNormValue(22f)); + SimilarityProvider sim = new DefaultSimilarity(); + reader.setNorm(4, "field1", sim.get("field1").encodeNormValue(123f)); + reader.setNorm(44, "field2", sim.get("field2").encodeNormValue(222f)); + reader.setNorm(44, "field4", sim.get("field4").encodeNormValue(22f)); reader.close(); break; } @@ -1012,8 +1017,9 @@ public class TestIndexReaderReopen extends LuceneTestCase { } case 4: { IndexReader reader = IndexReader.open(dir, false); - reader.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(123f)); - reader.setNorm(55, "field2", Similarity.getDefault().encodeNormValue(222f)); + SimilarityProvider sim = new DefaultSimilarity(); + reader.setNorm(5, "field1", sim.get("field1").encodeNormValue(123f)); + reader.setNorm(55, "field2", sim.get("field2").encodeNormValue(222f)); reader.close(); break; } diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java index 909e67833d7..1e12d8531fd 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java @@ -27,7 +27,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.index.DocumentsWriter.IndexingChain; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.DefaultSimilarity; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; @@ -55,7 +55,8 @@ public class TestIndexWriterConfig extends LuceneTestCase { assertEquals(KeepOnlyLastCommitDeletionPolicy.class, 
conf.getIndexDeletionPolicy().getClass()); assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass()); assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode()); - assertTrue(Similarity.getDefault() == conf.getSimilarity()); + // we don't need to assert this, it should be unspecified + assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider()); assertEquals(IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, conf.getTermIndexInterval()); assertEquals(IndexWriterConfig.getDefaultWriteLockTimeout(), conf.getWriteLockTimeout()); assertEquals(IndexWriterConfig.WRITE_LOCK_TIMEOUT, IndexWriterConfig.getDefaultWriteLockTimeout()); @@ -77,7 +78,7 @@ public class TestIndexWriterConfig extends LuceneTestCase { getters.add("getMaxFieldLength"); getters.add("getMergeScheduler"); getters.add("getOpenMode"); - getters.add("getSimilarity"); + getters.add("getSimilarityProvider"); getters.add("getTermIndexInterval"); getters.add("getWriteLockTimeout"); getters.add("getDefaultWriteLockTimeout"); @@ -173,12 +174,13 @@ public class TestIndexWriterConfig extends LuceneTestCase { conf.setMergeScheduler(null); assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass()); - // Test Similarity - assertTrue(Similarity.getDefault() == conf.getSimilarity()); - conf.setSimilarity(new MySimilarity()); - assertEquals(MySimilarity.class, conf.getSimilarity().getClass()); - conf.setSimilarity(null); - assertTrue(Similarity.getDefault() == conf.getSimilarity()); + // Test Similarity: + // we shouldn't assert what the default is, just that it's not null.
+ assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider()); + conf.setSimilarityProvider(new MySimilarity()); + assertEquals(MySimilarity.class, conf.getSimilarityProvider().getClass()); + conf.setSimilarityProvider(null); + assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider()); // Test IndexingChain assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); diff --git a/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java b/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java index 15a1fefe823..f3b7f2b3ca8 100644 --- a/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java +++ b/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java @@ -46,7 +46,7 @@ public class TestMaxTermFrequency extends LuceneTestCase { dir = newDirectory(); IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)); - config.setSimilarity(new TestSimilarity()); + config.setSimilarityProvider(new TestSimilarity()); RandomIndexWriter writer = new RandomIndexWriter(random, dir, config); Document doc = new Document(); Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED); diff --git a/lucene/src/test/org/apache/lucene/index/TestNorms.java b/lucene/src/test/org/apache/lucene/index/TestNorms.java index 78921264628..4ecbc8948d8 100755 --- a/lucene/src/test/org/apache/lucene/index/TestNorms.java +++ b/lucene/src/test/org/apache/lucene/index/TestNorms.java @@ -30,6 +30,7 @@ import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -49,7 +50,7 @@ public class TestNorms extends LuceneTestCase { private static final int 
NUM_FIELDS = 10; - private Similarity similarityOne; + private SimilarityProvider similarityOne; private Analyzer anlzr; private int numDocNorms; private ArrayList norms; @@ -151,7 +152,7 @@ public class TestNorms extends LuceneTestCase { private void createIndex(Random random, Directory dir) throws IOException { IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE) - .setMaxBufferedDocs(5).setSimilarity(similarityOne)); + .setMaxBufferedDocs(5).setSimilarityProvider(similarityOne)); LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy(); lmp.setMergeFactor(3); lmp.setUseCompoundFile(true); @@ -169,8 +170,9 @@ public class TestNorms extends LuceneTestCase { //System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm); modifiedNorms.set(i, Float.valueOf(newNorm)); modifiedNorms.set(k, Float.valueOf(origNorm)); - ir.setNorm(i, "f"+1, Similarity.getDefault().encodeNormValue(newNorm)); - ir.setNorm(k, "f"+1, Similarity.getDefault().encodeNormValue(origNorm)); + Similarity sim = new DefaultSimilarity().get("f"+1); + ir.setNorm(i, "f"+1, sim.encodeNormValue(newNorm)); + ir.setNorm(k, "f"+1, sim.encodeNormValue(origNorm)); } ir.close(); } @@ -184,7 +186,7 @@ public class TestNorms extends LuceneTestCase { assertEquals("number of norms mismatches",numDocNorms,b.length); ArrayList storedNorms = (i==1 ? 
modifiedNorms : norms); for (int j = 0; j < b.length; j++) { - float norm = similarityOne.decodeNormValue(b[j]); + float norm = similarityOne.get(field).decodeNormValue(b[j]); float norm1 = storedNorms.get(j).floatValue(); assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm, norm1, 0.000001); } @@ -195,7 +197,7 @@ public class TestNorms extends LuceneTestCase { private void addDocs(Random random, Directory dir, int ndocs, boolean compound) throws IOException { IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND) - .setMaxBufferedDocs(5).setSimilarity(similarityOne)); + .setMaxBufferedDocs(5).setSimilarityProvider(similarityOne)); LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy(); lmp.setMergeFactor(3); lmp.setUseCompoundFile(compound); @@ -208,7 +210,7 @@ public class TestNorms extends LuceneTestCase { // create the next document private Document newDoc() { Document d = new Document(); - float boost = nextNorm(); + float boost = nextNorm("anyfield"); // in this test the same similarity is used for all fields so it does not matter what field is passed for (int i = 0; i < 10; i++) { Field f = newField("f"+i,"v"+i,Store.NO,Index.NOT_ANALYZED); f.setBoost(boost); @@ -218,10 +220,11 @@ public class TestNorms extends LuceneTestCase { } // return unique norm values that are unchanged by encoding/decoding - private float nextNorm() { + private float nextNorm(String fname) { float norm = lastNorm + normDelta; + Similarity similarity = similarityOne.get(fname); do { - float norm1 = similarityOne.decodeNormValue(similarityOne.encodeNormValue(norm)); + float norm1 = similarity.decodeNormValue(similarity.encodeNormValue(norm)); if (norm1 > lastNorm) { //System.out.println(norm1+" > "+lastNorm); norm = norm1; @@ -258,7 +261,7 @@ public class TestNorms extends LuceneTestCase { public void testCustomEncoder() throws Exception { Directory dir = 
newDirectory(); IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); - config.setSimilarity(new CustomNormEncodingSimilarity()); + config.setSimilarityProvider(new CustomNormEncodingSimilarity()); RandomIndexWriter writer = new RandomIndexWriter(random, dir, config); Document doc = new Document(); Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED); diff --git a/lucene/src/test/org/apache/lucene/index/TestOmitTf.java b/lucene/src/test/org/apache/lucene/index/TestOmitTf.java index be29150b09a..aa693035795 100644 --- a/lucene/src/test/org/apache/lucene/index/TestOmitTf.java +++ b/lucene/src/test/org/apache/lucene/index/TestOmitTf.java @@ -35,13 +35,11 @@ import org.apache.lucene.search.Explanation.IDFExplanation; public class TestOmitTf extends LuceneTestCase { - public static class SimpleSimilarity extends Similarity { + public static class SimpleSimilarity extends Similarity implements SimilarityProvider { @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); } - @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } @Override public float tf(float freq) { return freq; } @Override public float sloppyFreq(int distance) { return 2.0f; } @Override public float idf(int docFreq, int numDocs) { return 1.0f; } - @Override public float coord(int overlap, int maxOverlap) { return 1.0f; } @Override public IDFExplanation idfExplain(Collection terms, IndexSearcher searcher) throws IOException { return new IDFExplanation() { @Override @@ -54,6 +52,11 @@ public class TestOmitTf extends LuceneTestCase { } }; } + public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } + public float coord(int overlap, int maxOverlap) { return 1.0f; } + public Similarity get(String field) { + return this; + } } // Tests whether the DocumentWriter correctly enable the @@ -251,7 +254,7 @@ public class TestOmitTf extends LuceneTestCase { dir, 
newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer). setMaxBufferedDocs(2). - setSimilarity(new SimpleSimilarity()). + setSimilarityProvider(new SimpleSimilarity()). setMergePolicy(newLogMergePolicy(2)) ); @@ -281,7 +284,7 @@ public class TestOmitTf extends LuceneTestCase { * Verify the index */ IndexSearcher searcher = new IndexSearcher(dir, true); - searcher.setSimilarity(new SimpleSimilarity()); + searcher.setSimilarityProvider(new SimpleSimilarity()); Term a = new Term("noTf", term); Term b = new Term("tf", term); diff --git a/lucene/src/test/org/apache/lucene/index/TestParallelReader.java b/lucene/src/test/org/apache/lucene/index/TestParallelReader.java index 07aa6c5dce7..7c755389d09 100644 --- a/lucene/src/test/org/apache/lucene/index/TestParallelReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestParallelReader.java @@ -147,7 +147,8 @@ public class TestParallelReader extends LuceneTestCase { assertTrue(pr.isCurrent()); IndexReader modifier = IndexReader.open(dir1, false); - modifier.setNorm(0, "f1", Similarity.getDefault().encodeNormValue(100f)); + SimilarityProvider sim = new DefaultSimilarity(); + modifier.setNorm(0, "f1", sim.get("f1").encodeNormValue(100f)); modifier.close(); // one of the two IndexReaders which ParallelReader is using @@ -155,7 +156,7 @@ public class TestParallelReader extends LuceneTestCase { assertFalse(pr.isCurrent()); modifier = IndexReader.open(dir2, false); - modifier.setNorm(0, "f3", Similarity.getDefault().encodeNormValue(100f)); + modifier.setNorm(0, "f3", sim.get("f3").encodeNormValue(100f)); modifier.close(); // now both are not current anymore diff --git a/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java index 21d5a59cfb0..db54970334d 100644 --- a/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -242,11 +242,6 @@ final class JustCompileSearch { 
static final class JustCompileSimilarity extends Similarity { - @Override - public float coord(int overlap, int maxOverlap) { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - @Override public float idf(int docFreq, int numDocs) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); @@ -257,11 +252,6 @@ final class JustCompileSearch { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } - @Override - public float queryNorm(float sumOfSquaredWeights) { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - @Override public float sloppyFreq(int distance) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); @@ -270,8 +260,22 @@ final class JustCompileSearch { @Override public float tf(float freq) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + } + + static final class JustCompileSimilarityProvider implements SimilarityProvider { + + public float queryNorm(float sumOfSquaredWeights) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); } + public float coord(int overlap, int maxOverlap) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Similarity get(String field) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } } static final class JustCompileSpanFilter extends SpanFilter { diff --git a/lucene/src/test/org/apache/lucene/search/QueryUtils.java b/lucene/src/test/org/apache/lucene/search/QueryUtils.java index 1b30da01171..947019ad4a1 100644 --- a/lucene/src/test/org/apache/lucene/search/QueryUtils.java +++ b/lucene/src/test/org/apache/lucene/search/QueryUtils.java @@ -158,7 +158,7 @@ public class QueryUtils { 0 < edge ? 
r : IndexReader.open(makeEmptyIndex(random, 0), true)) }; IndexSearcher out = new IndexSearcher(new MultiReader(readers)); - out.setSimilarity(s.getSimilarity()); + out.setSimilarityProvider(s.getSimilarityProvider()); return out; } diff --git a/lucene/src/test/org/apache/lucene/search/TestBoolean2.java b/lucene/src/test/org/apache/lucene/search/TestBoolean2.java index f1cb84a51e5..b4dfdbb6b6b 100644 --- a/lucene/src/test/org/apache/lucene/search/TestBoolean2.java +++ b/lucene/src/test/org/apache/lucene/search/TestBoolean2.java @@ -208,9 +208,9 @@ public class TestBoolean2 extends LuceneTestCase { public void testQueries10() throws Exception { String queryText = "+w3 +xx +w2 zz"; int[] expDocNrs = {2, 3}; - Similarity oldSimilarity = searcher.getSimilarity(); + SimilarityProvider oldSimilarity = searcher.getSimilarityProvider(); try { - searcher.setSimilarity(new DefaultSimilarity(){ + searcher.setSimilarityProvider(new DefaultSimilarity(){ @Override public float coord(int overlap, int maxOverlap) { return overlap / ((float)maxOverlap - 1); @@ -218,7 +218,7 @@ public class TestBoolean2 extends LuceneTestCase { }); queriesTest(queryText, expDocNrs); } finally { - searcher.setSimilarity(oldSimilarity); + searcher.setSimilarityProvider(oldSimilarity); } } diff --git a/lucene/src/test/org/apache/lucene/search/TestComplexExplanations.java b/lucene/src/test/org/apache/lucene/search/TestComplexExplanations.java index da1d75668de..91122a7bad2 100644 --- a/lucene/src/test/org/apache/lucene/search/TestComplexExplanations.java +++ b/lucene/src/test/org/apache/lucene/search/TestComplexExplanations.java @@ -34,7 +34,7 @@ public class TestComplexExplanations extends TestExplanations { @Override public void setUp() throws Exception { super.setUp(); - searcher.setSimilarity(createQnorm1Similarity()); + searcher.setSimilarityProvider(createQnorm1Similarity()); } // must be static for weight serialization tests diff --git 
a/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java b/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java index 5849d57354f..574d75b2428 100644 --- a/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java @@ -97,7 +97,7 @@ public class TestConstantScoreQuery extends LuceneTestCase { searcher = new IndexSearcher(reader); // set a similarity that does not normalize our boost away - searcher.setSimilarity(new DefaultSimilarity() { + searcher.setSimilarityProvider(new DefaultSimilarity() { @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; diff --git a/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java b/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java index 4f147de3930..e2462e9207c 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java @@ -73,7 +73,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { } } - public Similarity sim = new TestSimilarity(); + public SimilarityProvider sim = new TestSimilarity(); public Directory index; public IndexReader r; public IndexSearcher s; @@ -85,7 +85,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { index = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, index, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) - .setSimilarity(sim)); + .setSimilarityProvider(sim)); // hed is the most important field, dek is secondary @@ -150,7 +150,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { r = new SlowMultiReaderWrapper(writer.getReader()); writer.close(); s = new IndexSearcher(r); - s.setSimilarity(sim); + s.setSimilarityProvider(sim); } @Override diff --git a/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java 
b/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java index 33b3e5ed297..394f387cb41 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java @@ -69,7 +69,7 @@ public class TestMatchAllDocsQuery extends LuceneTestCase { assertEquals("one", ir.document(hits[2].doc).get("key")); // change norm & retest - ir.setNorm(0, "key", Similarity.getDefault().encodeNormValue(400f)); + ir.setNorm(0, "key", is.getSimilarityProvider().get("key").encodeNormValue(400f)); normsQuery = new MatchAllDocsQuery("key"); hits = is.search(normsQuery, null, 1000).scoreDocs; assertEquals(3, hits.length); diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java b/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java index 7b9086fc70f..c79e1e52c90 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java @@ -295,7 +295,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase { IndexReader reader = writer.getReader(); IndexSearcher searcher = new IndexSearcher(reader); - searcher.setSimilarity(new DefaultSimilarity() { + searcher.setSimilarityProvider(new DefaultSimilarity() { @Override public IDFExplanation idfExplain(Collection terms, diff --git a/lucene/src/test/org/apache/lucene/search/TestSetNorm.java b/lucene/src/test/org/apache/lucene/search/TestSetNorm.java index 7339ce6dc7f..7c873bacae4 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSetNorm.java +++ b/lucene/src/test/org/apache/lucene/search/TestSetNorm.java @@ -51,10 +51,11 @@ public class TestSetNorm extends LuceneTestCase { // reset the boost of each instance of this document IndexReader reader = IndexReader.open(store, false); - reader.setNorm(0, "field", Similarity.getDefault().encodeNormValue(1.0f)); - reader.setNorm(1, "field", Similarity.getDefault().encodeNormValue(2.0f)); 
- reader.setNorm(2, "field", Similarity.getDefault().encodeNormValue(4.0f)); - reader.setNorm(3, "field", Similarity.getDefault().encodeNormValue(16.0f)); + Similarity similarity = new DefaultSimilarity().get("field"); + reader.setNorm(0, "field", similarity.encodeNormValue(1.0f)); + reader.setNorm(1, "field", similarity.encodeNormValue(2.0f)); + reader.setNorm(2, "field", similarity.encodeNormValue(4.0f)); + reader.setNorm(3, "field", similarity.encodeNormValue(16.0f)); reader.close(); // check that searches are ordered by this boost diff --git a/lucene/src/test/org/apache/lucene/search/TestSimilarity.java b/lucene/src/test/org/apache/lucene/search/TestSimilarity.java index c425ef504f6..efd6b5892cd 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSimilarity.java +++ b/lucene/src/test/org/apache/lucene/search/TestSimilarity.java @@ -39,13 +39,11 @@ import org.apache.lucene.search.Explanation.IDFExplanation; */ public class TestSimilarity extends LuceneTestCase { - public static class SimpleSimilarity extends Similarity { + public static class SimpleSimilarity extends Similarity implements SimilarityProvider { @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); } - @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } @Override public float tf(float freq) { return freq; } @Override public float sloppyFreq(int distance) { return 2.0f; } @Override public float idf(int docFreq, int numDocs) { return 1.0f; } - @Override public float coord(int overlap, int maxOverlap) { return 1.0f; } @Override public IDFExplanation idfExplain(Collection terms, IndexSearcher searcher) throws IOException { return new IDFExplanation() { @Override @@ -58,13 +56,18 @@ public class TestSimilarity extends LuceneTestCase { } }; } + public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } + public float coord(int overlap, int maxOverlap) { return 1.0f; } + public Similarity get(String field) { + return 
this; + } } public void testSimilarity() throws Exception { Directory store = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, store, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) - .setSimilarity(new SimpleSimilarity())); + .setSimilarityProvider(new SimpleSimilarity())); Document d1 = new Document(); d1.add(newField("field", "a c", Field.Store.YES, Field.Index.ANALYZED)); @@ -78,7 +81,7 @@ public class TestSimilarity extends LuceneTestCase { writer.close(); IndexSearcher searcher = new IndexSearcher(reader); - searcher.setSimilarity(new SimpleSimilarity()); + searcher.setSimilarityProvider(new SimpleSimilarity()); Term a = new Term("field", "a"); Term b = new Term("field", "b"); diff --git a/lucene/src/test/org/apache/lucene/search/TestSimilarityProvider.java b/lucene/src/test/org/apache/lucene/search/TestSimilarityProvider.java new file mode 100644 index 00000000000..d626b2d9a48 --- /dev/null +++ b/lucene/src/test/org/apache/lucene/search/TestSimilarityProvider.java @@ -0,0 +1,151 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MultiNorms; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestSimilarityProvider extends LuceneTestCase { + private Directory directory; + private IndexReader reader; + private IndexSearcher searcher; + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + SimilarityProvider sim = new ExampleSimilarityProvider(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer()).setSimilarityProvider(sim); + RandomIndexWriter iw = new RandomIndexWriter(random, directory, iwc); + Document doc = new Document(); + Field field = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED); + doc.add(field); + Field field2 = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED); + doc.add(field2); + + field.setValue("quick brown fox"); + field2.setValue("quick brown fox"); + iw.addDocument(doc); + field.setValue("jumps over lazy brown dog"); + field2.setValue("jumps over lazy brown dog"); + iw.addDocument(doc); + reader = iw.getReader(); + iw.close(); + searcher = new IndexSearcher(reader); + searcher.setSimilarityProvider(sim); + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + directory.close(); + super.tearDown(); + } + + public void testBasics() throws Exception { + // sanity check of norms writer + byte fooNorms[] = MultiNorms.norms(reader, "foo"); + byte barNorms[] = MultiNorms.norms(reader, "bar"); + for (int i = 0; i < fooNorms.length; i++) { + assertFalse(fooNorms[i] == 
barNorms[i]); + } + + // sanity check of searching + TopDocs foodocs = searcher.search(new TermQuery(new Term("foo", "brown")), 10); + assertTrue(foodocs.totalHits > 0); + TopDocs bardocs = searcher.search(new TermQuery(new Term("bar", "brown")), 10); + assertTrue(bardocs.totalHits > 0); + assertTrue(foodocs.scoreDocs[0].score < bardocs.scoreDocs[0].score); + } + + private class ExampleSimilarityProvider implements SimilarityProvider { + private Similarity sim1 = new Sim1(); + private Similarity sim2 = new Sim2(); + + public float coord(int overlap, int maxOverlap) { + return 1f; + } + + public float queryNorm(float sumOfSquaredWeights) { + return 1f; + } + + public Similarity get(String field) { + if (field.equals("foo")) { + return sim1; + } else { + return sim2; + } + } + } + + private class Sim1 extends Similarity { + @Override + public float computeNorm(String field, FieldInvertState state) { + return 1f; + } + + @Override + public float sloppyFreq(int distance) { + return 1f; + } + + @Override + public float tf(float freq) { + return 1f; + } + + @Override + public float idf(int docFreq, int numDocs) { + return 1f; + } + } + + private class Sim2 extends Similarity { + @Override + public float computeNorm(String field, FieldInvertState state) { + return 10f; + } + + @Override + public float sloppyFreq(int distance) { + return 10f; + } + + @Override + public float tf(float freq) { + return 10f; + } + + @Override + public float idf(int docFreq, int numDocs) { + return 10f; + } + } +} diff --git a/lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java b/lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java index 2c856d160c4..07221a2c6b1 100644 --- a/lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java +++ b/lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java @@ -27,7 +27,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.util.English; import org.apache.lucene.index.IndexReader; 
import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; @@ -111,13 +111,13 @@ public class PayloadHelper { * @throws IOException */ // TODO: randomize - public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException { + public IndexSearcher setUp(Random random, SimilarityProvider similarity, int numDocs) throws IOException { Directory directory = new MockDirectoryWrapper(random, new RAMDirectory()); PayloadAnalyzer analyzer = new PayloadAnalyzer(); // TODO randomize this IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig( - TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity)); + TEST_VERSION_CURRENT, analyzer).setSimilarityProvider(similarity)); // writer.infoStream = System.out; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); @@ -130,7 +130,7 @@ public class PayloadHelper { writer.close(); IndexSearcher searcher = new IndexSearcher(reader); - searcher.setSimilarity(similarity); + searcher.setSimilarityProvider(similarity); return searcher; } diff --git a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java index 24189b77fff..996d434c805 100644 --- a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java +++ b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java @@ -105,7 +105,7 @@ public class TestPayloadNearQuery extends LuceneTestCase { directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()) - .setSimilarity(similarity)); + .setSimilarityProvider(similarity)); //writer.infoStream = System.out; for (int i = 0; i < 
1000; i++) { Document doc = new Document(); @@ -118,7 +118,7 @@ public class TestPayloadNearQuery extends LuceneTestCase { writer.close(); searcher = new IndexSearcher(reader); - searcher.setSimilarity(similarity); + searcher.setSimilarityProvider(similarity); } @Override diff --git a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java index a478ba41a37..c2837b8ca56 100644 --- a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java +++ b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java @@ -110,7 +110,7 @@ public class TestPayloadTermQuery extends LuceneTestCase { directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()) - .setSimilarity(similarity)); + .setSimilarityProvider(similarity)); //writer.infoStream = System.out; for (int i = 0; i < 1000; i++) { Document doc = new Document(); @@ -125,7 +125,7 @@ public class TestPayloadTermQuery extends LuceneTestCase { writer.close(); searcher = new IndexSearcher(reader); - searcher.setSimilarity(similarity); + searcher.setSimilarityProvider(similarity); } @Override @@ -220,7 +220,7 @@ public class TestPayloadTermQuery extends LuceneTestCase { new MaxPayloadFunction(), false); IndexSearcher theSearcher = new IndexSearcher(directory, true); - theSearcher.setSimilarity(new FullSimilarity()); + theSearcher.setSimilarityProvider(new FullSimilarity()); TopDocs hits = searcher.search(query, null, 100); assertTrue("hits is null and it shouldn't be", hits != null); assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100); diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java b/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java index b795f9ba66f..0b04340d8a3 100644 --- 
a/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java @@ -39,7 +39,7 @@ import org.apache.lucene.index.Payload; import org.apache.lucene.index.Term; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.payloads.PayloadHelper; @@ -50,7 +50,7 @@ import org.apache.lucene.util.LuceneTestCase; public class TestPayloadSpans extends LuceneTestCase { private IndexSearcher searcher; - private Similarity similarity = new DefaultSimilarity(); + private SimilarityProvider similarity = new DefaultSimilarity(); protected IndexReader indexReader; private IndexReader closeIndexReader; private Directory directory; @@ -110,7 +110,7 @@ public class TestPayloadSpans extends LuceneTestCase { Directory directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity)); + newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity)); Document doc = new Document(); doc.add(newField(PayloadHelper.FIELD, "one two three one four three", @@ -370,7 +370,7 @@ public class TestPayloadSpans extends LuceneTestCase { public void testPayloadSpanUtil() throws Exception { Directory directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity)); + newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity)); Document doc = new Document(); doc.add(newField(PayloadHelper.FIELD,"xx rr yy mm pp", Field.Store.YES, Field.Index.ANALYZED)); 
@@ -430,7 +430,7 @@ public class TestPayloadSpans extends LuceneTestCase { directory = newDirectory(); String[] docs = new String[]{"xx rr yy mm pp","xx yy mm rr pp", "nopayload qq ss pp np", "one two three four five six seven eight nine ten eleven", "nine one two three four five six seven eight eleven ten"}; RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity)); + newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity)); Document doc = null; for(int i = 0; i < docs.length; i++) { diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java index d3c8ea5fb80..768b62d51c3 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java @@ -20,9 +20,9 @@ package org.apache.lucene.search.spans; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Query; import org.apache.lucene.search.CheckHits; -import org.apache.lucene.search.Similarity; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Weight.ScorerContext; @@ -410,17 +410,17 @@ public class TestSpans extends LuceneTestCase { for (int i = 0; i < leaves.length; i++) { - final Similarity sim = new DefaultSimilarity() { + final SimilarityProvider sim = new DefaultSimilarity() { @Override public float sloppyFreq(int distance) { return 0.0f; } }; - final Similarity oldSim = searcher.getSimilarity(); + final SimilarityProvider oldSim = searcher.getSimilarityProvider(); Scorer spanScorer; try { - searcher.setSimilarity(sim); + searcher.setSimilarityProvider(sim); SpanNearQuery 
snq = new SpanNearQuery( new SpanQuery[] { makeSpanTermQuery("t1"), @@ -430,7 +430,7 @@ public class TestSpans extends LuceneTestCase { spanScorer = snq.weight(searcher).scorer(leaves[i], ScorerContext.def()); } finally { - searcher.setSimilarity(oldSim); + searcher.setSimilarityProvider(oldSim); } if (i == subIndex) { assertTrue("first doc", spanScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); diff --git a/solr/src/java/org/apache/solr/schema/IndexSchema.java b/solr/src/java/org/apache/solr/schema/IndexSchema.java index 42528160e95..df47bbf4de0 100644 --- a/solr/src/java/org/apache/solr/schema/IndexSchema.java +++ b/solr/src/java/org/apache/solr/schema/IndexSchema.java @@ -20,7 +20,8 @@ package org.apache.solr.schema; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.util.Version; import org.apache.solr.common.ResourceLoader; @@ -192,7 +193,7 @@ public final class IndexSchema { /** * Returns the Similarity used for this index */ - public Similarity getSimilarity() { return similarityFactory.getSimilarity(); } + public SimilarityProvider getSimilarityProvider() { return similarityFactory.getSimilarityProvider(); } /** * Returns the SimilarityFactory used for this index @@ -496,8 +497,8 @@ public final class IndexSchema { Node node = (Node) xpath.evaluate("/schema/similarity", document, XPathConstants.NODE); if (node==null) { similarityFactory = new SimilarityFactory() { - public Similarity getSimilarity() { - return Similarity.getDefault(); + public SimilarityProvider getSimilarityProvider() { + return IndexSearcher.getDefaultSimilarityProvider(); } }; log.debug("using default similarity"); @@ -509,10 +510,10 @@ public final class IndexSchema { 
similarityFactory = (SimilarityFactory)obj; similarityFactory.init(params); } else { - // just like always, assume it's a Similarlity and get a ClassCastException - reasonable error handling + // just like always, assume it's a SimilarityProvider and get a ClassCastException - reasonable error handling similarityFactory = new SimilarityFactory() { - public Similarity getSimilarity() { - return (Similarity) obj; + public SimilarityProvider getSimilarityProvider() { + return (SimilarityProvider) obj; } }; } diff --git a/solr/src/java/org/apache/solr/schema/SimilarityFactory.java b/solr/src/java/org/apache/solr/schema/SimilarityFactory.java index 25b03a0ad68..f2e29d575b5 100644 --- a/solr/src/java/org/apache/solr/schema/SimilarityFactory.java +++ b/solr/src/java/org/apache/solr/schema/SimilarityFactory.java @@ -16,7 +16,7 @@ package org.apache.solr.schema; * limitations under the License. */ -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.solr.common.params.SolrParams; public abstract class SimilarityFactory { @@ -25,5 +25,5 @@ public abstract class SimilarityFactory { public void init(SolrParams params) { this.params = params; } public SolrParams getParams() { return params; } - public abstract Similarity getSimilarity(); + public abstract SimilarityProvider getSimilarityProvider(); } diff --git a/solr/src/java/org/apache/solr/search/SolrConstantScoreQuery.java b/solr/src/java/org/apache/solr/search/SolrConstantScoreQuery.java index 0c6c67ad43f..653b9c93beb 100755 --- a/solr/src/java/org/apache/solr/search/SolrConstantScoreQuery.java +++ b/solr/src/java/org/apache/solr/search/SolrConstantScoreQuery.java @@ -55,13 +55,11 @@ public class SolrConstantScoreQuery extends ConstantScoreQuery { } protected class ConstantWeight extends Weight { - private Similarity similarity; private float queryNorm; private float queryWeight; private Map context; public ConstantWeight(IndexSearcher searcher) throws 
IOException { - this.similarity = searcher.getSimilarity(); this.context = ValueSource.newContext(searcher); if (filter instanceof SolrFilter) ((SolrFilter)filter).createWeight(context, searcher); diff --git a/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java index d874f13722a..fbc4cedac08 100644 --- a/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -132,7 +132,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { } this.closeReader = closeReader; - setSimilarity(schema.getSimilarity()); + setSimilarityProvider(schema.getSimilarityProvider()); SolrConfig solrConfig = core.getSolrConfig(); queryResultWindowSize = solrConfig.queryResultWindowSize; diff --git a/solr/src/java/org/apache/solr/search/function/IDFValueSource.java b/solr/src/java/org/apache/solr/search/function/IDFValueSource.java index a9543b1c893..522e94623f3 100755 --- a/solr/src/java/org/apache/solr/search/function/IDFValueSource.java +++ b/solr/src/java/org/apache/solr/search/function/IDFValueSource.java @@ -41,7 +41,7 @@ public class IDFValueSource extends DocFreqValueSource { @Override public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { IndexSearcher searcher = (IndexSearcher)context.get("searcher"); - Similarity sim = searcher.getSimilarity(); + Similarity sim = searcher.getSimilarityProvider().get(field); // todo: we need docFreq that takes a BytesRef String strVal = ByteUtils.UTF8toUTF16(indexedBytes); int docfreq = searcher.docFreq(new Term(indexedField, strVal)); diff --git a/solr/src/java/org/apache/solr/search/function/NormValueSource.java b/solr/src/java/org/apache/solr/search/function/NormValueSource.java index 004bd14be71..107cb268c5d 100755 --- a/solr/src/java/org/apache/solr/search/function/NormValueSource.java +++ 
b/solr/src/java/org/apache/solr/search/function/NormValueSource.java @@ -46,7 +46,7 @@ public class NormValueSource extends ValueSource { @Override public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { IndexSearcher searcher = (IndexSearcher)context.get("searcher"); - final Similarity similarity = searcher.getSimilarity(); + final Similarity similarity = searcher.getSimilarityProvider().get(field); final byte[] norms = readerContext.reader.norms(field); if (norms == null) { return new ConstDoubleDocValues(0.0, this); diff --git a/solr/src/java/org/apache/solr/search/function/TFValueSource.java b/solr/src/java/org/apache/solr/search/function/TFValueSource.java index cffb2264c59..c37a4949c18 100755 --- a/solr/src/java/org/apache/solr/search/function/TFValueSource.java +++ b/solr/src/java/org/apache/solr/search/function/TFValueSource.java @@ -25,7 +25,7 @@ public class TFValueSource extends TermFreqValueSource { public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { Fields fields = readerContext.reader.fields(); final Terms terms = fields.terms(field); - final Similarity similarity = ((IndexSearcher)context.get("searcher")).getSimilarity(); + final Similarity similarity = ((IndexSearcher)context.get("searcher")).getSimilarityProvider().get(field); return new FloatDocValues(this) { DocsEnum docs ; diff --git a/solr/src/java/org/apache/solr/update/SolrIndexConfig.java b/solr/src/java/org/apache/solr/update/SolrIndexConfig.java index 467b274c8ac..9c86dbc98ba 100644 --- a/solr/src/java/org/apache/solr/update/SolrIndexConfig.java +++ b/solr/src/java/org/apache/solr/update/SolrIndexConfig.java @@ -153,7 +153,7 @@ public class SolrIndexConfig { if (writeLockTimeout != -1) iwc.setWriteLockTimeout(writeLockTimeout); - iwc.setSimilarity(schema.getSimilarity()); + iwc.setSimilarityProvider(schema.getSimilarityProvider()); iwc.setMergePolicy(buildMergePolicy(schema)); 
iwc.setMergeScheduler(buildMergeScheduler(schema)); diff --git a/solr/src/test/org/apache/solr/schema/CustomSimilarityFactory.java b/solr/src/test/org/apache/solr/schema/CustomSimilarityFactory.java index a770296b44b..88296e00b03 100644 --- a/solr/src/test/org/apache/solr/schema/CustomSimilarityFactory.java +++ b/solr/src/test/org/apache/solr/schema/CustomSimilarityFactory.java @@ -16,10 +16,10 @@ */ package org.apache.solr.schema; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; public class CustomSimilarityFactory extends SimilarityFactory { - public Similarity getSimilarity() { + public SimilarityProvider getSimilarityProvider() { return new MockConfigurableSimilarity(params.get("echo")); } } diff --git a/solr/src/test/org/apache/solr/schema/IndexSchemaTest.java b/solr/src/test/org/apache/solr/schema/IndexSchemaTest.java index cb4b90a1c34..cb176d8dee8 100644 --- a/solr/src/test/org/apache/solr/schema/IndexSchemaTest.java +++ b/solr/src/test/org/apache/solr/schema/IndexSchemaTest.java @@ -27,7 +27,7 @@ import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.core.SolrCore; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.junit.BeforeClass; import org.junit.Test; @@ -83,7 +83,7 @@ public class IndexSchemaTest extends SolrTestCaseJ4 { @Test public void testSimilarityFactory() { SolrCore core = h.getCore(); - Similarity similarity = core.getSchema().getSimilarity(); + SimilarityProvider similarity = core.getSchema().getSimilarityProvider(); assertTrue("wrong class", similarity instanceof MockConfigurableSimilarity); assertEquals("is there an echo?", ((MockConfigurableSimilarity)similarity).getPassthrough()); }