LUCENE-2959: add state of the art ranking to Lucene

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1169470 13f79535-47bb-0310-9956-ffa450edef68
2011-09-11 15:47:21 +00:00 · 2011-09-11 15:47:21 +00:00 · cfaf91c739
parent 82649a21b4
commit cfaf91c739
141 changed files with 4345 additions and 183 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -495,6 +495,34 @@ New features
 * LUCENE-3423: add Terms.getDocCount(), which returns the number of documents
  that have at least one term for a field.  (Yonik Seeley, Robert Muir)

+* LUCENE-2959: Added a variety of different relevance ranking systems to Lucene.
+
+  - Added Okapi BM25, Language Models, Divergence from Randomness, and 
+    Information-Based Models. The models are pluggable, support all of Lucene's 
+    features (boosts, slops, explanations, etc.) and queries (spans, etc.).
+
+  - All models default to the same index-time norm encoding as DefaultSimilarity: 
+    so you can easily try these out/switch back and forth/run experiments and 
+    comparisons without reindexing. Note: most of the models do rely upon index
+    statistics that are new in Lucene 4.0, so for existing 3.x indexes it's a good
+    idea to upgrade your index to the new format with IndexUpgrader first.
+
+  - Added a new subclass SimilarityBase which provides a simplified API 
+    for plugging in new ranking algorithms without dealing with all of the
+    nuances and implementation details of Lucene. 
+
+  - Added a new helper class BasicSimilarityProvider that just applies one
+    scoring algorithm to all fields, with queryNorm() and coord() returning 1.
+    In general, it is recommended to disable coord() when using the new models.
+    For example, to use BM25 for all fields: 
+     searcher.setSimilarityProvider(new BasicSimilarityProvider(new BM25Similarity()));
+
+    If you instead want to apply different similarities (e.g. ones with different
+    parameter values or different algorithms entirely) to different fields, implement
+    SimilarityProvider with your per-field logic.
+
+  (David Mark Nemeskey via Robert Muir)
+
 Optimizations

 * LUCENE-2588: Don't store unnecessary suffixes when writing the terms
--- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
+++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
@ -43,7 +43,7 @@ import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermVectorOffsetInfo;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.SimilarityProvider;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.CollectionUtil;
 import org.apache.lucene.util.AttributeImpl;
--- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@ -57,8 +57,8 @@ import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.SimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.RAMDirectory; // for javadocs
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
--- a/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java
@ -22,9 +22,9 @@ import java.util.Date;
 import java.util.List;
 import java.util.ArrayList;

-import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.SimilarityProvider;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Bits;
--- a/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java
@ -17,7 +17,7 @@

 package org.apache.lucene.misc;

-import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.index.FieldInvertState;

 /**
--- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
+++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
@ -26,13 +26,13 @@ import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.search.DefaultSimilarityProvider;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.SimilarityProvider;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

--- a/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java
+++ b/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java
@ -18,11 +18,11 @@

 package org.apache.lucene.misc;

-import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.search.DefaultSimilarityProvider;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.TFIDFSimilarity;
-import org.apache.lucene.search.SimilarityProvider;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
+import org.apache.lucene.search.similarities.TFIDFSimilarity;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.index.FieldInvertState;

--- a/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java
+++ b/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java
@ -31,13 +31,13 @@ import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.MultiNorms;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.search.DefaultSimilarityProvider;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.SimilarityProvider;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

--- a/lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java
+++ b/lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java
@ -31,6 +31,8 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.*;
+import org.apache.lucene.search.similarities.TFIDFSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PriorityQueue;
--- a/lucene/src/java/org/apache/lucene/document/Field.java
+++ b/lucene/src/java/org/apache/lucene/document/Field.java
@ -223,14 +223,14 @@ public class Field implements IndexableField {
   * document.
   *
   * <p>The boost is used to compute the norm factor for the field.  By
-   * default, in the {@link org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method, 
+   * default, in the {@link org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState)} method, 
   * the boost value is multiplied by the length normalization factor and then
-   * rounded by {@link org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)} before it is stored in the
+   * rounded by {@link org.apache.lucene.search.similarities.DefaultSimilarity#encodeNormValue(float)} before it is stored in the
   * index.  One should attempt to ensure that this product does not overflow
   * the range of that encoding.
   *
-   * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
-   * @see org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)
+   * @see org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState)
+   * @see org.apache.lucene.search.similarities.DefaultSimilarity#encodeNormValue(float)
   */
  public void setBoost(float boost) {
    this.boost = boost;
--- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
@ -32,7 +32,7 @@ import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
 import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
 import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.search.SimilarityProvider;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;

--- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
+++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
@ -26,7 +26,7 @@ import java.text.NumberFormat;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice;
-import org.apache.lucene.search.SimilarityProvider;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FlushInfo;
 import org.apache.lucene.store.IOContext;
--- a/lucene/src/java/org/apache/lucene/index/IndexReader.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java
@ -32,7 +32,7 @@ import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.index.values.IndexDocValues;
 import org.apache.lucene.search.FieldCache; // javadocs
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.*;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
@ -1012,7 +1012,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
   *
   * @see #norms(String)
   * @see Similarity#computeNorm(FieldInvertState)
-   * @see org.apache.lucene.search.DefaultSimilarity#decodeNormValue(byte)
+   * @see org.apache.lucene.search.similarities.DefaultSimilarity#decodeNormValue(byte)
   * @throws StaleReaderException if the index has changed
   *  since this reader was opened
   * @throws CorruptIndexException if the index is corrupt
--- a/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
@ -22,7 +22,7 @@ import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
 import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.SimilarityProvider;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.util.Version;

 /**
--- a/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
+++ b/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
@ -17,7 +17,7 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.ArrayUtil;

 /** Taps into DocInverter, as an InvertedDocEndConsumer,
--- a/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
@ -24,7 +24,8 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs;
-import org.apache.lucene.search.Similarity.ExactDocScorer;
+import org.apache.lucene.search.similarities.SimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
 import org.apache.lucene.search.TermQuery.TermWeight;

 import java.io.IOException;
--- a/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java
+++ b/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java
@ -24,6 +24,7 @@ import java.util.List;

 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery.BooleanWeight;
+import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.search.Scorer.ChildScorer;

 /* See the description in BooleanScorer.java, comparing
--- a/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java
+++ b/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java
@ -18,7 +18,7 @@ package org.apache.lucene.search;
 */

 import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.search.Similarity.ExactDocScorer;
+import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
 import org.apache.lucene.util.ArrayUtil;
 import java.io.IOException;
 import java.util.Comparator;
--- a/lucene/src/java/org/apache/lucene/search/ExactPhraseScorer.java
+++ b/lucene/src/java/org/apache/lucene/search/ExactPhraseScorer.java
@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.Arrays;

 import org.apache.lucene.index.*;
+import org.apache.lucene.search.similarities.Similarity;

 final class ExactPhraseScorer extends Scorer {
  private final int endMinus1;
--- a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
@ -38,6 +38,8 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Weight.ScorerContext;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.NIOFSDirectory;    // javadoc
 import org.apache.lucene.util.ReaderUtil;
--- a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
@ -26,7 +26,8 @@ import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.search.Similarity.SloppyDocScorer;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.TermContext;
@ -164,8 +165,7 @@ public class MultiPhraseQuery extends Query {

    @Override
    public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
-      if (termArrays.size() == 0)                  // optimize zero-term case
-        return null;
+      assert !termArrays.isEmpty();
      final IndexReader reader = context.reader;
      final Bits liveDocs = reader.getLiveDocs();
      
@ -249,7 +249,11 @@ public class MultiPhraseQuery extends Query {

  @Override
  public Query rewrite(IndexReader reader) {
-    if (termArrays.size() == 1) {                 // optimize one-term case
+    if (termArrays.isEmpty()) {
+      BooleanQuery bq = new BooleanQuery();
+      bq.setBoost(getBoost());
+      return bq;
+    } else if (termArrays.size() == 1) {                 // optimize one-term case
      Term[] terms = termArrays.get(0);
      BooleanQuery boq = new BooleanQuery(true);
      for (int i=0; i<terms.length; i++) {
--- a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
@ -29,7 +29,8 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.Similarity.SloppyDocScorer;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.TermContext;
 import org.apache.lucene.util.ToStringUtils;
@ -119,7 +120,11 @@ public class PhraseQuery extends Query {

  @Override
  public Query rewrite(IndexReader reader) throws IOException {
-    if (terms.size() == 1) {
+    if (terms.isEmpty()) {
+      BooleanQuery bq = new BooleanQuery();
+      bq.setBoost(getBoost());
+      return bq;
+    } else if (terms.size() == 1) {
      TermQuery tq = new TermQuery(terms.get(0));
      tq.setBoost(getBoost());
      return tq;
@ -208,8 +213,7 @@ public class PhraseQuery extends Query {

    @Override
    public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
-      if (terms.size() == 0)			  // optimize zero-term case
-        return null;
+      assert !terms.isEmpty();
      final IndexReader reader = context.reader;
      final Bits liveDocs = reader.getLiveDocs();
      PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];
@ -285,12 +289,6 @@ public class PhraseQuery extends Query {

  @Override
  public Weight createWeight(IndexSearcher searcher) throws IOException {
-    if (terms.size() == 1) {			  // optimize one-term case
-      Term term = terms.get(0);
-      Query termQuery = new TermQuery(term);
-      termQuery.setBoost(getBoost());
-      return termQuery.createWeight(searcher);
-    }
    return new PhraseWeight(searcher);
  }

--- a/lucene/src/java/org/apache/lucene/search/PhraseScorer.java
+++ b/lucene/src/java/org/apache/lucene/search/PhraseScorer.java
@ -19,6 +19,8 @@ package org.apache.lucene.search;

 import java.io.IOException;

+import org.apache.lucene.search.similarities.Similarity;
+
 /** Expert: Scoring functionality for phrase queries.
 * <br>A document is considered matching if it contains the phrase-query terms  
 * at "valid" positions. What "valid positions" are
--- a/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
+++ b/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
@ -20,6 +20,8 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.LinkedHashSet;

+import org.apache.lucene.search.similarities.Similarity;
+
 final class SloppyPhraseScorer extends PhraseScorer {
    private int slop;
    private PhrasePositions repeats[];
--- a/lucene/src/java/org/apache/lucene/search/TermQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/TermQuery.java
@ -28,7 +28,8 @@ import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.Similarity.ExactDocScorer;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.TermContext;
 import org.apache.lucene.util.ReaderUtil;
--- a/lucene/src/java/org/apache/lucene/search/TermScorer.java
+++ b/lucene/src/java/org/apache/lucene/search/TermScorer.java
@ -20,6 +20,7 @@ package org.apache.lucene.search;
 import java.io.IOException;

 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.search.similarities.Similarity;

 /** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
 */
--- a/lucene/src/java/org/apache/lucene/search/Weight.java
+++ b/lucene/src/java/org/apache/lucene/search/Weight.java
@ -22,6 +22,7 @@ import java.io.IOException;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader.ReaderContext;
+import org.apache.lucene.search.similarities.SimilarityProvider;

 /**
 * Expert: Calculate query weights and build query scorers.
--- a/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
@ -22,10 +22,10 @@ import org.apache.lucene.search.ComplexExplanation;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.DefaultSimilarity; // javadocs only
 import org.apache.lucene.search.Weight;
-import org.apache.lucene.search.Similarity.SloppyDocScorer;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
 import org.apache.lucene.search.spans.NearSpansOrdered;
 import org.apache.lucene.search.spans.NearSpansUnordered;
 import org.apache.lucene.search.spans.SpanNearQuery;
@ -52,7 +52,7 @@ import java.util.Iterator;
 * <p/>
 * Payload scores are aggregated using a pluggable {@link PayloadFunction}.
 * 
- * @see org.apache.lucene.search.Similarity.SloppyDocScorer#computePayloadFactor(int, int, int, BytesRef)
+ * @see org.apache.lucene.search.similarities.Similarity.SloppyDocScorer#computePayloadFactor(int, int, int, BytesRef)
 */
 public class PayloadNearQuery extends SpanNearQuery {
  protected String fieldName;
--- a/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
@ -20,16 +20,16 @@ package org.apache.lucene.search.payloads;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.search.DefaultSimilarity; // javadocs only
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Weight;
-import org.apache.lucene.search.Similarity;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.ComplexExplanation;
-import org.apache.lucene.search.Similarity.SloppyDocScorer;
 import org.apache.lucene.search.Weight.ScorerContext;
 import org.apache.lucene.search.payloads.PayloadNearQuery.PayloadNearSpanScorer;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
 import org.apache.lucene.search.spans.TermSpans;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.SpanWeight;
@ -49,7 +49,7 @@ import java.io.IOException;
 * which returns 1 by default.
 * <p/>
 * Payload scores are aggregated using a pluggable {@link PayloadFunction}.
- * @see org.apache.lucene.search.Similarity.SloppyDocScorer#computePayloadFactor(int, int, int, BytesRef)
+ * @see org.apache.lucene.search.similarities.Similarity.SloppyDocScorer#computePayloadFactor(int, int, int, BytesRef)
 **/
 public class PayloadTermQuery extends SpanTermQuery {
  protected PayloadFunction function;
--- a/lucene/src/java/org/apache/lucene/search/similarities/AfterEffect.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/AfterEffect.java
@ -0,0 +1,63 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+
+/**
+ * This class acts as the base class for the implementations of the <em>first
+ * normalization of the informative content</em> in the DFR framework. This
+ * component is also called the <em>after effect</em> and is defined by the
+ * formula <em>Inf<sub>2</sub> = 1 - Prob<sub>2</sub></em>, where
+ * <em>Prob<sub>2</sub></em> measures the <em>information gain</em>.
+ * 
+ * @see DFRSimilarity
+ * @lucene.experimental
+ */
+public abstract class AfterEffect {
+  /** Returns the aftereffect score computed from {@code stats} and {@code tfn}. */
+  public abstract float score(BasicStats stats, float tfn);
+  
+  /** Returns an explanation for the score obtained from {@code stats} and {@code tfn}. */
+  public abstract Explanation explain(BasicStats stats, float tfn);
+
+  /** Implementation used when there is no aftereffect: the score is always 1. */
+  public static final class NoAfterEffect extends AfterEffect {
+    @Override
+    public final float score(BasicStats stats, float tfn) {
+      return 1f; // constant; both arguments are ignored
+    }
+
+    @Override
+    public final Explanation explain(BasicStats stats, float tfn) {
+      return new Explanation(1, "no aftereffect"); // mirrors the constant returned by score()
+    }
+    
+    @Override
+    public String toString() {
+      return ""; // empty code: no aftereffect formula is applied
+    }
+  }
+  
+  /**
+   * Subclasses must override this method to return the code of the
+   * after effect formula. Refer to the original paper for the list. 
+   */
+  @Override
+  public abstract String toString();
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java
@ -0,0 +1,49 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+
+/**
+ * Model of the information gain based on the ratio of two Bernoulli processes.
+ * @lucene.experimental
+ */
+public class AfterEffectB extends AfterEffect {
+  @Override
+  public final float score(BasicStats stats, float tfn) {
+    long F = stats.getTotalTermFreq(); // reported as "totalTermFreq" by explain()
+    int n = stats.getDocFreq(); // reported as "docFreq" by explain()
+    return (F + 1) / (n * (tfn + 1)); // divisor is a float, so no integer division occurs
+  }
+  
+  @Override
+  public final Explanation explain(BasicStats stats, float tfn) {
+    Explanation result = new Explanation();
+    result.setDescription(getClass().getSimpleName() + ", computed from: ");
+    result.setValue(score(stats, tfn)); // the explained value is exactly what score() returns
+    result.addDetail(new Explanation(tfn, "tfn")); // one detail per input of the formula
+    result.addDetail(new Explanation(stats.getTotalTermFreq(), "totalTermFreq"));
+    result.addDetail(new Explanation(stats.getDocFreq(), "docFreq"));
+    return result;
+  }
+
+  @Override
+  public String toString() {
+    return "B"; // code of this after effect formula
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectL.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectL.java
@ -0,0 +1,45 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+
+/**
+ * Model of the information gain based on Laplace's law of succession.
+ * @lucene.experimental
+ */
+public class AfterEffectL extends AfterEffect {
+  @Override
+  public final float score(BasicStats stats, float tfn) {
+    return 1 / (tfn + 1); // depends on tfn only; stats is not read
+  }
+  
+  @Override
+  public final Explanation explain(BasicStats stats, float tfn) {
+    Explanation result = new Explanation();
+    result.setDescription(getClass().getSimpleName() + ", computed from: ");
+    result.setValue(score(stats, tfn)); // the explained value is exactly what score() returns
+    result.addDetail(new Explanation(tfn, "tfn")); // the only input of the formula
+    return result;
+  }
+  
+  @Override
+  public String toString() {
+    return "L"; // code of this after effect formula
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/BM25Similarity.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/BM25Similarity.java
@ -0,0 +1,339 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.SmallFloat;
+import org.apache.lucene.util.TermContext;
+
+/**
+ * BM25 Similarity. Introduced in Stephen E. Robertson, Steve Walker,
+ * Susan Jones, Micheline Hancock-Beaulieu, and Mike Gatford. Okapi at TREC-3.
+ * In Proceedings of the Third Text REtrieval Conference (TREC 1994).
+ * Gaithersburg, USA, November 1994.
+ * <p>
+ * The per-document score computed here is
+ * <code>boost * idf * freq * (k1 + 1) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength))</code>.
+ * @lucene.experimental
+ */
+public class BM25Similarity extends Similarity {
+  private final float k1;
+  private final float b;
+  // TODO: should we add a delta like sifaka.cs.uiuc.edu/~ylv2/pub/sigir11-bm25l.pdf ?
+
+  /**
+   * BM25 with the supplied parameter values.
+   * @param k1 multiplier of the length-normalization term that is added to the
+   *        term frequency in the denominator of the tf component
+   * @param b weight given to the <code>fieldLength / avgFieldLength</code> ratio
+   *        (as opposed to the constant <code>1 - b</code>) inside that term
+   */
+  public BM25Similarity(float k1, float b) {
+    this.k1 = k1;
+    this.b  = b;
+  }
+  
+  /** BM25 with these default values:
+   * <ul>
+   *   <li>{@code k1 = 1.2},</li>
+   *   <li>{@code b = 0.75}.</li>
+   * </ul>
+   */
+  public BM25Similarity() {
+    this(1.2f, 0.75f);
+  }
+  
+  /** Implemented as <code>log(1 + (numDocs - docFreq + 0.5)/(docFreq + 0.5))</code>. */
+  protected float idf(int docFreq, int numDocs) {
+    return (float) Math.log(1 + (numDocs - docFreq + 0.5D)/(docFreq + 0.5D));
+  }
+  
+  /** Implemented as <code>1 / (distance + 1)</code>. */
+  protected float sloppyFreq(int distance) {
+    return 1.0f / (distance + 1);
+  }
+  
+  /** The default implementation returns <code>1</code> */
+  protected float scorePayload(int doc, int start, int end, BytesRef payload) {
+    return 1;
+  }
+  
+  /** The default implementation computes the average as <code>sumTotalTermFreq / maxDoc</code>,
+   * or returns <code>1</code> if the index does not store sumTotalTermFreq (Lucene 3.x indexes
+   * or any field that omits frequency information). <code>1</code> is also returned when the
+   * field has no terms at all. */
+  protected float avgFieldLength(IndexSearcher searcher, String field) throws IOException {
+    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), field);
+    if (terms == null) {
+      // field does not exist;
+      return 1f;
+    }
+    long sumTotalTermFreq = terms.getSumTotalTermFreq();
+    long maxdoc = searcher.maxDoc();
+    // -1 signals that the statistic is unavailable; divide in double to avoid integer truncation
+    return sumTotalTermFreq == -1 ? 1f : (float) (sumTotalTermFreq / (double) maxdoc);
+  }
+  
+  /** The default implementation encodes <code>boost / sqrt(length)</code>
+   * with {@link SmallFloat#floatToByte315(float)}.  This is compatible with 
+   * Lucene's default implementation.  If you change this, then you should 
+   * change {@link #decodeNormValue(byte)} to match. */
+  protected byte encodeNormValue(float boost, int fieldLength) {
+    return SmallFloat.floatToByte315(boost / (float) Math.sqrt(fieldLength));
+  }
+
+  /** The default implementation returns <code>1 / f<sup>2</sup></code>
+   * where <code>f</code> is {@link SmallFloat#byte315ToFloat(byte)}.
+   * The value is looked up in a table precomputed for all 256 byte values. */
+  protected float decodeNormValue(byte b) {
+    // mask to treat the byte as an unsigned table index
+    return NORM_TABLE[b & 0xFF];
+  }
+  
+  // Default true
+  protected boolean discountOverlaps = true;
+
+  /** Determines whether overlap tokens (Tokens with 0 position increment) are 
+   *  ignored when computing norm.  By default this is true, meaning overlap
+   *  tokens do not count when computing norms. */
+  public void setDiscountOverlaps(boolean v) {
+    discountOverlaps = v;
+  }
+
+  /** @see #setDiscountOverlaps */
+  public boolean getDiscountOverlaps() {
+    return discountOverlaps;
+  }
+  
+  /** Cache of decoded bytes. */
+  private static final float[] NORM_TABLE = new float[256];
+
+  static {
+    for (int i = 0; i < 256; i++) {
+      float f = SmallFloat.byte315ToFloat((byte)i);
+      NORM_TABLE[i] = 1.0f / (f*f);
+    }
+  }
+
+  /** Encodes the field's boost and length (optionally excluding overlap tokens,
+   * see {@link #setDiscountOverlaps}) with {@link #encodeNormValue(float, int)}. */
+  @Override
+  public final byte computeNorm(FieldInvertState state) {
+    final int numTerms = discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength();
+    return encodeNormValue(state.getBoost(), numTerms);
+  }
+
+  /** Returns an explanation whose value is the idf of a single term, computed
+   * from the term's document frequency and the searcher's maxDoc. */
+  public Explanation idfExplain(TermContext stats, final IndexSearcher searcher) throws IOException {
+    final int df = stats.docFreq();
+    final int max = searcher.maxDoc();
+    final float idf = idf(df, max);
+    return new Explanation(idf, "idf(docFreq=" + df + ", maxDocs=" + max + ")");
+  }
+
+  /** Returns an explanation whose value is the sum of the idfs of all given
+   * terms, with one detail entry per term. */
+  public Explanation idfExplain(final TermContext stats[], IndexSearcher searcher) throws IOException {
+    final int max = searcher.maxDoc();
+    float idf = 0.0f;
+    final Explanation exp = new Explanation();
+    exp.setDescription("idf(), sum of:");
+    for (final TermContext stat : stats ) {
+      final int df = stat.docFreq();
+      final float termIdf = idf(df, max);
+      exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
+      idf += termIdf;
+    }
+    exp.setValue(idf);
+    return exp;
+  }
+
+  /** Collects the idf, the average field length and a table of the
+   * frequency-independent denominator term for every possible norm byte. */
+  @Override
+  public final Stats computeStats(IndexSearcher searcher, String fieldName, float queryBoost, TermContext... termStats) throws IOException {
+    Explanation idf = termStats.length == 1 ? idfExplain(termStats[0], searcher) : idfExplain(termStats, searcher);
+
+    float avgdl = avgFieldLength(searcher, fieldName);
+
+    // compute freq-independent part of bm25 equation across all norm values
+    float cache[] = new float[256];
+    for (int i = 0; i < cache.length; i++) {
+      cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
+    }
+    return new BM25Stats(idf, queryBoost, avgdl, cache);
+  }
+
+  /** Returns a scorer that uses the field's norms, or one that behaves as if
+   * <code>b = 0</code> when the reader has no norms for the field. */
+  @Override
+  public final ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
+    final byte[] norms = context.reader.norms(fieldName);
+    return norms == null 
+      ? new ExactBM25DocScorerNoNorms((BM25Stats)stats)
+      : new ExactBM25DocScorer((BM25Stats)stats, norms);
+  }
+
+  /** Returns a sloppy scorer; it handles a missing norms array itself. */
+  @Override
+  public final SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
+    return new SloppyBM25DocScorer((BM25Stats) stats, context.reader.norms(fieldName));
+  }
+  
+  /** Scorer for integer frequencies on fields that have norms. */
+  private class ExactBM25DocScorer extends ExactDocScorer {
+    private final BM25Stats stats;
+    private final float weightValue;
+    private final byte[] norms;
+    private final float[] cache;
+    
+    ExactBM25DocScorer(BM25Stats stats, byte norms[]) {
+      assert norms != null;
+      this.stats = stats;
+      this.weightValue = stats.weight * (k1 + 1); // boost * idf * (k1 + 1)
+      this.cache = stats.cache;
+      this.norms = norms;
+    }
+    
+    @Override
+    public float score(int doc, int freq) {
+      // cache[] holds k1 * (1 - b + b * dl / avgdl), indexed by the unsigned norm byte
+      return weightValue * freq / (freq + cache[norms[doc] & 0xFF]);
+    }
+    
+    @Override
+    public Explanation explain(int doc, Explanation freq) {
+      return explainScore(doc, freq, stats, norms);
+    }
+  }
+  
+  /** there are no norms, we act as if b=0 */
+  private class ExactBM25DocScorerNoNorms extends ExactDocScorer {
+    private final BM25Stats stats;
+    private final float weightValue;
+    private static final int SCORE_CACHE_SIZE = 32;
+    private float[] scoreCache = new float[SCORE_CACHE_SIZE];
+
+    ExactBM25DocScorerNoNorms(BM25Stats stats) {
+      this.stats = stats;
+      this.weightValue = stats.weight * (k1 + 1); // boost * idf * (k1 + 1)
+      // without norms the score depends on freq only, so precompute it for small freqs
+      for (int i = 0; i < SCORE_CACHE_SIZE; i++)
+        scoreCache[i] = weightValue * i / (i + k1);
+    }
+    
+    @Override
+    public float score(int doc, int freq) {
+      // TODO: maybe score cache is more trouble than its worth?
+      return freq < SCORE_CACHE_SIZE        // check cache
+        ? scoreCache[freq]                  // cache hit
+        : weightValue * freq / (freq + k1); // cache miss
+    }
+    
+    @Override
+    public Explanation explain(int doc, Explanation freq) {
+      return explainScore(doc, freq, stats, null);
+    }
+  }
+  
+  /** Scorer for float (sloppy) frequencies; works with or without norms. */
+  private class SloppyBM25DocScorer extends SloppyDocScorer {
+    private final BM25Stats stats;
+    private final float weightValue; // boost * idf * (k1 + 1)
+    private final byte[] norms;
+    private final float[] cache;
+    
+    SloppyBM25DocScorer(BM25Stats stats, byte norms[]) {
+      this.stats = stats;
+      this.weightValue = stats.weight * (k1 + 1);
+      this.cache = stats.cache;
+      this.norms = norms;
+    }
+    
+    @Override
+    public float score(int doc, float freq) {
+      // if there are no norms, we act as if b=0
+      float norm = norms == null ? k1 : cache[norms[doc] & 0xFF];
+      return weightValue * freq / (freq + norm);
+    }
+    
+    @Override
+    public Explanation explain(int doc, Explanation freq) {
+      return explainScore(doc, freq, stats, norms);
+    }
+
+    @Override
+    public float computeSlopFactor(int distance) {
+      return sloppyFreq(distance);
+    }
+
+    @Override
+    public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
+      return scorePayload(doc, start, end, payload);
+    }
+  }
+  
+  /** Collection statistics for the BM25 model. */
+  private static class BM25Stats extends Stats {
+    /** BM25's idf */
+    private final Explanation idf;
+    /** The average document length. */
+    private final float avgdl;
+    /** query's inner boost */
+    private final float queryBoost;
+    /** query's outer boost (only for explain) */
+    private float topLevelBoost;
+    /** weight (idf * boost) */
+    private float weight;
+    /** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */
+    private final float cache[];
+
+    BM25Stats(Explanation idf, float queryBoost, float avgdl, float cache[]) {
+      this.idf = idf;
+      this.queryBoost = queryBoost;
+      this.avgdl = avgdl;
+      this.cache = cache;
+    }
+
+    @Override
+    public float getValueForNormalization() {
+      // we return a TF-IDF like normalization to be nice, but we don't actually normalize ourselves.
+      final float queryWeight = idf.getValue() * queryBoost;
+      return queryWeight * queryWeight;
+    }
+
+    @Override
+    public void normalize(float queryNorm, float topLevelBoost) {
+      // we don't normalize with queryNorm at all, we just capture the top-level boost
+      this.topLevelBoost = topLevelBoost;
+      this.weight = idf.getValue() * queryBoost * topLevelBoost;
+    } 
+  }
+  
+  /**
+   * Builds the explanation shared by all scorers.
+   * @param doc document whose score is explained
+   * @param freq explanation of the (possibly sloppy) term frequency
+   * @param stats statistics the scorer was created with
+   * @param norms the field's norms, or <code>null</code> if the field has none,
+   *        in which case the explanation is computed as if <code>b = 0</code>
+   */
+  private Explanation explainScore(int doc, Explanation freq, BM25Stats stats, byte[] norms) {
+    Explanation result = new Explanation();
+    result.setDescription("score(doc="+doc+",freq="+freq+"), product of:");
+    
+    // the scorers multiply by stats.weight, which includes the top-level boost,
+    // so the explained boost must include it too or the values will not match
+    Explanation boostExpl = new Explanation(stats.queryBoost * stats.topLevelBoost, "boost");
+    if (boostExpl.getValue() != 1.0f) {
+      result.addDetail(boostExpl);
+    }
+    
+    result.addDetail(stats.idf);
+
+    Explanation tfNormExpl = new Explanation();
+    tfNormExpl.setDescription("tfNorm, computed from:");
+    tfNormExpl.addDetail(freq);
+    tfNormExpl.addDetail(new Explanation(k1, "parameter k1"));
+    if (norms == null) {
+      tfNormExpl.addDetail(new Explanation(0, "parameter b (norms omitted for field)"));
+      tfNormExpl.setValue((freq.getValue() * (k1 + 1)) / (freq.getValue() + k1));
+    } else {
+      float doclen = decodeNormValue(norms[doc]);
+      tfNormExpl.addDetail(new Explanation(b, "parameter b"));
+      tfNormExpl.addDetail(new Explanation(stats.avgdl, "avgFieldLength"));
+      tfNormExpl.addDetail(new Explanation(doclen, "fieldLength"));
+      tfNormExpl.setValue((freq.getValue() * (k1 + 1)) / (freq.getValue() + k1 * (1 - b + b * doclen/stats.avgdl)));
+    }
+    result.addDetail(tfNormExpl);
+    result.setValue(boostExpl.getValue() * stats.idf.getValue() * tfNormExpl.getValue());
+    return result;
+  }
+
+  @Override
+  public String toString() {
+    return "BM25(k1=" + k1 + ",b=" + b + ")";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/BasicModel.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/BasicModel.java
@ -0,0 +1,60 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+
+/**
+ * Common parent of the <em>basic model</em> implementations of the DFR
+ * framework. A basic model computes the
+ * <em>informative content Inf<sub>1</sub> = -log<sub>2</sub>Prob<sub>1</sub>
+ * </em>.
+ * 
+ * @see DFRSimilarity
+ * @lucene.experimental
+ */
+public abstract class BasicModel {
+  /** Returns the informative content score. */
+  public abstract float score(BasicStats stats, float tfn);
+  
+  /**
+   * Returns an explanation for the score.
+   * <p>The generic explanation built here lists {@code tfn}, the number of
+   * documents and the total term frequency, which is what most basic models
+   * use to compute Inf<sub>1</sub>. Subclasses that rely on other statistics
+   * must override this method.</p>
+   */
+  public Explanation explain(BasicStats stats, float tfn) {
+    final String description = getClass().getSimpleName() + ", computed from: ";
+    final Explanation explanation = new Explanation(score(stats, tfn), description);
+
+    final Explanation tfnDetail = new Explanation(tfn, "tfn");
+    final Explanation numDocsDetail =
+        new Explanation(stats.getNumberOfDocuments(), "numberOfDocuments");
+    final Explanation totalTermFreqDetail =
+        new Explanation(stats.getTotalTermFreq(), "totalTermFreq");
+
+    explanation.addDetail(tfnDetail);
+    explanation.addDetail(numDocsDetail);
+    explanation.addDetail(totalTermFreqDetail);
+    return explanation;
+  }
+  
+  /**
+   * Returns the code of the basic model formula; every subclass has to
+   * provide it. Refer to the original paper for the list. 
+   */
+  @Override
+  public abstract String toString();
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java
@ -0,0 +1,47 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
+
+/**
+ * Limiting form of the Bose-Einstein model. The formula used in Lucene differs
+ * slightly from the one in the original paper: {@code F} is increased by {@code tfn}
+ * and {@code N} is increased by {@code F}.
+ * @lucene.experimental
+ */
+public class BasicModelBE extends BasicModel {
+  @Override
+  public final float score(BasicStats stats, float tfn) {
+    // F: total term frequency, increased by tfn
+    final double F = stats.getTotalTermFreq() + tfn;
+    // approximation only holds true when F << N, so we use N += F
+    final double N = F + stats.getNumberOfDocuments();
+
+    final double logTerm = -log2((N - 1) * Math.E);
+    final double upper = stirlingTerm(N + F - 1, N + F - tfn - 2);
+    final double lower = stirlingTerm(F, F - tfn);
+    return (float) (logTerm + upper - lower);
+  }
+  
+  /** The <em>f</em> helper function defined for <em>B<sub>E</sub></em>. */
+  private final double stirlingTerm(double n, double m) {
+    final double left = (m + 0.5) * log2(n / m);
+    final double right = (n - m) * log2(n);
+    return left + right;
+  }
+  
+  /** Returns the code of this basic model: {@code "Be"}. */
+  @Override
+  public String toString() {
+    return "Be";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java
@ -0,0 +1,52 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
+
+/**
+ * Approximation of the binomial model with the divergence, for DFR. The
+ * formula used in Lucene differs slightly from the one in the original
+ * paper: to avoid underflow for small values of {@code N} and {@code F},
+ * {@code N} is increased by {@code 1} and {@code F} is always increased by
+ * {@code tfn}.
+ * <p>
+ * WARNING: for terms that do not meet the expected random distribution
+ * (e.g. stopwords), this model may give poor performance, such as
+ * abnormally high scores for low tf values.
+ * @lucene.experimental
+ */
+public class BasicModelD extends BasicModel {
+  @Override
+  public final float score(BasicStats stats, float tfn) {
+    // phi must stay < 1 even for tiny total term frequencies, otherwise
+    // 1 - phi can go negative and the logarithm yields NaN. Unconditionally
+    // adding tfn to totalTermFreq gives a 'normalized' F that guarantees this.
+    final double normalizedF = stats.getTotalTermFreq() + tfn;
+    final double phi = tfn / normalizedF;
+    final double oneMinusPhi = 1 - phi;
+    final double p = 1.0 / (stats.getNumberOfDocuments() + 1);
+
+    final double divergence =
+        phi * log2(phi / p) + oneMinusPhi * log2(oneMinusPhi / (1 - p));
+    final double correction = 0.5 * log2(1 + 2 * Math.PI * tfn * oneMinusPhi);
+    return (float) (divergence * normalizedF + correction);
+  }
+  
+  /** Returns the code of this basic model: {@code "D"}. */
+  @Override
+  public String toString() {
+    return "D";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java
@ -0,0 +1,41 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
+
+/**
+ * Geometric as limiting form of the Bose-Einstein model.  The formula used in Lucene differs
+ * slightly from the one in the original paper: {@code lambda} is computed as
+ * {@code F / (N + F)}, i.e. {@code N} is increased by {@code F}.
+ * @lucene.experimental
+ */
+public class BasicModelG extends BasicModel {
+  @Override
+  public final float score(BasicStats stats, float tfn) {
+    final long F = stats.getTotalTermFreq();
+    final int N = stats.getNumberOfDocuments();
+    // just like in BE, approximation only holds true when F << N, so we use lambda = F / (N + F)
+    final double lambda = F / (double) (N + F);
+    // -log(1 / (lambda + 1)) -> log(lambda + 1)
+    final double constantPart = log2(lambda + 1);
+    final double perOccurrence = log2((1 + lambda) / lambda);
+    return (float) (constantPart + tfn * perOccurrence);
+  }
+
+  /** Returns the code of this basic model: {@code "G"}. */
+  @Override
+  public String toString() {
+    return "G";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java
@ -0,0 +1,38 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
+
+/**
+ * An approximation of the <em>I(n<sub>e</sub>)</em> model, computed from the
+ * number of documents and the total term frequency.
+ * @lucene.experimental
+ */ 
+public class BasicModelIF extends BasicModel {
+  /** Returns {@code tfn * log2(1 + (N + 1) / (F + 0.5))}. */
+  @Override
+  public final float score(BasicStats stats, float tfn) {
+    final int numDocs = stats.getNumberOfDocuments();
+    final long totalTermFreq = stats.getTotalTermFreq();
+    final double ratio = (numDocs + 1) / (totalTermFreq + 0.5);
+    return tfn * (float) (log2(1 + ratio));
+  }
+
+  /** Returns the code of this basic model: {@code "I(F)"}. */
+  @Override
+  public String toString() {
+    return "I(F)";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java
@ -0,0 +1,52 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
+
+/**
+ * The basic tf-idf model of randomness, computed from the number of
+ * documents and the document frequency of the term.
+ * @lucene.experimental
+ */ 
+public class BasicModelIn extends BasicModel {
+  /** Returns {@code tfn * log2((N + 1) / (n + 0.5))}. */
+  @Override
+  public final float score(BasicStats stats, float tfn) {
+    final int numDocs = stats.getNumberOfDocuments();
+    final int docFreq = stats.getDocFreq();
+    final double ratio = (numDocs + 1) / (docFreq + 0.5);
+    return tfn * (float) (log2(ratio));
+  }
+  
+  /**
+   * Explains the score; unlike the generic explanation this one lists the
+   * document frequency instead of the total term frequency.
+   */
+  @Override
+  public final Explanation explain(BasicStats stats, float tfn) {
+    final String description = getClass().getSimpleName() + ", computed from: ";
+    final Explanation explanation = new Explanation(score(stats, tfn), description);
+
+    final Explanation tfnDetail = new Explanation(tfn, "tfn");
+    final Explanation numDocsDetail =
+        new Explanation(stats.getNumberOfDocuments(), "numberOfDocuments");
+    final Explanation docFreqDetail =
+        new Explanation(stats.getDocFreq(), "docFreq");
+
+    explanation.addDetail(tfnDetail);
+    explanation.addDetail(numDocsDetail);
+    explanation.addDetail(docFreqDetail);
+    return explanation;
+  }
+
+  /** Returns the code of this basic model: {@code "I(n)"}. */
+  @Override
+  public String toString() {
+    return "I(n)";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java
@ -0,0 +1,40 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
+
+/**
+ * Tf-idf model of randomness, based on a mixture of Poisson and inverse
+ * document frequency.
+ * @lucene.experimental
+ */ 
+public class BasicModelIne extends BasicModel {
+  /**
+   * Returns {@code tfn * log2((N + 1) / (ne + 0.5))}, where
+   * {@code ne = N * (1 - ((N - 1) / N)^F)}.
+   */
+  @Override
+  public final float score(BasicStats stats, float tfn) {
+    final int numDocs = stats.getNumberOfDocuments();
+    final long totalTermFreq = stats.getTotalTermFreq();
+    final double power = Math.pow((numDocs - 1) / (double) numDocs, totalTermFreq);
+    final double ne = numDocs * (1 - power);
+    final double ratio = (numDocs + 1) / (ne + 0.5);
+    return tfn * (float) (log2(ratio));
+  }
+
+  /** Returns the code of this basic model: {@code "I(ne)"}. */
+  @Override
+  public String toString() {
+    return "I(ne)";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java
@ -0,0 +1,46 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
+
+/**
+ * Implements the Poisson approximation for the binomial model for DFR.
+ * <p>
+ * WARNING: for terms that do not meet the expected random distribution
+ * (e.g. stopwords), this model may give poor performance, such as
+ * abnormally high scores for low tf values.
+ * @lucene.experimental
+ */
+public class BasicModelP extends BasicModel {
+  /** {@code log2(Math.E)}, precomputed. */
+  protected static final double LOG2_E = log2(Math.E);
+  
+  /**
+   * Returns the informative content for the given normalized term frequency,
+   * with {@code lambda = totalTermFreq / numberOfDocuments}.
+   */
+  @Override
+  public final float score(BasicStats stats, float tfn) {
+    // cast first so the division is carried out in floating point
+    float lambda = (float)stats.getTotalTermFreq() / stats.getNumberOfDocuments();
+    return (float)(tfn * log2(tfn / lambda)
+        + (lambda + 1 / (12 * tfn) - tfn) * LOG2_E
+        + 0.5 * log2(2 * Math.PI * tfn));
+  }
+
+  /** Returns the code of this basic model: {@code "P"}. */
+  @Override
+  public String toString() {
+    return "P";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/BasicSimilarityProvider.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/BasicSimilarityProvider.java
@ -0,0 +1,54 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A minimal {@link Similarity} provider: {@code get(String field)} hands back
+ * the object passed to the constructor, whatever the field. This class
+ * is aimed at non-VSM models, and therefore both the {@link #coord} and
+ * {@link #queryNorm} methods return {@code 1}. Use
+ * {@link DefaultSimilarityProvider} for {@link DefaultSimilarity}.
+ * @lucene.experimental
+ */
+public class BasicSimilarityProvider implements SimilarityProvider {
+  /** The similarity returned for every field. */
+  private final Similarity similarity;
+  
+  /** Creates a provider that returns {@code sim} for every field. */
+  public BasicSimilarityProvider(Similarity sim) {
+    similarity = sim;
+  }
+  
+  /** Always {@code 1}: the coordination factor is disabled. */
+  @Override
+  public float coord(int overlap, int maxOverlap) {
+    return 1.0f;
+  }
+
+  /** Always {@code 1}: no query normalization is applied. */
+  @Override
+  public float queryNorm(float sumOfSquaredWeights) {
+    return 1.0f;
+  }
+
+  /** Returns the similarity given at construction time, ignoring {@code field}. */
+  @Override
+  public Similarity get(String field) {
+    return similarity;
+  }
+
+  @Override
+  public String toString() {
+    final StringBuilder text = new StringBuilder("BasicSimilarityProvider(");
+    text.append(similarity).append(")");
+    return text.toString();
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java
@ -0,0 +1,144 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Terms;
+
+/**
+ * Stores all statistics commonly used by ranking methods, together with the
+ * query boosts.
+ * @lucene.experimental
+ */
+public class BasicStats extends Similarity.Stats {
+  // ------------------------------ Statistics -------------------------------
+
+  /** The number of documents. */
+  protected int numberOfDocuments;
+  /** The total number of tokens in the field. */
+  protected long numberOfFieldTokens;
+  /** The average field length. */
+  protected float avgFieldLength;
+  /** The document frequency. */
+  protected int docFreq;
+  /** The total number of occurrences of this term across all documents. */
+  protected long totalTermFreq;
+
+  // -------------------------------- Boosts ---------------------------------
+
+  /** Query's inner boost. */
+  protected final float queryBoost;
+  /** Any outer query's boost. */
+  protected float topLevelBoost;
+  /** Product of {@link #queryBoost} and {@link #topLevelBoost}; most
+   * Similarities do not distinguish between the immediate and the top level
+   * query boosts, so they only need this combined value. */
+  protected float totalBoost;
+
+  /**
+   * Constructor. Records the query boost, which also serves as the total
+   * boost until {@link #normalize(float, float)} is called.
+   */
+  public BasicStats(float queryBoost) {
+    this.queryBoost = queryBoost;
+    totalBoost = queryBoost;
+  }
+
+  // --------------------------- Statistic accessors --------------------------
+
+  /** Sets the number of documents. */
+  public void setNumberOfDocuments(int numberOfDocuments) {
+    this.numberOfDocuments = numberOfDocuments;
+  }
+
+  /** Returns the number of documents. */
+  public int getNumberOfDocuments() {
+    return this.numberOfDocuments;
+  }
+
+  /**
+   * Sets the total number of tokens in the field.
+   * @see Terms#getSumTotalTermFreq()
+   */
+  public void setNumberOfFieldTokens(long numberOfFieldTokens) {
+    this.numberOfFieldTokens = numberOfFieldTokens;
+  }
+
+  /**
+   * Returns the total number of tokens in the field.
+   * @see Terms#getSumTotalTermFreq()
+   */
+  public long getNumberOfFieldTokens() {
+    return this.numberOfFieldTokens;
+  }
+
+  /** Sets the average field length. */
+  public void setAvgFieldLength(float avgFieldLength) {
+    this.avgFieldLength = avgFieldLength;
+  }
+
+  /** Returns the average field length. */
+  public float getAvgFieldLength() {
+    return this.avgFieldLength;
+  }
+
+  /** Sets the document frequency. */
+  public void setDocFreq(int docFreq) {
+    this.docFreq = docFreq;
+  }
+
+  /** Returns the document frequency. */
+  public int getDocFreq() {
+    return this.docFreq;
+  }
+
+  /** Sets the total number of occurrences of this term across all documents. */
+  public void setTotalTermFreq(long totalTermFreq) {
+    this.totalTermFreq = totalTermFreq;
+  }
+
+  /** Returns the total number of occurrences of this term across all documents. */
+  public long getTotalTermFreq() {
+    return this.totalTermFreq;
+  }
+
+  // ------------------------- Boosts and normalization -----------------------
+
+  /** Returns the total boost. */
+  public float getTotalBoost() {
+    return this.totalBoost;
+  }
+
+  /** Computes the raw normalization value. This basic implementation returns
+   * the query boost. Subclasses may override this method to include other
+   * factors (such as idf), or to save the value for inclusion in
+   * {@link #normalize(float, float)}, etc.
+   */
+  protected float rawNormalizationValue() {
+    return this.queryBoost;
+  }
+
+  /** The square of the raw normalization value.
+   * @see #rawNormalizationValue() */
+  @Override
+  public float getValueForNormalization() {
+    final float raw = rawNormalizationValue();
+    return raw * raw;
+  }
+
+  /** No normalization is done: {@code queryNorm} is ignored. The
+   * {@code topLevelBoost} is saved in the object, however, and the total
+   * boost is recomputed from it. */
+  @Override
+  public void normalize(float queryNorm, float topLevelBoost) {
+    this.totalBoost = this.queryBoost * topLevelBoost;
+    this.topLevelBoost = topLevelBoost;
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java
@ -0,0 +1,86 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+
+/**
+ * Implements the <em>divergence from randomness (DFR)</em> framework
+ * introduced in Gianni Amati and Cornelis Joost Van Rijsbergen. 2002.
+ * Probabilistic models of information retrieval based on measuring the
+ * divergence from randomness. ACM Trans. Inf. Syst. 20, 4 (October 2002),
+ * 357-389.
+ * <p>A DFR scoring formula is assembled from three pluggable parts: the
+ * <em>basic model</em>, the <em>aftereffect</em> and a term frequency
+ * <em>normalization</em>, represented by the classes {@code BasicModel},
+ * {@code AfterEffect} and {@code Normalization}, respectively. The class
+ * names were chosen to match their counterparts in the Terrier IR engine.</p>
+ * <p>Note that <em>qtf</em>, the multiplicity of term-occurrence in the query,
+ * is not handled by this implementation.</p>
+ * @see BasicModel
+ * @see AfterEffect
+ * @see Normalization
+ * @lucene.experimental
+ */
+public class DFRSimilarity extends SimilarityBase {
+  /** The basic model for information content. */
+  protected final BasicModel basicModel;
+  /** The first normalization of the information content. */
+  protected final AfterEffect afterEffect;
+  /** The term frequency normalization. */
+  protected final Normalization normalization;
+
+  /**
+   * Creates a DFR similarity from its three components.
+   * @throws NullPointerException if any of the components is {@code null}.
+   */
+  public DFRSimilarity(BasicModel basicModel,
+                       AfterEffect afterEffect,
+                       Normalization normalization) {
+    final boolean allPresent =
+        basicModel != null && afterEffect != null && normalization != null;
+    if (!allPresent) {
+      throw new NullPointerException("null parameters not allowed.");
+    }
+    this.normalization = normalization;
+    this.afterEffect = afterEffect;
+    this.basicModel = basicModel;
+  }
+
+  /**
+   * Scores a document as {@code boost * basicModel * afterEffect}, both
+   * components being evaluated on the normalized term frequency.
+   */
+  @Override
+  protected float score(BasicStats stats, float freq, float docLen) {
+    final float tfn = normalization.tfn(stats, freq, docLen);
+    final float boostedModel = stats.getTotalBoost() * basicModel.score(stats, tfn);
+    return boostedModel * afterEffect.score(stats, tfn);
+  }
+
+  /**
+   * Adds the boost (only when it differs from {@code 1}), the normalization,
+   * the basic model and the aftereffect to {@code expl}, in this order.
+   */
+  @Override
+  protected void explain(Explanation expl,
+      BasicStats stats, int doc, float freq, float docLen) {
+    final float boost = stats.getTotalBoost();
+    if (boost != 1.0f) {
+      expl.addDetail(new Explanation(stats.getTotalBoost(), "boost"));
+    }
+
+    // The value of the normalization explanation is the normalized term
+    // frequency the other two components are evaluated on.
+    final Explanation normExpl = normalization.explain(stats, freq, docLen);
+    final float tfn = normExpl.getValue();
+    expl.addDetail(normExpl);
+    expl.addDetail(basicModel.explain(stats, tfn));
+    expl.addDetail(afterEffect.explain(stats, tfn));
+  }
+
+  /**
+   * Returns {@code "DFR "} followed by the names of the basic model, the
+   * aftereffect and the normalization, without separators.
+   */
+  @Override
+  public String toString() {
+    final StringBuilder name = new StringBuilder("DFR ");
+    name.append(basicModel.toString());
+    name.append(afterEffect.toString());
+    name.append(normalization.toString());
+    return name.toString();
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java
@ -1,4 +1,4 @@
-package org.apache.lucene.search;
+package org.apache.lucene.search.similarities;

 import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.util.BytesRef;
@ -85,4 +85,9 @@ public class DefaultSimilarity extends TFIDFSimilarity {
  public boolean getDiscountOverlaps() {
    return discountOverlaps;
  }
+
+  /** Returns the constant name {@code "DefaultSimilarity"}. */
+  @Override
+  public String toString() {
+    return "DefaultSimilarity";
+  }
 }
--- a/lucene/src/java/org/apache/lucene/search/similarities/DefaultSimilarityProvider.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/DefaultSimilarityProvider.java
@ -1,4 +1,5 @@
-package org.apache.lucene.search;
+package org.apache.lucene.search.similarities;
+

 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
--- a/lucene/src/java/org/apache/lucene/search/similarities/Distribution.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/Distribution.java
@ -0,0 +1,45 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+
+/**
+ * Base class of the probabilistic distributions that model term occurrence
+ * in information-based models.
+ * @see IBSimilarity
+ * @lucene.experimental
+ */
+public abstract class Distribution {
+  /**
+   * Subclasses must override this method to return the name of the
+   * distribution.
+   */
+  @Override
+  public abstract String toString();
+
+  /** Computes the score. */
+  public abstract float score(BasicStats stats, float tfn, float lambda);
+
+  /**
+   * Explains the score. The explanation carries the score as its value and
+   * only the simple class name of the model as its description, since both
+   * {@code tfn} and {@code lambda} are explained elsewhere.
+   */
+  public Explanation explain(BasicStats stats, float tfn, float lambda) {
+    final float value = score(stats, tfn, lambda);
+    final String modelName = getClass().getSimpleName();
+    return new Explanation(value, modelName);
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/DistributionLL.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/DistributionLL.java
@ -0,0 +1,37 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Log-logistic distribution.
+ * <p>Unlike for DFR, the natural logarithm is used, as
+ * it is faster to compute and the original paper does not express any
+ * preference to a specific base.</p>
+ * @lucene.experimental
+ */
+public class DistributionLL extends Distribution {
+  /** Returns the code of this distribution, {@code "LL"}. */
+  @Override
+  public String toString() {
+    return "LL";
+  }
+
+  /** Computes {@code -ln(lambda / (tfn + lambda))}; {@code stats} is not used. */
+  @Override
+  public final float score(BasicStats stats, float tfn, float lambda) {
+    final float ratio = lambda / (tfn + lambda);
+    final double information = -Math.log(ratio);
+    return (float) information;
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
@ -0,0 +1,42 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The smoothed power-law (SPL) distribution for the information-based framework
+ * that is described in the original paper.
+ * <p>Unlike for DFR, the natural logarithm is used, as
+ * it is faster to compute and the original paper does not express any
+ * preference to a specific base.</p>
+ * @lucene.experimental
+ */
+public class DistributionSPL extends Distribution {
+  /** Returns the code of this distribution, {@code "SPL"}. */
+  @Override
+  public String toString() {
+    return "SPL";
+  }
+
+  /**
+   * Computes
+   * {@code -ln((lambda^(tfn / (tfn + 1)) - lambda) / (1 - lambda))};
+   * {@code stats} is not used.
+   */
+  @Override
+  public final float score(BasicStats stats, float tfn, float lambda) {
+    // A lambda of exactly 1 would make the denominator zero, so 0.99 is
+    // used in its place.
+    final float effectiveLambda = (lambda == 1f) ? 0.99f : lambda;
+    final float exponent = tfn / (tfn + 1);
+    final double numerator = Math.pow(effectiveLambda, exponent) - effectiveLambda;
+    final float denominator = 1 - effectiveLambda;
+    return (float) -Math.log(numerator / denominator);
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/IBSimilarity.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/IBSimilarity.java
@ -0,0 +1,94 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+
+/**
+ * Provides a framework for the family of information-based models, as described
+ * in St&eacute;phane Clinchant and Eric Gaussier. 2010. Information-based
+ * models for ad hoc IR. In Proceeding of the 33rd international ACM SIGIR
+ * conference on Research and development in information retrieval (SIGIR '10).
+ * ACM, New York, NY, USA, 234-241.
+ * <p>The retrieval function is of the form <em>RSV(q, d) = &sum;
+ * -x<sup>q</sup><sub>w</sub> log Prob(X<sub>w</sub> &ge;
+ * t<sup>d</sup><sub>w</sub> | &lambda;<sub>w</sub>)</em>, where</p>
+ * <ul>
+ *   <li><em>x<sup>q</sup><sub>w</sub></em> is the query boost;</li>
+ *   <li><em>X<sub>w</sub></em> is a random variable that counts the occurrences
+ *   of word <em>w</em>;</li>
+ *   <li><em>t<sup>d</sup><sub>w</sub></em> is the normalized term frequency;</li>
+ *   <li><em>&lambda;<sub>w</sub></em> is a parameter.</li>
+ * </ul>
+ * <p>The framework described in the paper has many similarities to the DFR
+ * framework (see {@link DFRSimilarity}). It is possible that the two
+ * Similarities will be merged at one point.</p>
+ * @lucene.experimental 
+ */
+public class IBSimilarity extends SimilarityBase {
+  /** The probabilistic distribution used to model term occurrence. */
+  protected final Distribution distribution;
+  /** The <em>lambda (&lambda;<sub>w</sub>)</em> parameter. */
+  protected final Lambda lambda;
+  /** The term frequency normalization. */
+  protected final Normalization normalization;
+  
+  /**
+   * Creates an information-based similarity from its three components.
+   * @param distribution the distribution used to model term occurrence.
+   * @param lambda the strategy that computes the lambda parameter.
+   * @param normalization the term frequency normalization.
+   * @throws NullPointerException if any of the components is {@code null}.
+   */
+  public IBSimilarity(Distribution distribution,
+                      Lambda lambda,
+                      Normalization normalization) {
+    // Fail fast, like DFRSimilarity does: a null component would otherwise
+    // only surface as an NPE during scoring, explanation or toString().
+    if (distribution == null || lambda == null || normalization == null) {
+      throw new NullPointerException("null parameters not allowed.");
+    }
+    this.distribution = distribution;
+    this.lambda = lambda;
+    this.normalization = normalization;
+  }
+  
+  /**
+   * Scores a document as the total boost multiplied by the distribution
+   * score, the latter being evaluated on the normalized term frequency and
+   * the lambda parameter computed from {@code stats}.
+   */
+  @Override
+  protected float score(BasicStats stats, float freq, float docLen) {
+    return stats.getTotalBoost() *
+        distribution.score(
+            stats,
+            normalization.tfn(stats, freq, docLen),
+            lambda.lambda(stats));
+  }
+
+  /**
+   * Adds the boost (only when it differs from {@code 1}), the normalization,
+   * the lambda parameter and the distribution to {@code expl}, in this order.
+   */
+  @Override
+  protected void explain(
+      Explanation expl, BasicStats stats, int doc, float freq, float docLen) {
+    if (stats.getTotalBoost() != 1.0f) {
+      expl.addDetail(new Explanation(stats.getTotalBoost(), "boost"));
+    }
+    Explanation normExpl = normalization.explain(stats, freq, docLen);
+    Explanation lambdaExpl = lambda.explain(stats);
+    expl.addDetail(normExpl);
+    expl.addDetail(lambdaExpl);
+    // The distribution is explained on the values reported by the two
+    // explanations above, i.e. the normalized tf and lambda.
+    expl.addDetail(distribution.explain(
+        stats, normExpl.getValue(), lambdaExpl.getValue()));
+  }
+  
+  /**
+   * The name of IB methods follow the pattern
+   * {@code IB <distribution>-<lambda><normalization>}. The name of the
+   * distribution is the same as in the original paper; for the names of lambda
+   * parameters, refer to the javadoc of the {@link Lambda} classes.
+   */
+  @Override
+  public String toString() {
+    return "IB " + distribution.toString() + "-" + lambda.toString()
+                 + normalization.toString();
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
@ -0,0 +1,97 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+
+/**
+ * Bayesian smoothing using Dirichlet priors. From Chengxiang Zhai and John
+ * Lafferty. 2001. A study of smoothing methods for language models applied to
+ * Ad Hoc information retrieval. In Proceedings of the 24th annual international
+ * ACM SIGIR conference on Research and development in information retrieval
+ * (SIGIR '01). ACM, New York, NY, USA, 334-342.
+ * <p>
+ * The formula as defined in the paper assigns a negative score to documents
+ * that contain the term, but with fewer occurrences than predicted by the
+ * collection language model. The Lucene implementation returns {@code 0} for
+ * such documents.
+ * </p>
+ * 
+ * @lucene.experimental
+ */
+public class LMDirichletSimilarity extends LMSimilarity {
+  /** The &mu; parameter. */
+  private final float mu;
+  
+  /**
+   * Instantiates the similarity with a custom collection model.
+   * @param collectionModel the collection language model.
+   * @param mu the &mu; parameter.
+   */
+  public LMDirichletSimilarity(CollectionModel collectionModel, float mu) {
+    super(collectionModel);
+    this.mu = mu;
+  }
+  
+  /**
+   * Instantiates the similarity with the default collection model.
+   * @param mu the &mu; parameter.
+   */
+  public LMDirichletSimilarity(float mu) {
+    this.mu = mu;
+  }
+
+  /** Instantiates the similarity with the default &mu; value of 2000. */
+  public LMDirichletSimilarity(CollectionModel collectionModel) {
+    this(collectionModel, 2000);
+  }
+  
+  /** Instantiates the similarity with the default &mu; value of 2000. */
+  public LMDirichletSimilarity() {
+    this(2000);
+  }
+  
+  /**
+   * Computes
+   * {@code boost * (ln(1 + freq / (mu * p(w|C))) + ln(mu / (docLen + mu)))},
+   * where {@code p(w|C)} is the collection probability stored in
+   * {@code stats}. Scores that are not positive are returned as {@code 0}.
+   */
+  @Override
+  protected float score(BasicStats stats, float freq, float docLen) {
+    float score = stats.getTotalBoost() * (float)(Math.log(1 + freq /
+        (mu * ((LMStats)stats).getCollectionProbability())) +
+        Math.log(mu / (docLen + mu)));
+    return score > 0.0f ? score : 0.0f;
+  }
+  
+  /**
+   * Adds the boost (only when it differs from {@code 1}), &mu;, the term
+   * weight and the document norm to {@code expl}, then lets the superclass
+   * add its own details.
+   */
+  @Override
+  protected void explain(Explanation expl, BasicStats stats, int doc,
+      float freq, float docLen) {
+    if (stats.getTotalBoost() != 1.0f) {
+      expl.addDetail(new Explanation(stats.getTotalBoost(), "boost"));
+    }
+
+    expl.addDetail(new Explanation(mu, "mu"));
+    // First summand of the score formula.
+    Explanation weightExpl = new Explanation();
+    weightExpl.setValue((float)Math.log(1 + freq /
+        (mu * ((LMStats)stats).getCollectionProbability())));
+    weightExpl.setDescription("term weight");
+    expl.addDetail(weightExpl);
+    // Second summand of the score formula.
+    expl.addDetail(new Explanation(
+        (float)Math.log(mu / (docLen + mu)), "document norm"));
+    super.explain(expl, stats, doc, freq, docLen);
+  }
+
+  /** Returns the &mu; parameter. */
+  public float getMu() {
+    return mu;
+  }
+  
+  /**
+   * Returns {@code "Dirichlet(<mu>)"}. The value is formatted with
+   * {@link java.util.Locale#ROOT} so that the name does not depend on the
+   * default locale of the JVM (e.g. a decimal comma).
+   */
+  @Override
+  public String getName() {
+    return String.format(java.util.Locale.ROOT, "Dirichlet(%f)", getMu());
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
@ -0,0 +1,77 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+
+/**
+ * Language model based on the Jelinek-Mercer smoothing method. From Chengxiang
+ * Zhai and John Lafferty. 2001. A study of smoothing methods for language
+ * models applied to Ad Hoc information retrieval. In Proceedings of the 24th
+ * annual international ACM SIGIR conference on Research and development in
+ * information retrieval (SIGIR '01). ACM, New York, NY, USA, 334-342.
+ * <p>The model has a single parameter, &lambda;. According to said paper, the
+ * optimal value depends on both the collection and the query. The optimal value
+ * is around {@code 0.1} for title queries and {@code 0.7} for long queries.</p>
+ *
+ * @lucene.experimental
+ */
+public class LMJelinekMercerSimilarity extends LMSimilarity {
+  /** The &lambda; parameter. */
+  private final float lambda;
+  
+  /**
+   * Instantiates the similarity with a custom collection model.
+   * @param collectionModel the collection language model.
+   * @param lambda the &lambda; parameter.
+   */
+  public LMJelinekMercerSimilarity(
+      CollectionModel collectionModel, float lambda) {
+    super(collectionModel);
+    this.lambda = lambda;
+  }
+
+  /**
+   * Instantiates the similarity with the default collection model.
+   * @param lambda the &lambda; parameter.
+   */
+  public LMJelinekMercerSimilarity(float lambda) {
+    this.lambda = lambda;
+  }
+  
+  /**
+   * Computes
+   * {@code boost * ln(1 + ((1 - lambda) * freq / docLen) / (lambda * p(w|C)))},
+   * where {@code p(w|C)} is the collection probability stored in
+   * {@code stats}.
+   */
+  @Override
+  protected float score(BasicStats stats, float freq, float docLen) {
+    return stats.getTotalBoost() *
+        (float)Math.log(1 +
+            ((1 - lambda) * freq / docLen) /
+            (lambda * ((LMStats)stats).getCollectionProbability()));
+  }
+  
+  /**
+   * Adds the boost (only when it differs from {@code 1}) and &lambda; to
+   * {@code expl}, then lets the superclass add its own details.
+   */
+  @Override
+  protected void explain(Explanation expl, BasicStats stats, int doc,
+      float freq, float docLen) {
+    if (stats.getTotalBoost() != 1.0f) {
+      expl.addDetail(new Explanation(stats.getTotalBoost(), "boost"));
+    }
+    expl.addDetail(new Explanation(lambda, "lambda"));
+    super.explain(expl, stats, doc, freq, docLen);
+  }
+
+  /** Returns the &lambda; parameter. */
+  public float getLambda() {
+    return lambda;
+  }
+
+  /**
+   * Returns {@code "Jelinek-Mercer(<lambda>)"}. The value is formatted with
+   * {@link java.util.Locale#ROOT} so that the name does not depend on the
+   * default locale of the JVM (e.g. a decimal comma).
+   */
+  @Override
+  public String getName() {
+    return String.format(java.util.Locale.ROOT, "Jelinek-Mercer(%f)", getLambda());
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
@ -0,0 +1,155 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.util.TermContext;
+
+/**
+ * Abstract superclass for language modeling Similarities. It introduces the
+ * following nested types:
+ * <ul>
+ *   <li>{@link LMStats}, which adds a new statistic, the probability that
+ *   the collection language model generates the current term;</li>
+ *   <li>{@link CollectionModel}, a strategy interface for objects that
+ *   compute the collection language model {@code p(w|C)};</li>
+ *   <li>{@link DefaultCollectionModel}, an implementation of the former that
+ *   computes the term probability as the number of occurrences of the term in
+ *   the collection, divided by the total number of tokens {@code + 1}.</li>
+ * </ul>
+ *
+ * @lucene.experimental
+ */
+public abstract class LMSimilarity extends SimilarityBase {
+  /** The collection model. */
+  protected final CollectionModel collectionModel;
+
+  /** Creates a new instance with the default collection language model. */
+  public LMSimilarity() {
+    this(new DefaultCollectionModel());
+  }
+
+  /** Creates a new instance with the specified collection language model. */
+  public LMSimilarity(CollectionModel collectionModel) {
+    this.collectionModel = collectionModel;
+  }
+
+  /** Creates the {@link LMStats} that hold the statistics of a term. */
+  @Override
+  protected BasicStats newStats(float queryBoost) {
+    return new LMStats(queryBoost);
+  }
+
+  /**
+   * Fills in the usual statistics, then stores the collection probability of
+   * the current term as computed by the collection model.
+   */
+  @Override
+  protected void fillBasicStats(BasicStats stats, IndexSearcher searcher, String fieldName, TermContext termContext) throws IOException {
+    super.fillBasicStats(stats, searcher, fieldName, termContext);
+    final LMStats target = (LMStats) stats;
+    final float probability = collectionModel.computeProbability(stats);
+    target.setCollectionProbability(probability);
+  }
+
+  /** Adds the collection probability of the term to {@code expl}. */
+  @Override
+  protected void explain(Explanation expl, BasicStats stats, int doc,
+      float freq, float docLen) {
+    final float probability = collectionModel.computeProbability(stats);
+    expl.addDetail(new Explanation(probability, "collection probability"));
+  }
+
+  /**
+   * Returns the name of the LM method. The values of the parameters should be
+   * included as well.
+   * <p>Used in {@link #toString()}.</p>
+   */
+  public abstract String getName();
+
+  /**
+   * Returns the name of the LM method. If the collection model strategy
+   * reports a non-null name, that name is appended as well.
+   * @see #getName()
+   * @see CollectionModel#getName()
+   * @see DefaultCollectionModel
+   */
+  @Override
+  public String toString() {
+    final String modelName = collectionModel.getName();
+    if (modelName == null) {
+      return "LM " + getName();
+    }
+    return "LM " + getName() + " - " + modelName;
+  }
+
+  /** Stores the collection distribution of the current term. */
+  public static class LMStats extends BasicStats {
+    /** The probability that the current term is generated by the collection. */
+    private float collectionProbability;
+
+    /** Creates the statistics object with the given query boost. */
+    public LMStats(float queryBoost) {
+      super(queryBoost);
+    }
+
+    /**
+     * Sets the probability that the current term is generated by the
+     * collection.
+     */
+    public final void setCollectionProbability(float collectionProbability) {
+      this.collectionProbability = collectionProbability;
+    }
+
+    /**
+     * Returns the probability that the current term is generated by the
+     * collection.
+     */
+    public final float getCollectionProbability() {
+      return this.collectionProbability;
+    }
+  }
+
+  /** A strategy for computing the collection language model. */
+  public interface CollectionModel {
+    /**
+     * Computes the probability {@code p(w|C)} according to the language model
+     * strategy for the current term.
+     */
+    float computeProbability(BasicStats stats);
+
+    /** The name of the collection model strategy. */
+    String getName();
+  }
+
+  /**
+   * Models {@code p(w|C)} as the number of occurrences of the term in the
+   * collection, divided by the total number of tokens {@code + 1}.
+   */
+  public static class DefaultCollectionModel implements CollectionModel {
+    /** Returns {@code totalTermFreq / (numberOfFieldTokens + 1)}. */
+    @Override
+    public float computeProbability(BasicStats stats) {
+      final float occurrences = (float) stats.getTotalTermFreq();
+      final long tokensPlusOne = stats.getNumberOfFieldTokens() + 1;
+      return occurrences / tokensPlusOne;
+    }
+
+    /** Returns {@code null}: the default model does not report a name. */
+    @Override
+    public String getName() {
+      return null;
+    }
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/Lambda.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/Lambda.java
@ -0,0 +1,42 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+
+/**
+ * The <em>lambda (&lambda;<sub>w</sub>)</em> parameter in information-based
+ * models. Concrete subclasses derive the value from the statistics held in a
+ * {@link BasicStats} instance.
+ * @see IBSimilarity
+ * @lucene.experimental
+ */
+public abstract class Lambda {
+  /**
+   * Computes the lambda parameter.
+   * @param stats the statistics the value is derived from.
+   * @return the value of lambda.
+   */
+  public abstract float lambda(BasicStats stats);
+  /**
+   * Explains the lambda parameter.
+   * @param stats the statistics the value is derived from.
+   * @return an explanation of how lambda was computed.
+   */
+  public abstract Explanation explain(BasicStats stats);
+  
+  /**
+   * Subclasses must override this method to return the code of the lambda
+   * formula. Since the original paper is not very clear on this matter, and
+   * also uses the DFR naming scheme incorrectly, the codes here were chosen
+   * arbitrarily.
+   */
+  @Override
+  public abstract String toString();
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/LambdaDF.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/LambdaDF.java
@ -0,0 +1,48 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+
+/**
+ * Computes lambda as {@code docFreq / numberOfDocuments}.
+ * @lucene.experimental
+ */
+public class LambdaDF extends Lambda {
+  /**
+   * Returns the document frequency of the term divided by the number of
+   * documents, using floating-point division.
+   */
+  @Override
+  public final float lambda(BasicStats stats) {
+    return (float)stats.getDocFreq() / stats.getNumberOfDocuments();
+  }
+  
+  /**
+   * Explains the value of {@link #lambda(BasicStats)} and lists the two
+   * statistics it is computed from: {@code docFreq} and
+   * {@code numberOfDocuments}.
+   */
+  @Override
+  public final Explanation explain(BasicStats stats) {
+    Explanation result = new Explanation();
+    result.setDescription(getClass().getSimpleName() + ", computed from: ");
+    result.setValue(lambda(stats));
+    result.addDetail(
+        new Explanation(stats.getDocFreq(), "docFreq"));
+    result.addDetail(
+        new Explanation(stats.getNumberOfDocuments(), "numberOfDocuments"));
+    return result;
+  }
+  
+  /** Returns {@code "D"}, the code of this lambda formula. */
+  @Override
+  public String toString() {
+    return "D";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
@ -0,0 +1,48 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+
+/**
+ * Computes lambda as {@code totalTermFreq / numberOfDocuments}.
+ * @lucene.experimental
+ */
+public class LambdaTTF extends Lambda {
+  /**
+   * Returns the total term frequency of the term divided by the number of
+   * documents, using floating-point division.
+   */
+  @Override
+  public final float lambda(BasicStats stats) {
+    return (float)stats.getTotalTermFreq() / stats.getNumberOfDocuments();
+  }
+
+  /**
+   * Explains the value of {@link #lambda(BasicStats)} and lists the two
+   * statistics it is computed from: {@code totalTermFreq} and
+   * {@code numberOfDocuments}.
+   */
+  @Override
+  public final Explanation explain(BasicStats stats) {
+    Explanation result = new Explanation();
+    result.setDescription(getClass().getSimpleName() + ", computed from: ");
+    result.setValue(lambda(stats));
+    result.addDetail(
+        new Explanation(stats.getTotalTermFreq(), "totalTermFreq"));
+    result.addDetail(
+        new Explanation(stats.getNumberOfDocuments(), "numberOfDocuments"));
+    return result;
+  }
+  
+  /** Returns {@code "L"}, the code of this lambda formula. */
+  @Override
+  public String toString() {
+    return "L";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java
@ -0,0 +1,159 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.TermContext;
+
+/**
+ * Scores with several similarities at once and adds the individual scores
+ * together. This is the CombSUM method for combining evidence from multiple
+ * similarity values described in: Joseph A. Shaw, Edward A. Fox.
+ * In Text REtrieval Conference (1993), pp. 243-252
+ * @lucene.experimental
+ */
+public class MultiSimilarity extends Similarity {
+  /** The wrapped similarities; the first one also computes the norm. */
+  protected final Similarity[] sims;
+
+  /** Wraps {@code sims}; the array is stored as is, not copied. */
+  public MultiSimilarity(Similarity[] sims) {
+    this.sims = sims;
+  }
+
+  /** Delegates the norm computation to the first wrapped similarity. */
+  @Override
+  public byte computeNorm(FieldInvertState state) {
+    final Similarity first = sims[0];
+    return first.computeNorm(state);
+  }
+
+  /** Gathers the stats of every wrapped similarity into one {@link MultiStats}. */
+  @Override
+  public Stats computeStats(IndexSearcher searcher, String fieldName, float queryBoost, TermContext... termContexts) throws IOException {
+    final Stats[] perSimilarity = new Stats[sims.length];
+    int slot = 0;
+    while (slot < perSimilarity.length) {
+      perSimilarity[slot] = sims[slot].computeStats(searcher, fieldName, queryBoost, termContexts);
+      slot++;
+    }
+    return new MultiStats(perSimilarity);
+  }
+
+  /**
+   * Builds one exact scorer per wrapped similarity, each fed with its own
+   * entry of the {@link MultiStats} passed in, and sums them.
+   */
+  @Override
+  public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
+    final ExactDocScorer[] delegates = new ExactDocScorer[sims.length];
+    int slot = 0;
+    while (slot < delegates.length) {
+      // the cast stays inside the loop: stats is only inspected when a delegate is built
+      final Stats own = ((MultiStats) stats).subStats[slot];
+      delegates[slot] = sims[slot].exactDocScorer(own, fieldName, context);
+      slot++;
+    }
+    return new MultiExactDocScorer(delegates);
+  }
+
+  /**
+   * Builds one sloppy scorer per wrapped similarity, each fed with its own
+   * entry of the {@link MultiStats} passed in, and sums them.
+   */
+  @Override
+  public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
+    final SloppyDocScorer[] delegates = new SloppyDocScorer[sims.length];
+    int slot = 0;
+    while (slot < delegates.length) {
+      // the cast stays inside the loop: stats is only inspected when a delegate is built
+      final Stats own = ((MultiStats) stats).subStats[slot];
+      delegates[slot] = sims[slot].sloppyDocScorer(own, fieldName, context);
+      slot++;
+    }
+    return new MultiSloppyDocScorer(delegates);
+  }
+
+  /** Exact scorer whose score is the sum of the scores of its sub-scorers. */
+  public static class MultiExactDocScorer extends ExactDocScorer {
+    private final ExactDocScorer[] delegates;
+
+    MultiExactDocScorer(ExactDocScorer[] subScorers) {
+      this.delegates = subScorers;
+    }
+
+    @Override
+    public float score(int doc, int freq) {
+      float total = 0.0f;
+      for (int i = 0; i < delegates.length; i++) {
+        total += delegates[i].score(doc, freq);
+      }
+      return total;
+    }
+
+    /** Explains the summed score, with one detail per sub-scorer. */
+    @Override
+    public Explanation explain(int doc, Explanation freq) {
+      final float combined = score(doc, (int) freq.getValue());
+      final Explanation summary = new Explanation(combined, "sum of:");
+      for (int i = 0; i < delegates.length; i++) {
+        summary.addDetail(delegates[i].explain(doc, freq));
+      }
+      return summary;
+    }
+  }
+
+  /** Sloppy scorer whose score is the sum of the scores of its sub-scorers. */
+  public static class MultiSloppyDocScorer extends SloppyDocScorer {
+    private final SloppyDocScorer[] delegates;
+
+    MultiSloppyDocScorer(SloppyDocScorer[] subScorers) {
+      this.delegates = subScorers;
+    }
+
+    @Override
+    public float score(int doc, float freq) {
+      float total = 0.0f;
+      for (int i = 0; i < delegates.length; i++) {
+        total += delegates[i].score(doc, freq);
+      }
+      return total;
+    }
+
+    /** Explains the summed score, with one detail per sub-scorer. */
+    @Override
+    public Explanation explain(int doc, Explanation freq) {
+      final float combined = score(doc, freq.getValue());
+      final Explanation summary = new Explanation(combined, "sum of:");
+      for (int i = 0; i < delegates.length; i++) {
+        summary.addDetail(delegates[i].explain(doc, freq));
+      }
+      return summary;
+    }
+
+    /** Uses the slop factor of the first sub-scorer only. */
+    @Override
+    public float computeSlopFactor(int distance) {
+      final SloppyDocScorer first = delegates[0];
+      return first.computeSlopFactor(distance);
+    }
+
+    /** Uses the payload factor of the first sub-scorer only. */
+    @Override
+    public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
+      final SloppyDocScorer first = delegates[0];
+      return first.computePayloadFactor(doc, start, end, payload);
+    }
+  }
+
+  /** Stats object that bundles one {@link Stats} instance per sub-similarity or term. */
+  public static class MultiStats extends Stats {
+    final Stats[] subStats;
+
+    MultiStats(Stats[] subStats) {
+      this.subStats = subStats;
+    }
+
+    /** Returns the arithmetic mean of the normalization values of all sub-stats. */
+    @Override
+    public float getValueForNormalization() {
+      float total = 0.0f;
+      for (int i = 0; i < subStats.length; i++) {
+        total += subStats[i].getValueForNormalization();
+      }
+      return total / subStats.length;
+    }
+
+    /** Forwards the normalization to every sub-stats object. */
+    @Override
+    public void normalize(float queryNorm, float topLevelBoost) {
+      for (int i = 0; i < subStats.length; i++) {
+        subStats[i].normalize(queryNorm, topLevelBoost);
+      }
+    }
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/Normalization.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/Normalization.java
@ -0,0 +1,75 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Explanation;
+
+/**
+ * Base class of the term frequency normalization methods of the DFR
+ * framework.
+ * 
+ * @see DFRSimilarity
+ * @lucene.experimental
+ */
+public abstract class Normalization {
+  /** Returns the normalized term frequency.
+   * @param len the field length. */
+  public abstract float tfn(BasicStats stats, float tf, float len);
+
+  /**
+   * Subclasses must override this method to return the code of the
+   * normalization formula. Refer to the original paper for the list. 
+   */
+  @Override
+  public abstract String toString();
+
+  /** Returns an explanation for the normalized term frequency.
+   * <p>The explanation built here carries the value of
+   * {@link #tfn(BasicStats, float, float)} and lists the term frequency, the
+   * average field length and the field length as details, which fits the
+   * default normalization methods.
+   * Subclasses that use other statistics must override this method.</p>
+   */
+  public Explanation explain(BasicStats stats, float tf, float len) {
+    final String description = getClass().getSimpleName() + ", computed from: ";
+    final float normalized = tfn(stats, tf, len);
+
+    final Explanation result = new Explanation();
+    result.setDescription(description);
+    result.setValue(normalized);
+    result.addDetail(new Explanation(tf, "tf"));
+    result.addDetail(new Explanation(stats.getAvgFieldLength(), "avgFieldLength"));
+    result.addDetail(new Explanation(len, "len"));
+    return result;
+  }
+
+  /** Implementation used when there is no normalization. */
+  public static final class NoNormalization extends Normalization {
+    /** Returns {@code tf} unchanged. */
+    @Override
+    public float tfn(BasicStats stats, float tf, float len) {
+      return tf;
+    }
+
+    /** Returns a fixed explanation with value 1 and no details. */
+    @Override
+    public Explanation explain(BasicStats stats, float tf, float len) {
+      return new Explanation(1, "no normalization");
+    }
+
+    /** Returns the empty string: there is no normalization code. */
+    @Override
+    public String toString() {
+      return "";
+    }
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH1.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH1.java
@ -0,0 +1,34 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Normalization model that assumes a uniform distribution of the term frequency.
+ * @lucene.experimental
+ */
+public class NormalizationH1 extends Normalization {
+  /** Returns {@code tf * avgFieldLength / len}. */
+  @Override
+  public final float tfn(BasicStats stats, float tf, float len) {
+    final float scaled = tf * stats.getAvgFieldLength();
+    return scaled / len;
+  }
+
+  /** Returns {@code "1"}, the code of this normalization. */
+  @Override
+  public String toString() {
+    return "1";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH2.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH2.java
@ -0,0 +1,37 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
+
+/**
+ * Normalization model in which the term frequency is inversely related to the
+ * length.
+ * @lucene.experimental
+ */
+public class NormalizationH2 extends Normalization {
+  /** Returns {@code tf * log2(1 + avgFieldLength / len)}. */
+  @Override
+  public final float tfn(BasicStats stats, float tf, float len) {
+    final float lengthRatio = stats.getAvgFieldLength() / len;
+    final double factor = log2(1 + lengthRatio);
+    return (float) (tf * factor);
+  }
+
+  /** Returns {@code "2"}, the code of this normalization. */
+  @Override
+  public String toString() {
+    return "2";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH3.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH3.java
@ -0,0 +1,44 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Dirichlet Priors normalization
+ * @lucene.experimental
+ */
+public class NormalizationH3 extends Normalization {
+  /** The smoothing parameter of the normalization. */
+  private final float mu;
+
+  /** Creates the normalization with {@code mu = 800}. */
+  public NormalizationH3() {
+    this(800F);
+  }
+
+  /** Creates the normalization with the supplied {@code mu}. */
+  public NormalizationH3(float mu) {
+    this.mu = mu;
+  }
+
+  /**
+   * Returns
+   * {@code (tf + mu * totalTermFreq / numberOfFieldTokens) / (len + mu) * mu}.
+   */
+  @Override
+  public float tfn(BasicStats stats, float tf, float len) {
+    // share of the field's tokens that are occurrences of the term
+    final float termShare = stats.getTotalTermFreq() / (float) stats.getNumberOfFieldTokens();
+    final float smoothedTf = tf + mu * termShare;
+    return smoothedTf / (len + mu) * mu;
+  }
+
+  /** Returns the code of this normalization followed by the value of {@code mu}. */
+  @Override
+  public String toString() {
+    return "3(" + mu + ")";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/NormalizationZ.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/NormalizationZ.java
@ -0,0 +1,44 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Pareto-Zipf Normalization
+ * @lucene.experimental
+ */
+public class NormalizationZ extends Normalization {
+  /** The exponent applied to the ratio of average field length to field length. */
+  final float z;
+
+  /** Creates the normalization with {@code z = 0.30}. */
+  public NormalizationZ() {
+    this(0.30F);
+  }
+
+  /** Creates the normalization with the supplied {@code z}. */
+  public NormalizationZ(float z) {
+    this.z = z;
+  }
+  
+  /** Returns {@code tf * (avgFieldLength / len)^z}. */
+  @Override
+  public float tfn(BasicStats stats, float tf, float len) {
+    // read the statistic through its accessor, like the other normalizations do
+    return (float)(tf * Math.pow(stats.getAvgFieldLength() / len, z));
+  }
+
+  /** Returns the code of this normalization followed by the value of {@code z}. */
+  @Override
+  public String toString() {
+    return "Z(" + z + ")";
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/Similarity.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/Similarity.java
@ -1,4 +1,4 @@
-package org.apache.lucene.search;
+package org.apache.lucene.search.similarities;

 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
@ -25,6 +25,12 @@ import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.IndexReader; // javadoc
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Terms; // javadoc
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.spans.SpanQuery; // javadoc
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.SmallFloat; // javadoc
@ -140,7 +146,7 @@ public abstract class Similarity {
   * <p>
   * Term frequencies are integers (the term or phrase's tf)
   */
-  public abstract class ExactDocScorer {
+  public static abstract class ExactDocScorer {
    /**
     * Score a single document
     * @param doc document id
@ -169,7 +175,7 @@ public abstract class Similarity {
   * <p>
   * Term frequencies are floating point values.
   */
-  public abstract class SloppyDocScorer {
+  public static abstract class SloppyDocScorer {
    /**
     * Score a single document
     * @param doc document id
--- a/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
@ -0,0 +1,345 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.SmallFloat;
+import org.apache.lucene.util.TermContext;
+
+/**
+ * A subclass of {@code Similarity} that provides a simplified API for its
+ * descendants. Subclasses are only required to implement the {@link #score}
+ * and {@link #toString()} methods. Implementing
+ * {@link #explain(Explanation, BasicStats, int, float, float)} is optional,
+ * inasmuch as SimilarityBase already provides a basic explanation of the score
+ * and the term frequency. However, implementers of a subclass are encouraged to
+ * include as much detail about the scoring method as possible.
+ * <p>
+ * Note: multi-word queries such as phrase queries are scored in a different way
+ * than Lucene's default ranking algorithm: whereas it "fakes" an IDF value for
+ * the phrase as a whole (since it does not know it), this class instead scores
+ * phrases as a summation of the individual term scores.
+ * @lucene.experimental
+ */
+public abstract class SimilarityBase extends Similarity {
+  /** For {@link #log2(double)}. Precomputed for efficiency reasons. */
+  private static final double LOG_2 = Math.log(2);
+  
+  /**
+   * Whether overlap tokens are left out of the length used for the norm;
+   * {@code true} unless changed.
+   * @see #setDiscountOverlaps
+   */
+  protected boolean discountOverlaps = true;
+
+  /** Chooses whether overlap tokens (Tokens with
+   *  0 position increment) are ignored when the norm
+   *  is computed.  The default is true: overlap
+   *  tokens do not count towards the norm.
+   *
+   *  @lucene.experimental
+   *
+   *  @see #computeNorm
+   */
+  public void setDiscountOverlaps(boolean v) {
+    this.discountOverlaps = v;
+  }
+
+  /**
+   * Returns the current setting.
+   * @see #setDiscountOverlaps
+   */
+  public boolean getDiscountOverlaps() {
+    return this.discountOverlaps;
+  }
+  
+  /**
+   * Creates and fills one {@link BasicStats} per term context. A single term
+   * yields that stats object directly; any other number of terms yields a
+   * {@link MultiSimilarity.MultiStats} wrapping all of them.
+   */
+  @Override
+  public final Stats computeStats(IndexSearcher searcher, String fieldName,
+      float queryBoost, TermContext... termContexts) throws IOException {
+    final int termCount = termContexts.length;
+    final BasicStats[] perTerm = new BasicStats[termCount];
+    int slot = 0;
+    for (TermContext termContext : termContexts) {
+      final BasicStats current = newStats(queryBoost);
+      perTerm[slot++] = current;
+      fillBasicStats(current, searcher, fieldName, termContext);
+    }
+    if (termCount == 1) {
+      return perTerm[0];
+    }
+    return new MultiSimilarity.MultiStats(perTerm);
+  }
+  
+  /** Factory method to return a custom stats object */
+  protected BasicStats newStats(float queryBoost) {
+    return new BasicStats(queryBoost);
+  }
+  
+  /** Fills all member fields defined in {@code BasicStats} in {@code stats}. 
+   *  Subclasses can override this method to fill additional stats.
+   *  <p>
+   *  Missing statistics are replaced with usable substitutes: a term without
+   *  {@code totalTermFreq} gets its {@code docFreq} instead; a field that does
+   *  not exist gets zero tokens and an average length of 1; a field without
+   *  {@code sumTotalTermFreq} gets {@code docFreq} tokens and an average
+   *  length of 1.
+   *
+   *  @param stats the stats object to fill.
+   *  @param searcher supplies the index reader the statistics are read from.
+   *  @param fieldName the field the term belongs to.
+   *  @param termContext supplies the document and total term frequencies.
+   */
+  protected void fillBasicStats(BasicStats stats, IndexSearcher searcher,
+      String fieldName, TermContext termContext) throws IOException {
+    final IndexReader reader = searcher.getIndexReader();
+    final int numberOfDocuments = reader.maxDoc();
+    
+    final int docFreq = termContext.docFreq();
+    long totalTermFreq = termContext.totalTermFreq();
+
+    // codec does not supply totalTermFreq: substitute docFreq
+    if (totalTermFreq == -1) {
+      totalTermFreq = docFreq;
+    }
+
+    final long numberOfFieldTokens;
+    final float avgFieldLength;
+    
+    // reuse the reader fetched above instead of asking the searcher again
+    final Terms terms = MultiFields.getTerms(reader, fieldName);
+    if (terms == null) {
+      // field does not exist
+      numberOfFieldTokens = 0;
+      avgFieldLength = 1;
+    } else {
+      final long sumTotalTermFreq = terms.getSumTotalTermFreq();
+
+      // We have to provide something if the codec doesn't supply these measures,
+      // or if someone omitted frequencies for the field... negative values cause
+      // NaN/Inf for some scorers.
+      if (sumTotalTermFreq == -1) {
+        numberOfFieldTokens = docFreq;
+        avgFieldLength = 1;
+      } else {
+        numberOfFieldTokens = sumTotalTermFreq;
+        avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
+      }
+    }
+ 
+    // TODO: add sumDocFreq for field (numberOfFieldPostings)
+    stats.setNumberOfDocuments(numberOfDocuments);
+    stats.setNumberOfFieldTokens(numberOfFieldTokens);
+    stats.setAvgFieldLength(avgFieldLength);
+    stats.setDocFreq(docFreq);
+    stats.setTotalTermFreq(totalTermFreq);
+  }
+  
+  /**
+   * Scores a document from its term frequency and its length.
+   * <p>Subclasses must apply their scoring formula in this method.</p>
+   * <p>The scorers created by this class pass the length decoded from the
+   * document's norm as {@code docLen}, or {@code 1} when the field has no
+   * norms.</p>
+   * @param stats the corpus level statistics.
+   * @param freq the term frequency.
+   * @param docLen the document length.
+   * @return the score.
+   */
+  protected abstract float score(BasicStats stats, float freq, float docLen);
+  
+  /**
+   * Subclasses should implement this method to explain the score. {@code expl}
+   * already contains the score, the name of the class and the doc id, as well
+   * as the term frequency and its explanation; subclasses can add additional
+   * clauses to explain details of their scoring formulae.
+   * <p>The default implementation does nothing.</p>
+   * <p>It is invoked by
+   * {@link #explain(BasicStats, int, Explanation, float)} after the basic
+   * explanation has been assembled.</p>
+   * 
+   * @param expl the explanation to extend with details.
+   * @param stats the corpus level statistics.
+   * @param doc the document id.
+   * @param freq the term frequency.
+   * @param docLen the document length.
+   */
+  protected void explain(
+      Explanation expl, BasicStats stats, int doc, float freq, float docLen) {}
+  
+  /**
+   * Explains the score. The implementation here provides a basic explanation
+   * in the format <em>score(name-of-similarity, doc=doc-id,
+   * freq=term-frequency), computed from:</em>, and
+   * attaches the score (computed via the {@link #score(BasicStats, float, float)}
+   * method) and the explanation for the term frequency. Subclasses content with
+   * this format may add additional details in
+   * {@link #explain(Explanation, BasicStats, int, float, float)}.
+   *  
+   * @param stats the corpus level statistics.
+   * @param doc the document id.
+   * @param freq the term frequency and its explanation.
+   * @param docLen the document length.
+   * @return the explanation.
+   */
+  protected Explanation explain(
+      BasicStats stats, int doc, Explanation freq, float docLen) {
+    Explanation result = new Explanation(); 
+    result.setValue(score(stats, freq.getValue(), docLen));
+    result.setDescription("score(" + getClass().getSimpleName() +
+        ", doc=" + doc + ", freq=" + freq.getValue() +"), computed from:");
+    result.addDetail(freq);
+    
+    explain(result, stats, doc, freq.getValue(), docLen);
+    
+    return result;
+  }
+  
+  /**
+   * Creates the exact scorer for {@code stats}. Stats of a single term get one
+   * scorer; a {@link MultiSimilarity.MultiStats} gets one scorer per wrapped
+   * stats object, combined by summation.
+   */
+  @Override
+  public ExactDocScorer exactDocScorer(Stats stats, String fieldName,
+      AtomicReaderContext context) throws IOException {
+    final byte[] norms = context.reader.norms(fieldName);
+
+    if (!(stats instanceof MultiSimilarity.MultiStats)) {
+      return new BasicExactDocScorer((BasicStats) stats, norms);
+    }
+
+    // a multi term query (e.g. phrase). return the summation, 
+    // scoring almost as if it were boolean query
+    final Stats[] perTerm = ((MultiSimilarity.MultiStats) stats).subStats;
+    final ExactDocScorer[] scorers = new ExactDocScorer[perTerm.length];
+    int slot = 0;
+    for (Stats termStats : perTerm) {
+      scorers[slot++] = new BasicExactDocScorer((BasicStats) termStats, norms);
+    }
+    return new MultiSimilarity.MultiExactDocScorer(scorers);
+  }
+  
+  /**
+   * Creates the sloppy scorer for {@code stats}. Stats of a single term get
+   * one scorer; a {@link MultiSimilarity.MultiStats} gets one scorer per
+   * wrapped stats object, combined by summation.
+   */
+  @Override
+  public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName,
+      AtomicReaderContext context) throws IOException {
+    final byte[] norms = context.reader.norms(fieldName);
+
+    if (!(stats instanceof MultiSimilarity.MultiStats)) {
+      return new BasicSloppyDocScorer((BasicStats) stats, norms);
+    }
+
+    // a multi term query (e.g. phrase). return the summation, 
+    // scoring almost as if it were boolean query
+    final Stats[] perTerm = ((MultiSimilarity.MultiStats) stats).subStats;
+    final SloppyDocScorer[] scorers = new SloppyDocScorer[perTerm.length];
+    int slot = 0;
+    for (Stats termStats : perTerm) {
+      scorers[slot++] = new BasicSloppyDocScorer((BasicStats) termStats, norms);
+    }
+    return new MultiSimilarity.MultiSloppyDocScorer(scorers);
+  }
+  
+  /**
+   * Subclasses must override this method to return the name of the Similarity
+   * and preferably the values of parameters (if any) as well.
+   * @return a description of this similarity.
+   */
+  @Override
+  public abstract String toString();
+
+  // ------------------------------ Norm handling ------------------------------
+  
+  /**
+   * Norm -> document length map, indexed by the unsigned value of the norm
+   * byte.
+   */
+  private static final float[] NORM_TABLE = new float[256];
+
+  static {
+    // invert the encoding boost / sqrt(length): length = 1 / norm^2
+    int code = 0;
+    while (code < 256) {
+      final float decoded = SmallFloat.byte315ToFloat((byte) code);
+      NORM_TABLE[code] = 1.0f / (decoded * decoded);
+      code++;
+    }
+  }
+
+  /** Encodes the document length in the same way as {@link TFIDFSimilarity}. */
+  @Override
+  public byte computeNorm(FieldInvertState state) {
+    final float numTerms;
+    if (discountOverlaps)
+      numTerms = state.getLength() - state.getNumOverlap();
+    else
+      numTerms = state.getLength() / state.getBoost();
+    return encodeNormValue(state.getBoost(), numTerms);
+  }
+  
+  /** Decodes a normalization factor (document length) stored in an index.
+   * @see #encodeNormValue(float,float)
+   */
+  protected float decodeNormValue(byte norm) {
+    return NORM_TABLE[norm & 0xFF];  // & 0xFF maps negative bytes to positive above 127
+  }
+  
+  /** Encodes the length to a byte via SmallFloat. */
+  protected byte encodeNormValue(float boost, float length) {
+    return SmallFloat.floatToByte315((boost / (float) Math.sqrt(length)));
+  }
+  
+  // ----------------------------- Static methods ------------------------------
+  
+  /**
+   * Computes the logarithm of {@code x} to base two, as the natural logarithm
+   * of {@code x} divided by the precomputed natural logarithm of 2.
+   */
+  public static double log2(double x) {
+    // Put this to a 'util' class if we need more of these.
+    final double natural = Math.log(x);
+    return natural / LOG_2;
+  }
+  
+  // --------------------------------- Classes ---------------------------------
+  
+  /** Delegates the {@link #score(int, int)} and
+   * {@link #explain(int, Explanation)} methods to
+   * {@link SimilarityBase#score(BasicStats, float, float)} and
+   * {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
+   * respectively, passing the document length decoded from the norms.
+   */
+  private class BasicExactDocScorer extends ExactDocScorer {
+    private final BasicStats stats;
+    private final byte[] norms;
+    
+    BasicExactDocScorer(BasicStats stats, byte norms[]) {
+      this.stats = stats;
+      this.norms = norms;
+    }
+    
+    @Override
+    public float score(int doc, int freq) {
+      return SimilarityBase.this.score(stats, freq, docLength(doc));
+    }
+    
+    @Override
+    public Explanation explain(int doc, Explanation freq) {
+      return SimilarityBase.this.explain(stats, doc, freq, docLength(doc));
+    }
+
+    /** Returns the decoded length of {@code doc}, or 1 when norms are omitted. */
+    private float docLength(int doc) {
+      // We have to supply something in case norms are omitted
+      return norms == null ? 1F : decodeNormValue(norms[doc]);
+    }
+  }
+  
+  /** Delegates the {@link #score(int, float)} and
+   * {@link #explain(int, Explanation)} methods to
+   * {@link SimilarityBase#score(BasicStats, float, float)} and
+   * {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
+   * respectively, passing the document length decoded from the norms.
+   */
+  private class BasicSloppyDocScorer extends SloppyDocScorer {
+    private final BasicStats stats;
+    private final byte[] norms;
+    
+    BasicSloppyDocScorer(BasicStats stats, byte norms[]) {
+      this.stats = stats;
+      this.norms = norms;
+    }
+    
+    @Override
+    public float score(int doc, float freq) {
+      return SimilarityBase.this.score(stats, freq, docLength(doc));
+    }
+
+    @Override
+    public Explanation explain(int doc, Explanation freq) {
+      return SimilarityBase.this.explain(stats, doc, freq, docLength(doc));
+    }
+
+    /** Returns {@code 1 / (distance + 1)}. */
+    @Override
+    public float computeSlopFactor(int distance) {
+      return 1.0f / (distance + 1);
+    }
+
+    /** Payloads do not influence the score: always returns 1. */
+    @Override
+    public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
+      return 1f;
+    }
+
+    /** Returns the decoded length of {@code doc}, or 1 when norms are omitted. */
+    private float docLength(int doc) {
+      // We have to supply something in case norms are omitted
+      return norms == null ? 1F : decodeNormValue(norms[doc]);
+    }
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/similarities/SimilarityProvider.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/SimilarityProvider.java
@ -1,4 +1,6 @@
-package org.apache.lucene.search;
+package org.apache.lucene.search.similarities;
+
+import org.apache.lucene.search.BooleanQuery;

 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
--- a/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
+++ b/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
@ -1,4 +1,4 @@
-package org.apache.lucene.search;
+package org.apache.lucene.search.similarities;

 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
@ -21,6 +21,10 @@ package org.apache.lucene.search;
 import java.io.IOException;

 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.TermContext;
 import org.apache.lucene.util.SmallFloat;
@ -303,13 +307,13 @@ import org.apache.lucene.util.SmallFloat;
 *      two term-queries with that same term and hence the computation would still be correct (although
 *      not very efficient).
 *      The default computation for <i>tf(t in d)</i> in
- *      {@link org.apache.lucene.search.DefaultSimilarity#tf(float) DefaultSimilarity} is:
+ *      {@link org.apache.lucene.search.similarities.DefaultSimilarity#tf(float) DefaultSimilarity} is:
 *
 *      <br>&nbsp;<br>
 *      <table cellpadding="2" cellspacing="2" border="0" align="center">
 *        <tr>
 *          <td valign="middle" align="right" rowspan="1">
- *            {@link org.apache.lucene.search.DefaultSimilarity#tf(float) tf(t in d)} &nbsp; = &nbsp;
+ *            {@link org.apache.lucene.search.similarities.DefaultSimilarity#tf(float) tf(t in d)} &nbsp; = &nbsp;
 *          </td>
 *          <td valign="top" align="center" rowspan="1">
 *               frequency<sup><big>&frac12;</big></sup>
@ -328,13 +332,13 @@ import org.apache.lucene.util.SmallFloat;
 *      <i>idf(t)</i> appears for <i>t</i> in both the query and the document,
 *      hence it is squared in the equation.
 *      The default computation for <i>idf(t)</i> in
- *      {@link org.apache.lucene.search.DefaultSimilarity#idf(int, int) DefaultSimilarity} is:
+ *      {@link org.apache.lucene.search.similarities.DefaultSimilarity#idf(int, int) DefaultSimilarity} is:
 *
 *      <br>&nbsp;<br>
 *      <table cellpadding="2" cellspacing="2" border="0" align="center">
 *        <tr>
 *          <td valign="middle" align="right">
- *            {@link org.apache.lucene.search.DefaultSimilarity#idf(int, int) idf(t)}&nbsp; = &nbsp;
+ *            {@link org.apache.lucene.search.similarities.DefaultSimilarity#idf(int, int) idf(t)}&nbsp; = &nbsp;
 *          </td>
 *          <td valign="middle" align="center">
 *            1 + log <big>(</big>
@ -376,14 +380,14 @@ import org.apache.lucene.util.SmallFloat;
 *      This is a search time factor computed by the Similarity in effect at search time.
 *
 *      The default computation in
- *      {@link org.apache.lucene.search.DefaultSimilarityProvider#queryNorm(float) DefaultSimilarityProvider}
+ *      {@link org.apache.lucene.search.similarities.DefaultSimilarityProvider#queryNorm(float) DefaultSimilarityProvider}
 *      produces a <a href="http://en.wikipedia.org/wiki/Euclidean_norm#Euclidean_norm">Euclidean norm</a>:
 *      <br>&nbsp;<br>
 *      <table cellpadding="1" cellspacing="0" border="0" align="center">
 *        <tr>
 *          <td valign="middle" align="right" rowspan="1">
 *            queryNorm(q)  &nbsp; = &nbsp;
- *            {@link org.apache.lucene.search.DefaultSimilarityProvider#queryNorm(float) queryNorm(sumOfSquaredWeights)}
+ *            {@link org.apache.lucene.search.similarities.DefaultSimilarityProvider#queryNorm(float) queryNorm(sumOfSquaredWeights)}
 *            &nbsp; = &nbsp;
 *          </td>
 *          <td valign="middle" align="center" rowspan="1">
--- a/lucene/src/java/org/apache/lucene/search/similarities/package.html
+++ b/lucene/src/java/org/apache/lucene/search/similarities/package.html
@ -0,0 +1,174 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+This package contains the various ranking models that can be used in Lucene. The
+abstract class {@link org.apache.lucene.search.similarities.Similarity} serves
+as the base for ranking functions. For searching, users can employ the models
+already implemented or create their own by extending one of the classes in this
+package.
+
+<h2>Table Of Contents</h2>
+<p>
+    <ol>
+        <li><a href="#sims">Summary of the Ranking Methods</a></li>
+        <li><a href="#providers">Similarity Providers</a></li>
+        <li><a href="#changingSimilarity">Changing the Similarity</a></li>
+    </ol>
+</p>
+
+
+<a name="sims"></a>
+<h2>Summary of the Ranking Methods</h2>
+
+<p>{@link org.apache.lucene.search.similarities.DefaultSimilarity} is the original Lucene
+scoring function. It is based on a highly optimized Vector Space Model. For more
+information, see {@link org.apache.lucene.search.similarities.TFIDFSimilarity}.</p>
+
+<p>{@link org.apache.lucene.search.similarities.BM25Similarity} is an optimized
+implementation of the successful Okapi BM25 model.</p>
+
+<p>{@link org.apache.lucene.search.similarities.SimilarityBase} provides a basic
+implementation of the Similarity contract and exposes a highly simplified
+interface, which makes it an ideal starting point for new ranking functions.
+Lucene ships the following methods built on
+{@link org.apache.lucene.search.similarities.SimilarityBase}:
+
+<a name="framework"></a>
+<ul>
+  <li>Amati and Rijsbergen's {@linkplain org.apache.lucene.search.similarities.DFRSimilarity DFR} framework;</li>
+  <li>Clinchant and Gaussier's {@linkplain org.apache.lucene.search.similarities.IBSimilarity Information-based models}
+    for IR;</li>
+  <li>The implementation of two {@linkplain org.apache.lucene.search.similarities.LMSimilarity language models} from
+  Zhai and Lafferty's paper.</li>
+</ul>
+
+Since {@link org.apache.lucene.search.similarities.SimilarityBase} is not
+optimized to the same extent as
+{@link org.apache.lucene.search.similarities.DefaultSimilarity} and
+{@link org.apache.lucene.search.similarities.BM25Similarity}, a difference in
+performance is to be expected when using the methods listed above. However,
+optimizations can always be implemented in subclasses; see
+<a href="#changingSimilarity">below</a>.</p>
+
+
+<a name="providers"></a>
+<h2>Similarity Providers</h2>
+
+<p>{@link org.apache.lucene.search.similarities.SimilarityProvider}s are factories
+that return Similarities per-field and compute coordination factors and normalization 
+values for the query.
+{@link org.apache.lucene.search.similarities.DefaultSimilarityProvider} is the
+default implementation used by Lucene, geared towards vector-space search: it returns 
+{@link org.apache.lucene.search.similarities.DefaultSimilarity} for every field,
+and implements coordination-level matching and query normalization. 
+{@link org.apache.lucene.search.similarities.BasicSimilarityProvider} is geared towards
+non-vector-space models and does not implement coordination-level matching or query
+normalization. It is a convenience implementation that returns an arbitrary 
+{@link org.apache.lucene.search.similarities.Similarity} for every field.
+You can write your own SimilarityProvider to return different Similarities for different
+fields: for example you might want to use different parameter values for different fields,
+or maybe even entirely different ranking algorithms.
+</p>
+
+
+<a name="changingSimilarity"></a>
+<h2>Changing the Similarity</h2>
+
+<p>Chances are the available Similarities are sufficient for all
+    your searching needs.
+    However, in some applications it may be necessary to customize your <a
+        href="Similarity.html">Similarity</a> implementation. For instance, some
+    applications do not need to
+    distinguish between shorter and longer documents (see <a
+        href="http://www.gossamer-threads.com/lists/lucene/java-user/38967#38967">a "fair" similarity</a>).</p>
+
+<p>To change {@link org.apache.lucene.search.similarities.Similarity}, one must do so for both indexing and
+    searching, and the changes must happen before
+    either of these actions takes place. Although in theory there is nothing stopping you from changing mid-stream, it
+    just isn't well-defined what is going to happen.
+</p>
+
+<p>To make this change, implement your own {@link org.apache.lucene.search.similarities.Similarity} (likely
+    you'll want to simply subclass an existing method, be it
+    {@link org.apache.lucene.search.similarities.DefaultSimilarity} or a descendant of
+    {@link org.apache.lucene.search.similarities.SimilarityBase}) and
+    {@link org.apache.lucene.search.similarities.SimilarityProvider} (or use
+    {@link org.apache.lucene.search.similarities.BasicSimilarityProvider}), and
+    then register the new class by calling
+    {@link org.apache.lucene.index.IndexWriterConfig#setSimilarityProvider(SimilarityProvider)}
+    before indexing and
+    {@link org.apache.lucene.search.IndexSearcher#setSimilarityProvider(SimilarityProvider)}
+    before searching.
+</p>
+
+<h3>Extending {@linkplain org.apache.lucene.search.similarities.SimilarityBase}</h3>
+<p>
+The easiest way to quickly implement a new ranking method is to extend
+{@link org.apache.lucene.search.similarities.SimilarityBase}, which provides
+basic implementations for the low-level details. Subclasses are only required to
+implement the {@link org.apache.lucene.search.similarities.SimilarityBase#score(BasicStats, float, float)}
+and {@link org.apache.lucene.search.similarities.SimilarityBase#toString()}
+methods.</p>
+
+<p>Another option is to extend one of the <a href="#framework">frameworks</a>
+based on {@link org.apache.lucene.search.similarities.SimilarityBase}. These
+Similarities are implemented modularly, e.g.
+{@link org.apache.lucene.search.similarities.DFRSimilarity} delegates
+computation of the three parts of its formula to the classes
+{@link org.apache.lucene.search.similarities.BasicModel},
+{@link org.apache.lucene.search.similarities.AfterEffect} and
+{@link org.apache.lucene.search.similarities.Normalization}. Instead of
+subclassing the Similarity, one can simply introduce a new basic model and tell
+{@link org.apache.lucene.search.similarities.DFRSimilarity} to use it.</p>
+
+<h3>Changing {@linkplain org.apache.lucene.search.similarities.DefaultSimilarity}</h3>
+<p>
+    If you are interested in use cases for changing your similarity, see the Lucene users' mailing list at <a
+        href="http://www.nabble.com/Overriding-Similarity-tf2128934.html">Overriding Similarity</a>.
+    In summary, here are a few use cases:
+    <ol>
+        <li><p>The <code>SweetSpotSimilarity</code> in
+            <code>org.apache.lucene.misc</code> gives small
+            increases as the frequency increases a small amount
+            and then greater increases when you hit the "sweet spot", i.e. where
+            you think the frequency of terms is more significant.</p></li>
+        <li><p>Overriding tf &mdash; In some applications, it doesn't matter what the score of a document is as long as a
+            matching term occurs. In these
+            cases people have overridden Similarity to return 1 from the tf() method.</p></li>
+        <li><p>Changing Length Normalization &mdash; By overriding
+            {@link org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState state)},
+            it is possible to discount how the length of a field contributes
+            to a score. In {@link org.apache.lucene.search.similarities.DefaultSimilarity},
+            lengthNorm = 1 / (numTerms in field)^0.5, but if one changes this to be
+            1 / (numTerms in field), all fields will be treated
+            <a href="http://www.gossamer-threads.com/lists/lucene/java-user/38967#38967">"fairly"</a>.</p></li>
+    </ol>
+    In general, Chris Hostetter sums it up best in saying (from <a
+        href="http://www.gossamer-threads.com/lists/lucene/java-user/39125#39125">the Lucene users' mailing list</a>):
+    <blockquote>[One would override the Similarity in] ... any situation where you know more about your data then just
+        that
+        it's "text" is a situation where it *might* make sense to to override your
+        Similarity method.</blockquote>
+</p>
+
+</body>
+</html>
--- a/lucene/src/java/org/apache/lucene/search/spans/SpanScorer.java
+++ b/lucene/src/java/org/apache/lucene/search/spans/SpanScorer.java
@ -19,11 +19,9 @@ package org.apache.lucene.search.spans;

 import java.io.IOException;

-import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.TFIDFSimilarity;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.similarities.Similarity;

 /**
 * Public for extension only.
--- a/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java
+++ b/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java
@ -21,7 +21,8 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.*;
-import org.apache.lucene.search.Similarity.SloppyDocScorer;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
 import org.apache.lucene.util.TermContext;

 import java.io.IOException;
--- a/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java
+++ b/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java
@ -33,7 +33,7 @@ import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
-import org.apache.lucene.search.SimilarityProvider;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;

 import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT;
--- a/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java
+++ b/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java
@ -18,6 +18,7 @@ package org.apache.lucene.search;
 */

 import java.io.IOException;
+import java.util.Locale;
 import java.util.Set;
 import java.util.TreeSet;
 import java.util.Random;
@ -35,7 +36,7 @@ public class CheckHits {
   * different  order of operations from the actual scoring method ...
   * this allows for a small amount of variation
   */
-  public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.0002f;
+  public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.02f;
    
  /**
   * Tests that all documents up to maxDoc which are *not* in the
@ -327,6 +328,10 @@ public class CheckHits {
    if (!deep) return;

    Explanation detail[] = expl.getDetails();
+    // TODO: can we improve this entire method? its really geared to work only with TF/IDF
+    if (expl.getDescription().endsWith("computed from:")) {
+      return; // something more complicated.
+    }
    if (detail!=null) {
      if (detail.length==1) {
        // simple containment, unless its a freq of: (which lets a query explain how the freq is calculated), 
@ -338,7 +343,7 @@ public class CheckHits {
        // - end with one of: "product of:", "sum of:", "max of:", or
        // - have "max plus <x> times others" (where <x> is float).
        float x = 0;
-        String descr = expl.getDescription().toLowerCase();
+        String descr = expl.getDescription().toLowerCase(Locale.ENGLISH);
        boolean productOf = descr.endsWith("product of:");
        boolean sumOf = descr.endsWith("sum of:");
        boolean maxOf = descr.endsWith("max of:");
--- a/lucene/src/test-framework/org/apache/lucene/search/RandomSimilarityProvider.java
+++ b/lucene/src/test-framework/org/apache/lucene/search/RandomSimilarityProvider.java
@ -0,0 +1,158 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.lucene.search.similarities.AfterEffect;
+import org.apache.lucene.search.similarities.AfterEffectB;
+import org.apache.lucene.search.similarities.AfterEffectL;
+import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.search.similarities.BasicModel;
+import org.apache.lucene.search.similarities.BasicModelBE;
+import org.apache.lucene.search.similarities.BasicModelD;
+import org.apache.lucene.search.similarities.BasicModelG;
+import org.apache.lucene.search.similarities.BasicModelIF;
+import org.apache.lucene.search.similarities.BasicModelIn;
+import org.apache.lucene.search.similarities.BasicModelIne;
+import org.apache.lucene.search.similarities.BasicModelP;
+import org.apache.lucene.search.similarities.DFRSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Distribution;
+import org.apache.lucene.search.similarities.DistributionLL;
+import org.apache.lucene.search.similarities.DistributionSPL;
+import org.apache.lucene.search.similarities.IBSimilarity;
+import org.apache.lucene.search.similarities.LMDirichletSimilarity;
+import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
+import org.apache.lucene.search.similarities.Lambda;
+import org.apache.lucene.search.similarities.LambdaDF;
+import org.apache.lucene.search.similarities.LambdaTTF;
+import org.apache.lucene.search.similarities.Normalization;
+import org.apache.lucene.search.similarities.NormalizationH1;
+import org.apache.lucene.search.similarities.NormalizationH2;
+import org.apache.lucene.search.similarities.NormalizationH3;
+import org.apache.lucene.search.similarities.NormalizationZ;
+import org.apache.lucene.search.similarities.Similarity;
+
+/**
+ * Test-framework provider that hands each field one of {@link #allSims}, chosen from a
+ * per-instance random seed, and randomly keeps or disables coord() and queryNorm().
+ */
+public class RandomSimilarityProvider extends DefaultSimilarityProvider {
+  final List<Similarity> knownSims;
+  Map<String,Similarity> previousMappings = new HashMap<String,Similarity>();
+  final int perFieldSeed;
+  final boolean shouldCoord;
+  final boolean shouldQueryNorm;
+  
+  public RandomSimilarityProvider(Random random) {
+    perFieldSeed = random.nextInt();
+    shouldCoord = random.nextBoolean();
+    shouldQueryNorm = random.nextBoolean();
+    knownSims = new ArrayList<Similarity>(allSims);
+    Collections.shuffle(knownSims, random);
+  }
+  
+  @Override
+  public float coord(int overlap, int maxOverlap) {
+    return shouldCoord ? super.coord(overlap, maxOverlap) : 1.0f;
+  }
+  
+  @Override
+  public float queryNorm(float sumOfSquaredWeights) {
+    return shouldQueryNorm ? super.queryNorm(sumOfSquaredWeights) : 1.0f;
+  }
+  
+  /** Returns the similarity for {@code field}, picking and caching one on first use. */
+  @Override
+  public synchronized Similarity get(String field) {
+    assert field != null;
+    Similarity sim = previousMappings.get(field);
+    if (sim == null) {
+      // Math.abs(Integer.MIN_VALUE) is still negative, which can make the index negative
+      // and knownSims.get() throw; Math.max(0, ...) maps that one value to slot 0.
+      int slot = Math.max(0, Math.abs(perFieldSeed ^ field.hashCode())) % knownSims.size();
+      sim = knownSims.get(slot);
+      previousMappings.put(field, sim);
+    }
+    return sim;
+  }
+  
+  // all the similarities that we rotate through
+  /** The DFR basic models to test. */
+  static BasicModel[] BASIC_MODELS = {
+    new BasicModelBE(), /* TODO: enable new BasicModelD(), */ new BasicModelG(),
+    new BasicModelIF(), new BasicModelIn(), new BasicModelIne(),
+    /* TODO: enable new BasicModelP() */
+  };
+  /** The DFR aftereffects to test. */
+  static AfterEffect[] AFTER_EFFECTS = {
+    new AfterEffectB(), new AfterEffectL(), new AfterEffect.NoAfterEffect()
+  };
+  /** The DFR normalizations to test. */
+  static Normalization[] NORMALIZATIONS = {
+    new NormalizationH1(), new NormalizationH2(),
+    new NormalizationH3(), new NormalizationZ()
+    // TODO: if we enable NoNormalization, we have to deal with
+    // a couple tests (e.g. TestDocBoost, TestSort) that expect length normalization
+    // new Normalization.NoNormalization()
+  };
+  /** The distributions for IB. */
+  static Distribution[] DISTRIBUTIONS = {
+    new DistributionLL(), new DistributionSPL()
+  };
+  /** Lambdas for IB. */
+  static Lambda[] LAMBDAS = {
+    new LambdaDF(), new LambdaTTF()
+  };
+  static List<Similarity> allSims;
+  static {
+    allSims = new ArrayList<Similarity>();
+    allSims.add(new DefaultSimilarity());
+    allSims.add(new BM25Similarity());
+    for (BasicModel basicModel : BASIC_MODELS) {
+      for (AfterEffect afterEffect : AFTER_EFFECTS) {
+        for (Normalization normalization : NORMALIZATIONS) {
+          allSims.add(new DFRSimilarity(basicModel, afterEffect, normalization));
+        }
+      }
+    }
+    for (Distribution distribution : DISTRIBUTIONS) {
+      for (Lambda lambda : LAMBDAS) {
+        for (Normalization normalization : NORMALIZATIONS) {
+          allSims.add(new IBSimilarity(distribution, lambda, normalization));
+        }
+      }
+    }
+    /* TODO: enable Dirichlet 
+    allSims.add(new LMDirichletSimilarity()); */
+    allSims.add(new LMJelinekMercerSimilarity(0.1f));
+    allSims.add(new LMJelinekMercerSimilarity(0.7f));
+  }
+  
+  @Override
+  public synchronized String toString() {
+    return "RandomSimilarityProvider(queryNorm=" + shouldQueryNorm + ",coord=" + shouldCoord + "): " + previousMappings.toString();
+  }
+}
--- a/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
+++ b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
@ -52,6 +52,8 @@ import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.FieldCache.CacheEntry;
 import org.apache.lucene.search.AssertingIndexSearcher;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.RandomSimilarityProvider;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.FlushInfo;
@ -210,6 +212,8 @@ public abstract class LuceneTestCase extends Assert {
  // default codec provider
  private static CodecProvider savedCodecProvider;
  
+  private static SimilarityProvider similarityProvider;
+
  private static Locale locale;
  private static Locale savedLocale;
  private static TimeZone timeZone;
@ -393,6 +397,7 @@ public abstract class LuceneTestCase extends Assert {
    savedTimeZone = TimeZone.getDefault();
    timeZone = TEST_TIMEZONE.equals("random") ? randomTimeZone(random) : TimeZone.getTimeZone(TEST_TIMEZONE);
    TimeZone.setDefault(timeZone);
+    similarityProvider = new RandomSimilarityProvider(random);
    testsFailed = false;
  }

@ -467,6 +472,7 @@ public abstract class LuceneTestCase extends Assert {
  /** print some useful debugging information about the environment */
  private static void printDebuggingInformation(String codecDescription) {
    System.err.println("NOTE: test params are: codec=" + codecDescription +
+        ", sim=" + similarityProvider +
        ", locale=" + locale +
        ", timezone=" + (timeZone == null ? "(null)" : timeZone.getID()));
    System.err.println("NOTE: all tests run in this JVM:");
@ -922,6 +928,7 @@ public abstract class LuceneTestCase extends Assert {
  /** create a new index writer config with random defaults using the specified random */
  public static IndexWriterConfig newIndexWriterConfig(Random r, Version v, Analyzer a) {
    IndexWriterConfig c = new IndexWriterConfig(v, a);
+    c.setSimilarityProvider(similarityProvider);
    if (r.nextBoolean()) {
      c.setMergeScheduler(new SerialMergeScheduler());
    }
@ -1249,7 +1256,9 @@ public abstract class LuceneTestCase extends Assert {
      if (maybeWrap && rarely()) {
        r = new SlowMultiReaderWrapper(r);
      }
-      return random.nextBoolean() ? new AssertingIndexSearcher(r) : new AssertingIndexSearcher(r.getTopReaderContext());
+      IndexSearcher ret = random.nextBoolean() ? new AssertingIndexSearcher(r) : new AssertingIndexSearcher(r.getTopReaderContext());
+      ret.setSimilarityProvider(similarityProvider);
+      return ret;
    } else {
      int threads = 0;
      final ExecutorService ex = (random.nextBoolean()) ? null
@ -1258,7 +1267,7 @@ public abstract class LuceneTestCase extends Assert {
      if (ex != null && VERBOSE) {
        System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads");
      }
-      return random.nextBoolean() ? 
+      IndexSearcher ret = random.nextBoolean() ? 
        new AssertingIndexSearcher(r, ex) {
          @Override
          public void close() throws IOException {
@ -1272,6 +1281,8 @@ public abstract class LuceneTestCase extends Assert {
            shutdownExecutorService(ex);
          }
        };
+      ret.setSimilarityProvider(similarityProvider);
+      return ret;
    }
  }
  
--- a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
+++ b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
@ -35,13 +35,13 @@ import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.NumericRangeQuery;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.store.CompoundFileDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
--- a/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
+++ b/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
@ -27,11 +27,11 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

--- a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java
@ -25,7 +25,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.store.CompoundFileDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
--- a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
@ -40,9 +40,9 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader.FieldOption;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
--- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java
@ -17,7 +17,7 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

-import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
--- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java
@ -29,10 +29,10 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.search.DefaultSimilarityProvider;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.SimilarityProvider;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

--- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderOnDiskFull.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderOnDiskFull.java
@ -23,10 +23,11 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
-import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;
--- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
@ -35,11 +35,13 @@ import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitVector;
--- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
@ -26,8 +26,8 @@ import java.util.Set;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.DefaultSimilarityProvider;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
 import org.apache.lucene.util.LuceneTestCase;
 import org.junit.Test;

--- a/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
+++ b/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
@ -27,9 +27,9 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.TextField;
-import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.search.DefaultSimilarityProvider;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
--- a/lucene/src/test/org/apache/lucene/index/TestNorms.java
+++ b/lucene/src/test/org/apache/lucene/index/TestNorms.java
@ -27,10 +27,10 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.search.DefaultSimilarityProvider;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.SimilarityProvider;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

--- a/lucene/src/test/org/apache/lucene/index/TestOmitTf.java
+++ b/lucene/src/test/org/apache/lucene/index/TestOmitTf.java
@ -32,6 +32,9 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
+import org.apache.lucene.search.similarities.TFIDFSimilarity;
 import org.apache.lucene.store.Directory;


--- a/lucene/src/test/org/apache/lucene/index/TestParallelReader.java
+++ b/lucene/src/test/org/apache/lucene/index/TestParallelReader.java
@ -25,6 +25,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.search.*;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
@ -234,7 +235,8 @@ public class TestParallelReader extends LuceneTestCase {
    w.addDocument(d2);
    w.close();

-    return new IndexSearcher(dir, false);
+    IndexReader ir = IndexReader.open(dir, false);
+    return newSearcher(ir);
  }

  // Fields 1 & 2 in one index, 3 & 4 in other, with ParallelReader:
--- a/lucene/src/test/org/apache/lucene/index/TestUniqueTermCount.java
+++ b/lucene/src/test/org/apache/lucene/index/TestUniqueTermCount.java
@ -25,9 +25,9 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.TextField;
-import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.search.DefaultSimilarityProvider;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
--- a/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java
+++ b/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java
@ -20,9 +20,11 @@ package org.apache.lucene.search;
 import java.io.IOException;

 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.search.Similarity.ExactDocScorer;
-import org.apache.lucene.search.Similarity.SloppyDocScorer;
-import org.apache.lucene.search.Similarity.Stats;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
+import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
+import org.apache.lucene.search.similarities.Similarity.Stats;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.TermContext;
 import org.apache.lucene.index.FieldInvertState;
--- a/lucene/src/test/org/apache/lucene/search/TestBoolean2.java
+++ b/lucene/src/test/org/apache/lucene/search/TestBoolean2.java
@ -26,6 +26,8 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.MockDirectoryWrapper;
--- a/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java
+++ b/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java
@ -27,6 +27,9 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.NamedThreadFactory;
@ -72,6 +75,21 @@ public class TestBooleanQuery extends LuceneTestCase {

    IndexReader r = w.getReader();
    IndexSearcher s = newSearcher(r);
+    // this test relies upon coord being the default implementation (queryNorm and the
+    // per-field Similarity are delegated to the searcher's own provider), otherwise scores are different!
+    final SimilarityProvider delegate = s.getSimilarityProvider();
+    s.setSimilarityProvider(new DefaultSimilarityProvider() {
+      @Override
+      public float queryNorm(float sumOfSquaredWeights) {
+        return delegate.queryNorm(sumOfSquaredWeights);
+      }
+
+      @Override
+      public Similarity get(String field) {
+        return delegate.get(field);
+      }
+    });
+
    BooleanQuery q = new BooleanQuery();
    q.add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);

@ -81,7 +99,7 @@ public class TestBooleanQuery extends LuceneTestCase {
    subQuery.setBoost(0);
    q.add(subQuery, BooleanClause.Occur.SHOULD);
    float score2 = s.search(q, 10).getMaxScore();
-    assertEquals(score*.5, score2, 1e-6);
+    assertEquals(score*.5F, score2, 1e-6);

    // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor
    BooleanQuery qq = (BooleanQuery)q.clone();
@ -91,14 +109,14 @@ public class TestBooleanQuery extends LuceneTestCase {
    phrase.setBoost(0);
    qq.add(phrase, BooleanClause.Occur.SHOULD);
    score2 = s.search(qq, 10).getMaxScore();
-    assertEquals(score*(1.0/3), score2, 1e-6);
+    assertEquals(score*(1/3F), score2, 1e-6);

    // now test BooleanScorer2
    subQuery = new TermQuery(new Term("field", "b"));
    subQuery.setBoost(0);
    q.add(subQuery, BooleanClause.Occur.MUST);
    score2 = s.search(q, 10).getMaxScore();
-    assertEquals(score*(2.0/3), score2, 1e-6);
+    assertEquals(score*(2/3F), score2, 1e-6);
 
    // PhraseQuery w/ no terms added returns a null scorer
    PhraseQuery pq = new PhraseQuery();
--- a/lucene/src/test/org/apache/lucene/search/TestComplexExplanations.java
+++ b/lucene/src/test/org/apache/lucene/search/TestComplexExplanations.java
@ -19,6 +19,7 @@ package org.apache.lucene.search;

 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
 import org.apache.lucene.search.spans.*;

 /**
--- a/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java
+++ b/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java
@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

--- a/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
+++ b/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
@ -29,6 +29,10 @@ import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Weight.ScorerContext;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;

 import java.text.DecimalFormat;
--- a/lucene/src/test/org/apache/lucene/search/TestDocBoost.java
+++ b/lucene/src/test/org/apache/lucene/search/TestDocBoost.java
@ -56,7 +56,8 @@ public class TestDocBoost extends LuceneTestCase {

    final float[] scores = new float[4];

-    newSearcher(reader).search
+    IndexSearcher searcher = newSearcher(reader);
+    searcher.search
      (new TermQuery(new Term("field", "word")),
       new Collector() {
         private int base = 0;
@ -82,7 +83,10 @@ public class TestDocBoost extends LuceneTestCase {
    float lastScore = 0.0f;

    for (int i = 0; i < 2; i++) {
-      assertTrue(scores[i] > lastScore);
+      if (VERBOSE) {
+        System.out.println(searcher.explain(new TermQuery(new Term("field", "word")), i));
+      }
+      assertTrue("score: " + scores[i] + " should be > lastScore: " + lastScore, scores[i] > lastScore);
      lastScore = scores[i];
    }
    
--- a/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java
+++ b/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java
@ -30,6 +30,9 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.values.IndexDocValues.Source;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
@ -71,13 +74,24 @@ public class TestDocValuesScoring extends LuceneTestCase {
    
    // no boosting
    IndexSearcher searcher1 = newSearcher(ir);
+    final SimilarityProvider base = searcher1.getSimilarityProvider();
    // boosting
    IndexSearcher searcher2 = newSearcher(ir);
-    searcher2.setSimilarityProvider(new DefaultSimilarityProvider() {
-      final Similarity fooSim = new BoostingSimilarity(super.get("foo"), "foo_boost");
+    searcher2.setSimilarityProvider(new SimilarityProvider() {
+      final Similarity fooSim = new BoostingSimilarity(base.get("foo"), "foo_boost");

      public Similarity get(String field) {
-        return "foo".equals(field) ? fooSim : super.get(field);
+        return "foo".equals(field) ? fooSim : base.get(field);
+      }
+
+      @Override
+      public float coord(int overlap, int maxOverlap) {
+        return base.coord(overlap, maxOverlap);
+      }
+
+      @Override
+      public float queryNorm(float sumOfSquaredWeights) {
+        return base.queryNorm(sumOfSquaredWeights);
      }
    });
    
--- a/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java
+++ b/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java
@ -23,6 +23,7 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.search.FieldValueHitQueue.Entry;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
 import org.apache.lucene.store.*;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.BytesRef;
@ -41,7 +42,8 @@ public class TestElevationComparator extends LuceneTestCase {
        directory,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
            setMaxBufferedDocs(2).
-            setMergePolicy(newLogMergePolicy(1000))
+            setMergePolicy(newLogMergePolicy(1000)).
+            setSimilarityProvider(new DefaultSimilarityProvider())
    );
    writer.addDocument(adoc(new String[] {"id", "a", "title", "ipod", "str_s", "a"}));
    writer.addDocument(adoc(new String[] {"id", "b", "title", "ipod ipod", "str_s", "b"}));
@ -54,6 +56,7 @@ public class TestElevationComparator extends LuceneTestCase {
    writer.close();

    IndexSearcher searcher = newSearcher(r);
+    searcher.setSimilarityProvider(new DefaultSimilarityProvider());

    runTest(searcher, true);
    runTest(searcher, false);
--- a/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java
+++ b/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java
@ -29,6 +29,9 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

@ -104,6 +107,21 @@ public class TestFuzzyQuery2 extends LuceneTestCase {
    if (VERBOSE) {
      System.out.println("TEST: searcher=" + searcher);
    }
+    // even though this uses a boost-only rewrite, this test relies upon queryNorm being the default implementation
+    // (coord and the per-field Similarity are delegated to the searcher's own provider), otherwise scores are different!
+    final SimilarityProvider delegate = searcher.getSimilarityProvider();
+    searcher.setSimilarityProvider(new DefaultSimilarityProvider() {
+      @Override
+      public float coord(int overlap, int maxOverlap) {
+        return delegate.coord(overlap, maxOverlap);
+      }
+
+      @Override
+      public Similarity get(String field) {
+        return delegate.get(field);
+      }
+    });
+    
    writer.close();
    String line;
    while ((line = reader.readLine()) != null) {
--- a/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
+++ b/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
@ -37,6 +37,9 @@ import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;

--- a/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
+++ b/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
@ -26,6 +26,9 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@ -169,6 +172,19 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {

    // test for correct application of query normalization
    // must use a non score normalizing method for this.
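+    // keep the default queryNorm; coord and the per-field Similarity are delegated to the searcher's own provider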
+    final SimilarityProvider delegate = search.getSimilarityProvider();
+    search.setSimilarityProvider(new DefaultSimilarityProvider() {
+      @Override
+      public float coord(int overlap, int maxOverlap) {
+        return delegate.coord(overlap, maxOverlap);
+      }
+
+      @Override
+      public Similarity get(String field) {
+        return delegate.get(field);
+      }
+    });
    Query q = csrq("data", "1", "6", T, T);
    q.setBoost(100);
    search.search(q, null, new Collector() {
--- a/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java
+++ b/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java
@ -23,6 +23,7 @@ import org.apache.lucene.analysis.tokenattributes.*;
 import org.apache.lucene.document.*;
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
 import org.apache.lucene.store.*;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util._TestUtil;
@ -342,7 +343,10 @@ public class TestPhraseQuery extends LuceneTestCase {
  
  public void testSlopScoring() throws IOException {
    Directory directory = newDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory, 
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+          .setMergePolicy(newLogMergePolicy())
+          .setSimilarityProvider(new DefaultSimilarityProvider()));

    Document doc = new Document();
    doc.add(newField("field", "foo firstname lastname foo", TextField.TYPE_STORED));
@ -360,6 +364,7 @@ public class TestPhraseQuery extends LuceneTestCase {
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
+    searcher.setSimilarityProvider(new DefaultSimilarityProvider());
    PhraseQuery query = new PhraseQuery();
    query.add(new Term("field", "firstname"));
    query.add(new Term("field", "lastname"));
--- a/Show More
+++ b/Show More