From 6772e7567d96b0b670828a38866507012a8112e3 Mon Sep 17 00:00:00 2001
From: Doug Cutting <cutting@apache.org>
Date: Thu, 7 Nov 2002 17:31:27 +0000
Subject: [PATCH] Added a public, extensible scoring API.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149885 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                   |   3 +
 build.xml                                     |  32 ++-
 default.properties                            |   1 -
 .../apache/lucene/analysis/ru/package.html    |   5 +
 .../org/apache/lucene/document/Field.java     |   4 +-
 .../apache/lucene/index/DocumentWriter.java   |  17 +-
 .../org/apache/lucene/index/IndexWriter.java  |  31 ++-
 .../apache/lucene/search/BooleanQuery.java    |   8 +-
 .../apache/lucene/search/BooleanScorer.java   |   6 +-
 .../lucene/search/DefaultSimilarity.java      |  90 +++++++
 .../lucene/search/ExactPhraseScorer.java      |   6 +-
 .../apache/lucene/search/MultiTermQuery.java  |   6 +-
 .../lucene/search/PhrasePrefixQuery.java      |  12 +-
 .../org/apache/lucene/search/PhraseQuery.java |  17 +-
 .../apache/lucene/search/PhraseScorer.java    |  11 +-
 .../org/apache/lucene/search/PrefixQuery.java |   4 +-
 src/java/org/apache/lucene/search/Query.java  |  17 +-
 .../org/apache/lucene/search/RangeQuery.java  |   4 +-
 src/java/org/apache/lucene/search/Scorer.java |  10 +
 .../org/apache/lucene/search/Searcher.java    |  23 +-
 .../org/apache/lucene/search/Similarity.java  | 244 +++++++++++++++---
 .../lucene/search/SloppyPhraseScorer.java     |  10 +-
 .../org/apache/lucene/search/TermQuery.java   |   7 +-
 .../org/apache/lucene/search/TermScorer.java  |  19 +-
 src/test/org/apache/lucene/index/DocTest.java |   4 +-
 .../apache/lucene/search/TestDocBoost.java    |   2 +-
 .../apache/lucene/search/TestSimilarity.java  | 161 ++++++++++++
 27 files changed, 609 insertions(+), 145 deletions(-)
 create mode 100644 src/java/org/apache/lucene/analysis/ru/package.html
 create mode 100644 src/java/org/apache/lucene/search/DefaultSimilarity.java
 create mode 100644 src/test/org/apache/lucene/search/TestSimilarity.java
diff --git a/CHANGES.txt b/CHANGES.txt
index 3c80f8c3f2a..3252cee8085 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -93,6 +93,9 @@ $Id$
  17. Added Russian Analyzer.
      (Boris Okner via otis)
 
+ 18. Added a public, extensible scoring API.  For details, see the
+     javadoc for org.apache.lucene.search.Similarity.
+
 
 1.2 RC6
 
diff --git a/build.xml b/build.xml
index 03a15dc41a6..096d5e4cb9a 100644
--- a/build.xml
+++ b/build.xml
@@ -12,14 +12,21 @@
   <!-- Build classpath -->
   <path id="classpath">
     <pathelement location="${build.classes}"/>
-    <pathelement location="${build.demo.classes}"/>
-    <pathelement location="${build.test.classes}"/>
-    <pathelement location="."/>
     <fileset dir="lib">
       <include name="*.jar" />
     </fileset>
   </path>
 
+  <path id="demo.classpath">
+    <path refid="classpath"/>
+    <pathelement location="${build.demo.classes}"/>
+  </path>
+
+  <path id="test.classpath">
+    <path refid="demo.classpath"/>
+    <pathelement location="${build.test.classes}"/>
+  </path>
+
   <path id="junit.classpath">
     <pathelement location="${junit.classes}" />
     <pathelement location="${build.classes}"/>
@@ -245,7 +252,7 @@ Implementation-Vendor: Lucene
       includes="**/*.java"
       destdir="${build.demo.classes}"
       debug="${debug}">
-      <classpath refid="classpath"/>
+      <classpath refid="demo.classpath"/>
     </javac>
   </target>
 
@@ -255,23 +262,14 @@ Implementation-Vendor: Lucene
   <!--                                                                    -->
   <!-- ================================================================== -->
   <target name="test" depends="compile,demo">
-    <mkdir dir="${build.test}"/>
-
-    <copy todir="${build.test.src}">
-      <fileset dir="${test.src}">
-        <include name="**/*.java"/>
-      </fileset>
-    </copy>
-
     <mkdir dir="${build.test.classes}"/>
-
     <javac
       encoding="${build.encoding}"
-      srcdir="${build.test.src}"
+      srcdir="${test.src}"
       includes="**/*.java"
       destdir="${build.test.classes}"
       debug="${debug}">
-      <classpath refid="classpath"/>
+      <classpath refid="test.classpath"/>
     </javac>
   </target>
 
@@ -295,7 +293,7 @@ Implementation-Vendor: Lucene
       includes="**/*.java"
       destdir="${junit.classes}"
       debug="${debug}">
-      <classpath refid="classpath"/>
+      <classpath refid="test.classpath"/>
     </javac>
 
     <junit printsummary="yes" haltonfailure="no" >
@@ -565,7 +563,7 @@ Implementation-Vendor: Lucene
   <!-- ================================================================== -->
   <!--                                                                    -->
   <!-- ================================================================== -->
-  <target name="clean" depends="init">
+  <target name="clean">
     <delete dir="${build.dir}"/>
     <delete dir="${dist.dir}"/>
     <delete file="${basedir}/${final.name}.tar"/>
diff --git a/default.properties b/default.properties
index 85e633fceee..7b08e2547a2 100644
--- a/default.properties
+++ b/default.properties
@@ -50,7 +50,6 @@ build.docweb = ${build.dir}/docweb
 build.docweb.war.name = lucenedocweb
 
 build.test = ${build.dir}/test
-build.test.src = ${build.test}/src
 build.test.classes = ${build.test}/classes
 
 junit.src = ${basedir}/src/test
diff --git a/src/java/org/apache/lucene/analysis/ru/package.html b/src/java/org/apache/lucene/analysis/ru/package.html
new file mode 100644
index 00000000000..c63920a9bfa
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/ru/package.html
@@ -0,0 +1,5 @@
+<html>
+<body>
+Support for indexing and searching Russian text.
+</body>
+</html>
diff --git a/src/java/org/apache/lucene/document/Field.java b/src/java/org/apache/lucene/document/Field.java
index c9f65bdb7a2..3d478580460 100644
--- a/src/java/org/apache/lucene/document/Field.java
+++ b/src/java/org/apache/lucene/document/Field.java
@@ -85,13 +85,13 @@ public final class Field implements java.io.Serializable {
    * <p>The boost is multiplied by {@link Document#getBoost()} of the document
    * containing this field.  If a document has multiple fields with the same
    * name, all such values are multiplied together.  This product is then
-   * multipled by the value {@link Similarity#normalizeLength(int)}, and
+   * multiplied by the value {@link Similarity#lengthNorm(String,int)}, and
    * rounded by {@link Similarity#encodeNorm(float)} before it is stored in the
    * index.  One should attempt to ensure that this product does not overflow
    * the range of that encoding.
    *
    * @see Document#setBoost(float)
-   * @see Similarity#normalizeLength(int)
+   * @see Similarity#lengthNorm(String, int)
    * @see Similarity#encodeNorm(float)
    */
   public void setBoost(float boost) {
diff --git a/src/java/org/apache/lucene/index/DocumentWriter.java b/src/java/org/apache/lucene/index/DocumentWriter.java
index 79167e90c9d..d0b695dfe35 100644
--- a/src/java/org/apache/lucene/index/DocumentWriter.java
+++ b/src/java/org/apache/lucene/index/DocumentWriter.java
@@ -73,13 +73,16 @@ import org.apache.lucene.search.Similarity;
 final class DocumentWriter {
   private Analyzer analyzer;
   private Directory directory;
+  private Similarity similarity;
   private FieldInfos fieldInfos;
   private int maxFieldLength;
-
-  DocumentWriter(Directory d, Analyzer a, int mfl) {
-    directory = d;
-    analyzer = a;
-    maxFieldLength = mfl;
+  
+  DocumentWriter(Directory directory, Analyzer analyzer,
+                 Similarity similarity, int maxFieldLength) {
+    this.directory = directory;
+    this.analyzer = analyzer;
+    this.similarity = similarity;
+    this.maxFieldLength = maxFieldLength;
   }
 
   final void addDocument(String segment, Document doc)
@@ -320,10 +323,10 @@ final class DocumentWriter {
       if (field.isIndexed()) {
 	int n = fieldInfos.fieldNumber(field.name());
         float norm =
-          fieldBoosts[n] * Similarity.normalizeLength(fieldLengths[n]);
+          fieldBoosts[n] * similarity.lengthNorm(field.name(),fieldLengths[n]);
 	OutputStream norms = directory.createFile(segment + ".f" + n);
 	try {
-	  norms.writeByte(Similarity.encodeNorm(norm));
+	  norms.writeByte(similarity.encodeNorm(norm));
 	} finally {
 	  norms.close();
 	}
diff --git a/src/java/org/apache/lucene/index/IndexWriter.java b/src/java/org/apache/lucene/index/IndexWriter.java
index 0238a42211c..846776db5b3 100644
--- a/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/src/java/org/apache/lucene/index/IndexWriter.java
@@ -68,6 +68,8 @@ import org.apache.lucene.store.OutputStream;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.search.Similarity;
+
 
 /**
   An IndexWriter creates and maintains an index.
@@ -89,12 +91,28 @@ public class IndexWriter {
   private Directory directory;			  // where this index resides
   private Analyzer analyzer;			  // how to analyze text
 
+  private Similarity similarity = Similarity.getDefault(); // how to normalize
+
   private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
   private final Directory ramDirectory = new RAMDirectory(); // for temp segs
 
   private Lock writeLock;
 
-  private Similarity similarity;
+  /** Expert: Set the Similarity implementation used by this IndexWriter.
+   *
+   * @see Similarity#setDefault(Similarity)
+   */
+  public void setSimilarity(Similarity similarity) {
+    this.similarity = similarity;
+  }
+
+  /** Expert: Return the Similarity implementation used by this IndexWriter.
+   *
+   * <p>This defaults to the current value of {@link Similarity#getDefault()}.
+   */
+  public Similarity getSimilarity() {
+    return this.similarity;
+  }
 
   /** Constructs an IndexWriter for the index in <code>path</code>.  Text will
     be analyzed with <code>a</code>.  If <code>create</code> is true, then a
@@ -186,7 +204,7 @@ public class IndexWriter {
   /** Adds a document to this index.*/
   public void addDocument(Document doc) throws IOException {
     DocumentWriter dw =
-      new DocumentWriter(ramDirectory, analyzer, maxFieldLength);
+      new DocumentWriter(ramDirectory, analyzer, similarity, maxFieldLength);
     String segmentName = newSegmentName();
     dw.addDocument(segmentName, doc);
     synchronized (this) {
@@ -407,13 +425,4 @@ public class IndexWriter {
     }
     directory.renameFile("deleteable.new", "deletable");
   }
-
-  /**
-   * Sets the <code>Similarity</code> implementation to use.
-   *
-   * @param sim an instance of a class that implements  <code>Similarity</code
-   */
-  public void setSimilarity(Similarity sim) {
-    similarity = sim;
-  }
 }
diff --git a/src/java/org/apache/lucene/search/BooleanQuery.java b/src/java/org/apache/lucene/search/BooleanQuery.java
index 9a8fd582585..524d43f5e89 100644
--- a/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/src/java/org/apache/lucene/search/BooleanQuery.java
@@ -116,20 +116,20 @@ public class BooleanQuery extends Query {
     }
   }
 
-  Scorer scorer(IndexReader reader)
+  Scorer scorer(IndexReader reader, Similarity similarity)
        throws IOException {
 
     if (clauses.size() == 1) {			  // optimize 1-term queries
       BooleanClause c = (BooleanClause)clauses.elementAt(0);
       if (!c.prohibited)			  // just return term scorer
-	return c.query.scorer(reader);
+	return c.query.scorer(reader, similarity);
     }
 
-    BooleanScorer result = new BooleanScorer();
+    BooleanScorer result = new BooleanScorer(similarity);
 
     for (int i = 0 ; i < clauses.size(); i++) {
       BooleanClause c = (BooleanClause)clauses.elementAt(i);
-      Scorer subScorer = c.query.scorer(reader);
+      Scorer subScorer = c.query.scorer(reader, similarity);
       if (subScorer != null)
 	result.add(subScorer, c.required, c.prohibited);
       else if (c.required)
diff --git a/src/java/org/apache/lucene/search/BooleanScorer.java b/src/java/org/apache/lucene/search/BooleanScorer.java
index e9cfd3543ab..7cf416be82a 100644
--- a/src/java/org/apache/lucene/search/BooleanScorer.java
+++ b/src/java/org/apache/lucene/search/BooleanScorer.java
@@ -70,6 +70,10 @@ final class BooleanScorer extends Scorer {
   private int prohibitedMask = 0;
   private int nextMask = 1;
 
+  BooleanScorer(Similarity similarity) {
+    super(similarity);
+  }
+
   static final class SubScorer {
     public Scorer scorer;
     public boolean required = false;
@@ -113,7 +117,7 @@ final class BooleanScorer extends Scorer {
   private final void computeCoordFactors() throws IOException {
     coordFactors = new float[maxCoord];
     for (int i = 0; i < maxCoord; i++)
-      coordFactors[i] = Similarity.coord(i, maxCoord);
+      coordFactors[i] = getSimilarity().coord(i, maxCoord);
   }
 
   final void score(HitCollector results, int maxDoc) throws IOException {
diff --git a/src/java/org/apache/lucene/search/DefaultSimilarity.java b/src/java/org/apache/lucene/search/DefaultSimilarity.java
new file mode 100644
index 00000000000..e541f058ab9
--- /dev/null
+++ b/src/java/org/apache/lucene/search/DefaultSimilarity.java
@@ -0,0 +1,90 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.document.Document;
+
+/** Expert: Default scoring implementation; the initial value of {@link Similarity#getDefault()}. */
+public class DefaultSimilarity extends Similarity {
+  /** Implemented as <code>1/sqrt(numTerms)</code>; <code>fieldName</code> is not consulted. */
+  public float lengthNorm(String fieldName, int numTerms) {
+    return (float)(1.0 / Math.sqrt(numTerms));
+  }
+  
+  /** Implemented as <code>1/sqrt(sumOfSquaredWeights)</code>, computed in double and narrowed. */
+  public float queryNorm(float sumOfSquaredWeights) {
+    return (float)(1.0 / Math.sqrt(sumOfSquaredWeights));
+  }
+
+  /** Implemented as <code>sqrt(freq)</code>, so the result grows sub-linearly with freq. */
+  public float tf(float freq) {
+    return (float)Math.sqrt(freq);
+  }
+    
+  /** Implemented as <code>1 / (distance + 1)</code>, so a distance of 0 yields 1. */
+  public float sloppyFreq(int distance) {
+    return 1.0f / (distance + 1);
+  }
+    
+  /** Implemented as <code>log(numDocs/(docFreq+1)) + 1</code>, using the natural logarithm. */
+  public float idf(int docFreq, int numDocs) {
+    return (float)(Math.log(numDocs/(double)(docFreq+1)) + 1.0);
+  }
+    
+  /** Implemented as <code>overlap / maxOverlap</code>, using floating-point division. */
+  public float coord(int overlap, int maxOverlap) {
+    return overlap / (float)maxOverlap;
+  }
+}
diff --git a/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/src/java/org/apache/lucene/search/ExactPhraseScorer.java
index 46c590fd68c..c33c5c59ba3 100644
--- a/src/java/org/apache/lucene/search/ExactPhraseScorer.java
+++ b/src/java/org/apache/lucene/search/ExactPhraseScorer.java
@@ -61,9 +61,9 @@ import org.apache.lucene.index.*;
 
 final class ExactPhraseScorer extends PhraseScorer {
 
-  ExactPhraseScorer(TermPositions[] tps, byte[] n, float w)
-       throws IOException {
-    super(tps, n, w);
+  ExactPhraseScorer(TermPositions[] tps, Similarity similarity,
+                    byte[] norms, float weight) throws IOException {
+    super(tps, similarity, norms, weight);
   }
 
   protected final float phraseFreq() throws IOException {
diff --git a/src/java/org/apache/lucene/search/MultiTermQuery.java b/src/java/org/apache/lucene/search/MultiTermQuery.java
index 668f87a0118..776acc97793 100644
--- a/src/java/org/apache/lucene/search/MultiTermQuery.java
+++ b/src/java/org/apache/lucene/search/MultiTermQuery.java
@@ -85,7 +85,6 @@ public class MultiTermQuery extends Query {
     /** Constructs a query for terms matching <code>term</code>. */
     public MultiTermQuery(Term term) {
         this.term = term;
-        this.query = query;
     }
     
     /** Set the TermEnum to be used */
@@ -105,8 +104,9 @@ public class MultiTermQuery extends Query {
         }
     }
     
-    final Scorer scorer(IndexReader reader) throws IOException {
-        return getQuery().scorer(reader);
+    final Scorer scorer(IndexReader reader, Similarity similarity)
+      throws IOException {
+      return getQuery().scorer(reader, similarity);
     }
     
     private final BooleanQuery getQuery() throws IOException {
diff --git a/src/java/org/apache/lucene/search/PhrasePrefixQuery.java b/src/java/org/apache/lucene/search/PhrasePrefixQuery.java
index 7a9f94a7934..36a85539e48 100644
--- a/src/java/org/apache/lucene/search/PhrasePrefixQuery.java
+++ b/src/java/org/apache/lucene/search/PhrasePrefixQuery.java
@@ -147,7 +147,7 @@ public class PhrasePrefixQuery
 	_termArrays.add(terms);
     }
 
-    Scorer scorer(IndexReader reader)
+    Scorer scorer(IndexReader reader, Similarity similarity)
 	throws IOException
     {
     	if (_termArrays.size() == 0)  // optimize zero-term case
@@ -161,7 +161,7 @@ public class PhrasePrefixQuery
 	    for (int i=0; i<terms.length; i++)
 		boq.add(new TermQuery(terms[i]), false, false);
 
-	    return boq.scorer(reader);
+	    return boq.scorer(reader, similarity);
     	}
 
     	TermPositions[] tps = new TermPositions[_termArrays.size()];
@@ -182,9 +182,11 @@ public class PhrasePrefixQuery
 	}
 
 	if (_slop == 0)
-	    return new ExactPhraseScorer(tps, reader.norms(_field), _weight);
+	    return new ExactPhraseScorer(tps, similarity,
+                                         reader.norms(_field), _weight);
 	else
-	    return new SloppyPhraseScorer(tps, _slop, reader.norms(_field), _weight);
+	    return new SloppyPhraseScorer(tps, similarity, _slop,
+                                          reader.norms(_field), _weight);
     }
 
     float sumOfSquaredWeights(Searcher searcher)
@@ -195,7 +197,7 @@ public class PhrasePrefixQuery
 	{
 	    Term[] terms = (Term[])i.next();
 	    for (int j=0; j<terms.length; j++)
-		_idf += Similarity.idf(terms[j], searcher);
+		_idf += searcher.getSimilarity().idf(terms[j], searcher);
 	}
 
 	_weight = _idf * boost;
diff --git a/src/java/org/apache/lucene/search/PhraseQuery.java b/src/java/org/apache/lucene/search/PhraseQuery.java
index cd7af711328..9ac6e08218d 100644
--- a/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/src/java/org/apache/lucene/search/PhraseQuery.java
@@ -108,10 +108,7 @@ public class PhraseQuery extends Query {
   }
 
   final float sumOfSquaredWeights(Searcher searcher) throws IOException {
-    idf = 0.0f;
-    for (int i = 0; i < terms.size(); i++)	  // sum term IDFs
-      idf += Similarity.idf((Term)terms.elementAt(i), searcher);
-
+    idf = searcher.getSimilarity().idf(terms, searcher);
     weight = idf * boost;
     return weight * weight;			  // square term weights
   }
@@ -121,7 +118,8 @@ public class PhraseQuery extends Query {
     weight *= idf;				  // factor from document
   }
 
-  final Scorer scorer(IndexReader reader) throws IOException {
+  final Scorer scorer(IndexReader reader, Similarity similarity)
+    throws IOException {
     if (terms.size() == 0)			  // optimize zero-term case
       return null;
     if (terms.size() == 1) {			  // optimize one-term case
@@ -129,7 +127,8 @@ public class PhraseQuery extends Query {
       TermDocs docs = reader.termDocs(term);
       if (docs == null)
 	return null;
-      return new TermScorer(docs, reader.norms(term.field()), weight);
+      return new TermScorer(docs, similarity,
+                            reader.norms(term.field()), weight);
     }
 
     TermPositions[] tps = new TermPositions[terms.size()];
@@ -141,10 +140,12 @@ public class PhraseQuery extends Query {
     }
 
     if (slop == 0)				  // optimize exact case
-      return new ExactPhraseScorer(tps, reader.norms(field), weight);
+      return new ExactPhraseScorer(tps, similarity,
+                                   reader.norms(field), weight);
     else
       return
-	new SloppyPhraseScorer(tps, slop, reader.norms(field), weight);
+	new SloppyPhraseScorer(tps, similarity, slop,
+                               reader.norms(field), weight);
 
   }
 
diff --git a/src/java/org/apache/lucene/search/PhraseScorer.java b/src/java/org/apache/lucene/search/PhraseScorer.java
index 368144f2d98..853ed7294a1 100644
--- a/src/java/org/apache/lucene/search/PhraseScorer.java
+++ b/src/java/org/apache/lucene/search/PhraseScorer.java
@@ -66,9 +66,11 @@ abstract class PhraseScorer extends Scorer {
   protected PhraseQueue pq;
   protected PhrasePositions first, last;
 
-  PhraseScorer(TermPositions[] tps, byte[] n, float w) throws IOException {
-    norms = n;
-    weight = w;
+  PhraseScorer(TermPositions[] tps, Similarity similarity,
+               byte[] norms, float weight) throws IOException {
+    super(similarity);
+    this.norms = norms;
+    this.weight = weight;
 
     // use PQ to build a sorted list of PhrasePositions
     pq = new PhraseQueue(tps.length);
@@ -78,6 +80,7 @@ abstract class PhraseScorer extends Scorer {
   }
 
   final void score(HitCollector results, int end) throws IOException {
+    Similarity similarity = getSimilarity();
     while (last.doc < end) {			  // find doc w/ all the terms
       while (first.doc < last.doc) {		  // scan forward in first
 	do {
@@ -92,7 +95,7 @@ abstract class PhraseScorer extends Scorer {
       float freq = phraseFreq();		  // check for phrase
 
       if (freq > 0.0) {
-	float score = Similarity.tf(freq)*weight; // compute score
+	float score = similarity.tf(freq)*weight; // compute score
 	score *= Similarity.decodeNorm(norms[first.doc]); // normalize
 	results.collect(first.doc, score);	  // add to results
       }
diff --git a/src/java/org/apache/lucene/search/PrefixQuery.java b/src/java/org/apache/lucene/search/PrefixQuery.java
index 4525f26347b..cc5f63d71b8 100644
--- a/src/java/org/apache/lucene/search/PrefixQuery.java
+++ b/src/java/org/apache/lucene/search/PrefixQuery.java
@@ -90,8 +90,8 @@ public class PrefixQuery extends Query {
     }
   }
 
-  Scorer scorer(IndexReader reader) throws IOException {
-    return getQuery().scorer(reader);
+  Scorer scorer(IndexReader reader, Similarity similarity) throws IOException {
+    return getQuery().scorer(reader, similarity);
   }
 
   private BooleanQuery getQuery() throws IOException {
diff --git a/src/java/org/apache/lucene/search/Query.java b/src/java/org/apache/lucene/search/Query.java
index 88b50200c05..b3b84727e5c 100644
--- a/src/java/org/apache/lucene/search/Query.java
+++ b/src/java/org/apache/lucene/search/Query.java
@@ -86,18 +86,19 @@ public abstract class Query implements java.io.Serializable
     abstract void normalize(float norm);
 
     // query evaluation
-    abstract Scorer scorer(IndexReader reader) throws IOException;
+    abstract Scorer scorer(IndexReader reader, Similarity similarity)
+      throws IOException;
 
     void prepare(IndexReader reader) {}
 
     static Scorer scorer(Query query, Searcher searcher, IndexReader reader)
-	throws IOException
-    {
-	query.prepare(reader);
-	float sum = query.sumOfSquaredWeights(searcher);
-	float norm = 1.0f / (float)Math.sqrt(sum);
-	query.normalize(norm);
-	return query.scorer(reader);
+      throws IOException {
+      Similarity similarity = searcher.getSimilarity();
+      query.prepare(reader);
+      float sum = query.sumOfSquaredWeights(searcher);
+      float norm = similarity.queryNorm(sum);
+      query.normalize(norm);
+      return query.scorer(reader, similarity);
     }
 
     /**
diff --git a/src/java/org/apache/lucene/search/RangeQuery.java b/src/java/org/apache/lucene/search/RangeQuery.java
index dbeec4a4568..277e174d289 100644
--- a/src/java/org/apache/lucene/search/RangeQuery.java
+++ b/src/java/org/apache/lucene/search/RangeQuery.java
@@ -113,9 +113,9 @@ public class RangeQuery extends Query
         }
     }
     
-    Scorer scorer(IndexReader reader) throws IOException
+    Scorer scorer(IndexReader reader, Similarity similarity) throws IOException
     {
-        return getQuery().scorer(reader);
+        return getQuery().scorer(reader, similarity);
     }
     
     private BooleanQuery getQuery() throws IOException
diff --git a/src/java/org/apache/lucene/search/Scorer.java b/src/java/org/apache/lucene/search/Scorer.java
index 863a447c7d9..bd04dfb2580 100644
--- a/src/java/org/apache/lucene/search/Scorer.java
+++ b/src/java/org/apache/lucene/search/Scorer.java
@@ -57,5 +57,15 @@ package org.apache.lucene.search;
 import java.io.IOException;
 
 abstract class Scorer {
+  private Similarity similarity;
+
+  protected Scorer(Similarity similarity) {
+    this.similarity = similarity;
+  }
+
+  public Similarity getSimilarity() {
+    return this.similarity;
+  }
+
   abstract void score(HitCollector hc, int maxDoc) throws IOException;
 }
diff --git a/src/java/org/apache/lucene/search/Searcher.java b/src/java/org/apache/lucene/search/Searcher.java
index d9d1b18f41d..5cf9a3e33ed 100644
--- a/src/java/org/apache/lucene/search/Searcher.java
+++ b/src/java/org/apache/lucene/search/Searcher.java
@@ -63,9 +63,6 @@ import org.apache.lucene.index.IndexReader;
  * Implements some common utility methods.
  */
 public abstract class Searcher implements Searchable {
-
-  protected Similarity similarity;
-
   /** Returns the documents matching <code>query</code>. */
   public final Hits search(Query query) throws IOException {
     return search(query, (Filter)null);
@@ -91,12 +88,22 @@ public abstract class Searcher implements Searchable {
     search(query, (Filter)null, results);
   }    
 
-  /**
-   * Sets the <code>Similarity</code> implementation to use.
+  /** The Similarity implementation used by this searcher. */
+  private Similarity similarity = Similarity.getDefault();
+
+  /** Expert: Set the Similarity implementation used by this Searcher.
    *
-   * @param sim an instance of a class that implements  <code>Similarity</code
+   * @see Similarity#setDefault(Similarity)
    */
-  public void setSimilarity(Similarity sim) {
-    similarity = sim;
+  public void setSimilarity(Similarity similarity) {
+    this.similarity = similarity;
+  }
+
+  /** Expert: Return the Similarity implementation used by this Searcher.
+   *
+   * <p>This defaults to the current value of {@link Similarity#getDefault()}.
+   */
+  public Similarity getSimilarity() {
+    return this.similarity;
   }
 }
diff --git a/src/java/org/apache/lucene/search/Similarity.java b/src/java/org/apache/lucene/search/Similarity.java
index c525bc32e3e..459615f2943 100644
--- a/src/java/org/apache/lucene/search/Similarity.java
+++ b/src/java/org/apache/lucene/search/Similarity.java
@@ -55,14 +55,73 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
+import java.util.Vector;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
 
-/** Internal class used for scoring.
- * <p>Public only so that the indexing code can compute and store the
- * normalization byte for each document. */
+/** Expert: Scoring API.
+ * <p>Subclasses implement search scoring.
+ *
+ * <p>The score of query <code>q</code> for document <code>d</code> is defined
+ * in terms of these methods as follows:
+ *
+ * <table cellpadding="0" cellspacing="0" border="0">
+ *  <tr>
+ *    <td valign="middle" align="right" rowspan="2">score(q,d) =<br></td>
+ *    <td valign="middle" align="center">
+ *    <big><big><big><big><big>&Sigma;</big></big></big></big></big></td>
+ *    <td valign="middle"><small>
+ *    {@link #tf(int) tf}(t in d) *
+ *    {@link #idf(Term,Searcher) idf}(t) *
+ *    {@link Field#getBoost getBoost}(t.field in d) *
+ *    {@link #lengthNorm(String,int) lengthNorm}(t.field in d)
+ *    </small></td>
+ *    <td valign="middle" rowspan="2">&nbsp;*
+ *    {@link #coord(int,int) coord}(q,d) *
+ *    {@link #queryNorm(float) queryNorm}(q)
+ *    </td>
+ *  </tr>
+ *  <tr> 
+ *   <td valign="top" align="right">
+ *    <small>t in q</small>
+ *    </td>
+ *  </tr>
+ * </table>
+ *
+ * @see #setDefault(Similarity)
+ * @see IndexWriter#setSimilarity(Similarity)
+ * @see Searcher#setSimilarity(Similarity)
+ */
 public abstract class Similarity {
+  /** The Similarity implementation used by default. */
+  private static Similarity defaultImpl = new DefaultSimilarity();
 
+  /** Set the default Similarity implementation used by indexing and search
+   * code.
+   *
+   * @see Searcher#setSimilarity(Similarity)
+   * @see IndexWriter#setSimilarity(Similarity)
+   */
+  public static void setDefault(Similarity similarity) {
+    Similarity.defaultImpl = similarity;
+  }
+
+  /** Return the default Similarity implementation used by indexing and search
+   * code.
+   *
+   * <p>This is initially an instance of {@link DefaultSimilarity}.
+   *
+   * @see Searcher#setSimilarity(Similarity)
+   * @see IndexWriter#setSimilarity(Similarity)
+   */
+  public static Similarity getDefault() {
+    return Similarity.defaultImpl;
+  }
+
+  /** Cache of decoded bytes. */
   private static final float[] NORM_TABLE = new float[256];
 
   static {
@@ -70,31 +129,6 @@ public abstract class Similarity {
       NORM_TABLE[i] = byteToFloat((byte)i);
   }
 
-  private static Similarity similarity;
-
-  private Similarity() {}			  // no public constructor
-
-  /**
-   * Sets the <code>Similarity</code> implementation to use.
-   *
-   * @param sim an instance of a class that implements  <code>Similarity</code
-   */
-  public static void setDefaultSimilarity(Similarity sim) {
-    similarity = sim;
-  }
-
-  /** Computes the normalization value for a document given the total number of
-   * terms contained in a field.  These values are stored in an index and used
-   * by the search code.
-   *
-   * <p>The formula used is: <code>1.0f / Math.sqrt(numTerms)</code>
-   *
-   * @see Field#setBoost(float)
-   */
-  public static float normalizeLength(int numTerms) {
-    return (float)(1.0 / Math.sqrt(numTerms));
-  }
-  
   /** Decodes a normalization factor stored in an index.
    * @see #encodeNorm(float)
    */
@@ -102,6 +136,41 @@ public abstract class Similarity {
     return NORM_TABLE[b & 0xFF];
   }
 
+  /** Computes the normalization value for a field given the total number of
+   * terms contained in a field.  These values, together with field boosts, are
+   * stored in an index and multiplied into scores for hits on each field by the
+   * search code.
+   *
+   * <p>Matches in longer fields are less precise, so implementations of this
+   * method usually return smaller values when <code>numTokens</code> is large,
+   * and larger values when <code>numTokens</code> is small.
+   *
+   * <p>Note that these values are computed under {@link
+   * IndexWriter#addDocument(Document)} and then stored using
+   * {@link #encodeNorm(float)}.  Thus they have limited precision, and documents
+   * must be re-indexed if this method is altered.
+   *
+   * @param fieldName the name of the field
+   * @param numTokens the total number of tokens contained in fields named
+   * <i>fieldName</i> of <i>doc</i>.
+   * @return a normalization factor for hits on this field of this document
+   *
+   * @see Field#setBoost(float)
+   */
+  public abstract float lengthNorm(String fieldName, int numTokens);
+
+  /** Computes the normalization value for a query given the sum of the squared
+   * weights of each of the query terms.  This value is then multiplied into the
+   * weight of each query term.
+   *
+   * <p>This does not affect ranking, but rather just attempts to make scores
+   * from different queries comparable.
+   *
+   * @param sumOfSquaredWeights the sum of the squares of query term weights
+   * @return a normalization factor for query weights
+   */
+  public abstract float queryNorm(float sumOfSquaredWeights);
+
   /** Encodes a normalization factor for storage in an index.  
    *
    * <p>The encoding uses a five-bit exponent and three-bit mantissa, thus
@@ -151,25 +220,118 @@ public abstract class Similarity {
     return (byte)((exponent << 3) | mantissa);    // pack into a byte
    }
 
-  static final float tf(int freq) {
-    return (float)Math.sqrt(freq);
+
+  /** Computes a score factor based on a term or phrase's frequency in a
+   * document.  This value is multiplied by the {@link #idf(Term, Searcher)}
+   * factor for each term in the query and these products are then summed to
+   * form the initial score for a document.
+   *
+   * <p>Terms and phrases repeated in a document indicate the topic of the
+   * document, so implementations of this method usually return larger values
+   * when <code>freq</code> is large, and smaller values when <code>freq</code>
+   * is small.
+   *
+   * <p>The default implementation calls {@link #tf(float)}.
+   *
+   * @param freq the frequency of a term within a document
+   * @return a score factor based on a term's within-document frequency
+   */
+  public float tf(int freq) {
+    return tf((float)freq);
   }
 
-  static final float tf(float freq) {
-    return (float)Math.sqrt(freq);
-  }
+  /** Computes the amount of a sloppy phrase match, based on an edit distance.
+   * This value is summed for each sloppy phrase match in a document to form
+   * the frequency that is passed to {@link #tf(float)}.
+   *
+   * <p>A phrase match with a small edit distance to a document passage more
+   * closely matches the document, so implementations of this method usually
+   * return larger values when the edit distance is small and smaller values
+   * when it is large.
+   *
+   * @see PhraseQuery#setSlop(int)
+   * @param distance the edit distance of this sloppy phrase match
+   * @return the frequency increment for this match
+   */
+  public abstract float sloppyFreq(int distance);
+
+  /** Computes a score factor based on a term or phrase's frequency in a
+   * document.  This value is multiplied by the {@link #idf(Term, Searcher)}
+   * factor for each term in the query and these products are then summed to
+   * form the initial score for a document.
+   *
+   * <p>Terms and phrases repeated in a document indicate the topic of the
+   * document, so implementations of this method usually return larger values
+   * when <code>freq</code> is large, and smaller values when <code>freq</code>
+   * is small.
+   *
+   * @param freq the frequency of a term within a document
+   * @return a score factor based on a term's within-document frequency
+   */
+  public abstract float tf(float freq);
     
-  static final float idf(Term term, Searcher searcher) throws IOException {
-    // Use maxDoc() instead of numDocs() because its proportional to docFreq(),
-    // i.e., when one is inaccurate, so is the other, and in the same way.
+  /** Computes a score factor for a simple term.
+   *
+   * <p>The default implementation is:<pre>
+   *   return idf(searcher.docFreq(term), searcher.maxDoc());
+   * </pre>
+   *
+   * Note that {@link Searcher#maxDoc()} is used instead of {@link
+   * IndexReader#numDocs()} because it is proportional to {@link
+   * Searcher#docFreq(Term)}, i.e., when one is inaccurate, so is the other,
+   * and in the same direction.
+   *
+   * @param term the term in question
+   * @param searcher the document collection being searched
+   * @return a score factor for the term
+   */
+  public float idf(Term term, Searcher searcher) throws IOException {
     return idf(searcher.docFreq(term), searcher.maxDoc());
   }
 
-  static final float idf(int docFreq, int numDocs) {
-    return (float)(Math.log(numDocs/(double)(docFreq+1)) + 1.0);
+  /** Computes a score factor for a phrase.
+   *
+   * <p>The default implementation sums the {@link #idf(Term,Searcher)} factor
+   * for each term in the phrase.
+   *
+   * @param terms the vector of terms in the phrase
+   * @param searcher the document collection being searched
+   * @return a score factor for the phrase
+   */
+  public float idf(Vector terms, Searcher searcher) throws IOException {
+    float idf = 0.0f;
+    for (int i = 0; i < terms.size(); i++) {
+      idf += idf((Term)terms.elementAt(i), searcher);
+    }
+    return idf;
   }
+
+  /** Computes a score factor based on a term's document frequency (the number
+   * of documents which contain the term).  This value is multiplied by the
+   * {@link #tf(int)} factor for each term in the query and these products are
+   * then summed to form the initial score for a document.
+   *
+   * <p>Terms that occur in fewer documents are better indicators of topic, so
+   * implementations of this method usually return larger values for rare terms,
+   * and smaller values for common terms.
+   *
+   * @param docFreq the number of documents which contain the term
+   * @param numDocs the total number of documents in the collection
+   * @return a score factor based on the term's document frequency
+   */
+  protected abstract float idf(int docFreq, int numDocs);
     
-  static final float coord(int overlap, int maxOverlap) {
-    return overlap / (float)maxOverlap;
-  }
+  /** Computes a score factor based on the fraction of all query terms that a
+   * document contains.  This value is multiplied into scores.
+   *
+   * <p>The presence of a large portion of the query terms indicates a better
+   * match with the query, so implementations of this method usually return
+   * larger values when the ratio between these parameters is large and smaller
+   * values when the ratio between them is small.
+   *
+   * @param overlap the number of query terms matched in the document
+   * @param maxOverlap the total number of terms in the query
+   * @return a score factor based on term overlap with the query
+   */
+  public abstract float coord(int overlap, int maxOverlap);
 }
diff --git a/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
index 74bec4a5343..c3afa75b485 100644
--- a/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
+++ b/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
@@ -62,10 +62,10 @@ import org.apache.lucene.index.*;
 final class SloppyPhraseScorer extends PhraseScorer {
   private int slop;
 
-  SloppyPhraseScorer(TermPositions[] tps, int s, byte[] n, float w)
-       throws IOException {
-    super(tps, n, w);
-    slop = s;
+  SloppyPhraseScorer(TermPositions[] tps, Similarity similarity,
+                     int slop, byte[] norms, float weight) throws IOException {
+    super(tps, similarity, norms, weight);
+    this.slop = slop;
   }
 
   protected final float phraseFreq() throws IOException {
@@ -94,7 +94,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
 
       int matchLength = end - start;
       if (matchLength <= slop)
-	freq += 1.0 / (matchLength + 1);	  // penalize longer matches
+	freq += getSimilarity().sloppyFreq(matchLength); // score match
 
       if (pp.position > end)
 	end = pp.position;
diff --git a/src/java/org/apache/lucene/search/TermQuery.java b/src/java/org/apache/lucene/search/TermQuery.java
index 2fd54bc80c8..3e666a49764 100644
--- a/src/java/org/apache/lucene/search/TermQuery.java
+++ b/src/java/org/apache/lucene/search/TermQuery.java
@@ -73,7 +73,7 @@ public class TermQuery extends Query {
   }
 
   final float sumOfSquaredWeights(Searcher searcher) throws IOException {
-    idf = Similarity.idf(term, searcher);
+    idf = searcher.getSimilarity().idf(term, searcher);
     weight = idf * boost;
     return weight * weight;			  // square term weights
   }
@@ -83,14 +83,15 @@ public class TermQuery extends Query {
     weight *= idf;				  // factor from document
   }
 
-  Scorer scorer(IndexReader reader)
+  Scorer scorer(IndexReader reader, Similarity similarity)
        throws IOException {
     TermDocs termDocs = reader.termDocs(term);
 
     if (termDocs == null)
       return null;
     
-    return new TermScorer(termDocs, reader.norms(term.field()), weight);
+    return new TermScorer(termDocs, similarity,
+                          reader.norms(term.field()), weight);
   }
 
   /** Prints a user-readable version of this query. */
diff --git a/src/java/org/apache/lucene/search/TermScorer.java b/src/java/org/apache/lucene/search/TermScorer.java
index 76637131873..7582c66e7ec 100644
--- a/src/java/org/apache/lucene/search/TermScorer.java
+++ b/src/java/org/apache/lucene/search/TermScorer.java
@@ -63,21 +63,23 @@ final class TermScorer extends Scorer {
   private float weight;
   private int doc;
 
-  private final int[] docs = new int[128];	  // buffered doc numbers
-  private final int[] freqs = new int[128];	  // buffered term freqs
+  private final int[] docs = new int[32];	  // buffered doc numbers
+  private final int[] freqs = new int[32];	  // buffered term freqs
   private int pointer;
   private int pointerMax;
 
   private static final int SCORE_CACHE_SIZE = 32;
   private float[] scoreCache = new float[SCORE_CACHE_SIZE];
 
-  TermScorer(TermDocs td, byte[] n, float w) throws IOException {
-    termDocs = td;
-    norms = n;
-    weight = w;
+  TermScorer(TermDocs td, Similarity similarity, byte[] norms, float weight)
+    throws IOException {
+    super(similarity);
+    this.termDocs = td;
+    this.norms = norms;
+    this.weight = weight;
 
     for (int i = 0; i < SCORE_CACHE_SIZE; i++)
-      scoreCache[i] = Similarity.tf(i) * weight;
+      scoreCache[i] = getSimilarity().tf(i) * weight;
 
     pointerMax = termDocs.read(docs, freqs);	  // fill buffers
 
@@ -91,12 +93,13 @@ final class TermScorer extends Scorer {
 
   final void score(HitCollector c, final int end) throws IOException {
     int d = doc;				  // cache doc in local
+    Similarity similarity = getSimilarity();      // cache sim in local
     while (d < end) {				  // for docs in window
       final int f = freqs[pointer];
       float score =				  // compute tf(f)*weight
 	f < SCORE_CACHE_SIZE			  // check cache
 	 ? scoreCache[f]			  // cache hit
-	 : Similarity.tf(f)*weight;		  // cache miss
+	 : similarity.tf(f)*weight;		  // cache miss
 
       score *= Similarity.decodeNorm(norms[d]);	  // normalize for field
 
diff --git a/src/test/org/apache/lucene/index/DocTest.java b/src/test/org/apache/lucene/index/DocTest.java
index b5f9116f992..ffa5ae09cc2 100644
--- a/src/test/org/apache/lucene/index/DocTest.java
+++ b/src/test/org/apache/lucene/index/DocTest.java
@@ -59,6 +59,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.search.Similarity;
 import org.apache.lucene.demo.FileDocument;
 
 import java.io.File;
@@ -95,7 +96,8 @@ class DocTest {
        throws Exception {
     Directory directory = FSDirectory.getDirectory("test", false);
     Analyzer analyzer = new SimpleAnalyzer();
-    DocumentWriter writer = new DocumentWriter(directory, analyzer, 1000);
+    DocumentWriter writer =
+      new DocumentWriter(directory, analyzer, Similarity.getDefault(), 1000);
 
     File file = new File(fileName);
     Document doc = FileDocument.Document(file);
diff --git a/src/test/org/apache/lucene/search/TestDocBoost.java b/src/test/org/apache/lucene/search/TestDocBoost.java
index c9e2c1a8d85..7ed8d3b41b2 100644
--- a/src/test/org/apache/lucene/search/TestDocBoost.java
+++ b/src/test/org/apache/lucene/search/TestDocBoost.java
@@ -76,7 +76,7 @@ public class TestDocBoost extends TestCase {
     super(name);
   }
   
-  public static void test() throws Exception {
+  public void testDocBoost() throws Exception {
     RAMDirectory store = new RAMDirectory();
     IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
     
diff --git a/src/test/org/apache/lucene/search/TestSimilarity.java b/src/test/org/apache/lucene/search/TestSimilarity.java
new file mode 100644
index 00000000000..b095def7daa
--- /dev/null
+++ b/src/test/org/apache/lucene/search/TestSimilarity.java
@@ -0,0 +1,161 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+import junit.framework.TestCase;
+
+import java.util.Vector;
+
+ /** Similarity unit test.
+  *
+  * @author Doug Cutting
+  * @version $Revision$
+  */
+public class TestSimilarity extends TestCase {
+  public TestSimilarity(String name) {
+    super(name);
+  }
+  
+  public static class SimpleSimilarity extends Similarity {
+    public float lengthNorm(String field, int numTerms) { return 1.0f; }
+    public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
+    public float tf(float freq) { return freq; }
+    public float sloppyFreq(int distance) { return 2.0f; }
+    public float idf(Vector terms, Searcher searcher) { return 1.0f; }
+    public float idf(int docFreq, int numDocs) { return 1.0f; }
+    public float coord(int overlap, int maxOverlap) { return 1.0f; }
+  }
+
+  public void testSimilarity() throws Exception {
+    RAMDirectory store = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
+    writer.setSimilarity(new SimpleSimilarity());
+    
+    Document d1 = new Document();
+    d1.add(Field.Text("field", "a c"));
+
+    Document d2 = new Document();
+    d2.add(Field.Text("field", "a b c"));
+    
+    writer.addDocument(d1);
+    writer.addDocument(d2);
+    writer.optimize();
+    writer.close();
+
+    final float[] scores = new float[4];
+
+    Searcher searcher = new IndexSearcher(store);
+    searcher.setSimilarity(new SimpleSimilarity());
+
+    Term a = new Term("field", "a");
+    Term b = new Term("field", "b");
+    Term c = new Term("field", "c");
+
+    searcher.search
+      (new TermQuery(b),
+       new HitCollector() {
+         public final void collect(int doc, float score) {
+           assertTrue(score == 1.0f);
+         }
+       });
+
+    BooleanQuery bq = new BooleanQuery();
+    bq.add(new TermQuery(a), false, false);
+    bq.add(new TermQuery(b), false, false);
+    //System.out.println(bq.toString("field"));
+    searcher.search
+      (bq,
+       new HitCollector() {
+         public final void collect(int doc, float score) {
+           //System.out.println("Doc=" + doc + " score=" + score);
+           assertTrue(score == (float)doc+1);
+         }
+       });
+
+    PhraseQuery pq = new PhraseQuery();
+    pq.add(a);
+    pq.add(c);
+    //System.out.println(pq.toString("field"));
+    searcher.search
+      (pq,
+       new HitCollector() {
+         public final void collect(int doc, float score) {
+           //System.out.println("Doc=" + doc + " score=" + score);
+           assertTrue(score == 1.0f);
+         }
+       });
+
+    pq.setSlop(2);
+    //System.out.println(pq.toString("field"));
+    searcher.search
+      (pq,
+       new HitCollector() {
+         public final void collect(int doc, float score) {
+           //System.out.println("Doc=" + doc + " score=" + score);
+           assertTrue(score == 2.0f);
+         }
+       });
+  }
+}

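$$\mathrm{score}(q,d) \;=\; \Bigl(\sum_{t \,\in\, q} \mathrm{tf}(t \text{ in } d)\cdot \mathrm{idf}(t)\cdot \mathrm{getBoost}(t.\mathrm{field} \text{ in } d)\cdot \mathrm{lengthNorm}(t.\mathrm{field} \text{ in } d)\Bigr)\cdot \mathrm{coord}(q,d)\cdot \mathrm{queryNorm}(q)$$
where tf = `Similarity#tf(int)`, idf = `Similarity#idf(Term,Searcher)`, getBoost = `Field#getBoost`, lengthNorm = `Similarity#lengthNorm(String,int)`, coord = `Similarity#coord(int,int)`, queryNorm = `Similarity#queryNorm(float)`.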