mirror of https://github.com/apache/lucene.git
LUCENE-7730: Better accuracy for the length normalization factor.
commit 06a6034d9b (parent c53d19e7b2)
@@ -63,6 +63,9 @@ Improvements
 * LUCENE-7489: Better storage of sparse doc-values fields with the default
   codec. (Adrien Grand)
 
+* LUCENE-7730: More accurate encoding of the length normalization factor
+  thanks to the removal of index-time boosts. (Adrien Grand)
+
 Optimizations
 
 * LUCENE-7416: BooleanQuery optimizes queries that have queries that occur both
File diff suppressed because it is too large
@@ -603,7 +603,7 @@ final class DefaultIndexingChain extends DocConsumer {
       // PerField.invert to allow for later downgrading of the index options:
       fi.setIndexOptions(fieldType.indexOptions());
 
-      fp = new PerField(fi, invert);
+      fp = new PerField(docWriter.getIndexCreatedVersionMajor(), fi, invert);
       fp.next = fieldHash[hashPos];
       fieldHash[hashPos] = fp;
       totalFieldCount++;
@@ -633,6 +633,7 @@ final class DefaultIndexingChain extends DocConsumer {
   /** NOTE: not static: accesses at least docState, termsHash. */
   private final class PerField implements Comparable<PerField> {
 
+    final int indexCreatedVersionMajor;
     final FieldInfo fieldInfo;
     final Similarity similarity;
 
@@ -659,7 +660,8 @@ final class DefaultIndexingChain extends DocConsumer {
     // reused
     TokenStream tokenStream;
 
-    public PerField(FieldInfo fieldInfo, boolean invert) {
+    public PerField(int indexCreatedVersionMajor, FieldInfo fieldInfo, boolean invert) {
+      this.indexCreatedVersionMajor = indexCreatedVersionMajor;
       this.fieldInfo = fieldInfo;
       similarity = docState.similarity;
       if (invert) {
@@ -668,7 +670,7 @@ final class DefaultIndexingChain extends DocConsumer {
     }
 
     void setInvertState() {
-      invertState = new FieldInvertState(fieldInfo.name);
+      invertState = new FieldInvertState(indexCreatedVersionMajor, fieldInfo.name);
       termsHashPerField = termsHash.addField(invertState, fieldInfo);
       if (fieldInfo.omitsNorms() == false) {
         assert norms == null;
@@ -193,6 +193,10 @@ class DocumentsWriterPerThread {
     return fieldInfos;
   }
 
+  public int getIndexCreatedVersionMajor() {
+    return indexWriter.segmentInfos.getIndexCreatedVersionMajor();
+  }
+
   final void testPoint(String message) {
     if (enableTestPoints) {
       assert infoStream.isEnabled("TP"); // don't enable unless you need them.
@@ -31,7 +31,8 @@ import org.apache.lucene.util.AttributeSource;
  * @lucene.experimental
  */
 public final class FieldInvertState {
-  String name;
+  final int indexCreatedVersionMajor;
+  final String name;
   int position;
   int length;
   int numOverlap;
@@ -50,14 +51,15 @@ public final class FieldInvertState {
 
   /** Creates {@code FieldInvertState} for the specified
    *  field name. */
-  public FieldInvertState(String name) {
+  public FieldInvertState(int indexCreatedVersionMajor, String name) {
+    this.indexCreatedVersionMajor = indexCreatedVersionMajor;
     this.name = name;
   }
 
   /** Creates {@code FieldInvertState} for the specified
    *  field name and values for all fields. */
-  public FieldInvertState(String name, int position, int length, int numOverlap, int offset) {
-    this.name = name;
+  public FieldInvertState(int indexCreatedVersionMajor, String name, int position, int length, int numOverlap, int offset) {
+    this(indexCreatedVersionMajor, name);
     this.position = position;
     this.length = length;
     this.numOverlap = numOverlap;
@@ -164,4 +166,11 @@ public final class FieldInvertState {
   public String getName() {
     return name;
   }
+
+  /**
+   * Return the version that was used to create the index, or 6 if it was created before 7.0.
+   */
+  public int getIndexCreatedVersionMajor() {
+    return indexCreatedVersionMajor;
+  }
 }
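Illustration (not part of the commit): the new accessor is what lets norm encoding vary per index generation. A minimal sketch of the pattern that the Similarity changes below all follow, assuming only the 7.x APIs introduced in this diff:

    import org.apache.lucene.index.FieldInvertState;
    import org.apache.lucene.util.SmallFloat;

    // Sketch: pick the norm encoding based on the index's major creation version.
    static long encodeNorm(FieldInvertState state, boolean discountOverlaps) {
      int numTerms = discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength();
      if (state.getIndexCreatedVersionMajor() >= 7) {
        return SmallFloat.intToByte4(numTerms); // 7.0+ indexes store the length itself
      } else {
        return SmallFloat.floatToByte315((float) (1 / Math.sqrt(numTerms))); // legacy lossy encoding
      }
    }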
@@ -96,20 +96,6 @@ public class BM25Similarity extends Similarity {
     }
   }
 
-  /** The default implementation encodes <code>1 / sqrt(length)</code>
-   * with {@link SmallFloat#floatToByte315(float)}. This is compatible with
-   * Lucene's historic implementation: {@link ClassicSimilarity}. If you
-   * change this, then you should change {@link #decodeNormValue(byte)} to match. */
-  protected byte encodeNormValue(int fieldLength) {
-    return SmallFloat.floatToByte315((float) (1 / Math.sqrt(fieldLength)));
-  }
-
-  /** The default implementation returns <code>1 / f<sup>2</sup></code>
-   * where <code>f</code> is {@link SmallFloat#byte315ToFloat(byte)}. */
-  protected float decodeNormValue(byte b) {
-    return NORM_TABLE[b & 0xFF];
-  }
-
   /**
    * True if overlap tokens (tokens with a position of increment of zero) are
    * discounted from the document's length.
@@ -132,21 +118,31 @@ public class BM25Similarity extends Similarity {
   }
 
   /** Cache of decoded bytes. */
-  private static final float[] NORM_TABLE = new float[256];
+  private static final float[] OLD_LENGTH_TABLE = new float[256];
+  private static final float[] LENGTH_TABLE = new float[256];
 
   static {
     for (int i = 1; i < 256; i++) {
       float f = SmallFloat.byte315ToFloat((byte)i);
-      NORM_TABLE[i] = 1.0f / (f*f);
+      OLD_LENGTH_TABLE[i] = 1.0f / (f*f);
     }
-    NORM_TABLE[0] = 1.0f / NORM_TABLE[255]; // otherwise inf
+    OLD_LENGTH_TABLE[0] = 1.0f / OLD_LENGTH_TABLE[255]; // otherwise inf
+
+    for (int i = 0; i < 256; i++) {
+      LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
+    }
   }
 
 
   @Override
   public final long computeNorm(FieldInvertState state) {
     final int numTerms = discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength();
-    return encodeNormValue(numTerms);
+    int indexCreatedVersionMajor = state.getIndexCreatedVersionMajor();
+    if (indexCreatedVersionMajor >= 7) {
+      return SmallFloat.intToByte4(numTerms);
+    } else {
+      return SmallFloat.floatToByte315((float) (1 / Math.sqrt(numTerms)));
+    }
   }
 
   /**
@@ -207,34 +203,43 @@ public class BM25Similarity extends Similarity {
   @Override
   public final SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
     Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
 
     float avgdl = avgFieldLength(collectionStats);
 
     // compute freq-independent part of bm25 equation across all norm values
-    float cache[] = new float[256];
+    float[] oldCache = new float[256];
+    float[] cache = new float[256];
     for (int i = 0; i < cache.length; i++) {
-      cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
+      oldCache[i] = k1 * ((1 - b) + b * OLD_LENGTH_TABLE[i] / avgdl);
+      cache[i] = k1 * ((1 - b) + b * LENGTH_TABLE[i] / avgdl);
     }
-    return new BM25Stats(collectionStats.field(), boost, idf, avgdl, cache);
+    return new BM25Stats(collectionStats.field(), boost, idf, avgdl, oldCache, cache);
   }
 
   @Override
   public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
     BM25Stats bm25stats = (BM25Stats) stats;
-    return new BM25DocScorer(bm25stats, context.reader().getNormValues(bm25stats.field));
+    return new BM25DocScorer(bm25stats, context.reader().getMetaData().getCreatedVersionMajor(), context.reader().getNormValues(bm25stats.field));
   }
 
   private class BM25DocScorer extends SimScorer {
     private final BM25Stats stats;
     private final float weightValue; // boost * idf * (k1 + 1)
     private final NumericDocValues norms;
+    /** precomputed cache for all length values */
+    private final float[] lengthCache;
+    /** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */
     private final float[] cache;
 
-    BM25DocScorer(BM25Stats stats, NumericDocValues norms) throws IOException {
+    BM25DocScorer(BM25Stats stats, int indexCreatedVersionMajor, NumericDocValues norms) throws IOException {
       this.stats = stats;
       this.weightValue = stats.weight * (k1 + 1);
-      this.cache = stats.cache;
       this.norms = norms;
+      if (indexCreatedVersionMajor >= 7) {
+        lengthCache = LENGTH_TABLE;
+        cache = stats.cache;
+      } else {
+        lengthCache = OLD_LENGTH_TABLE;
+        cache = stats.oldCache;
+      }
     }
 
     @Override
@@ -245,7 +250,7 @@ public class BM25Similarity extends Similarity {
         norm = k1;
       } else {
         if (norms.advanceExact(doc)) {
-          norm = cache[(byte)norms.longValue() & 0xFF];
+          norm = cache[((byte) norms.longValue()) & 0xFF];
         } else {
           norm = cache[0];
         }
@@ -255,7 +260,7 @@ public class BM25Similarity extends Similarity {
 
     @Override
     public Explanation explain(int doc, Explanation freq) throws IOException {
-      return explainScore(doc, freq, stats, norms);
+      return explainScore(doc, freq, stats, norms, lengthCache);
     }
 
     @Override
@@ -281,21 +286,23 @@ public class BM25Similarity extends Similarity {
     private final float weight;
     /** field name, for pulling norms */
     private final String field;
-    /** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */
-    private final float cache[];
+    /** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl)
+     *  for both OLD_LENGTH_TABLE and LENGTH_TABLE */
+    private final float[] oldCache, cache;
 
-    BM25Stats(String field, float boost, Explanation idf, float avgdl, float cache[]) {
+    BM25Stats(String field, float boost, Explanation idf, float avgdl, float[] oldCache, float[] cache) {
       this.field = field;
       this.boost = boost;
       this.idf = idf;
       this.avgdl = avgdl;
-      this.cache = cache;
       this.weight = idf.getValue() * boost;
+      this.oldCache = oldCache;
+      this.cache = cache;
     }
 
   }
 
-  private Explanation explainTFNorm(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms) throws IOException {
+  private Explanation explainTFNorm(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms, float[] lengthCache) throws IOException {
     List<Explanation> subs = new ArrayList<>();
     subs.add(freq);
     subs.add(Explanation.match(k1, "parameter k1"));
@@ -311,7 +318,7 @@ public class BM25Similarity extends Similarity {
       } else {
         norm = 0;
       }
-      float doclen = decodeNormValue(norm);
+      float doclen = lengthCache[norm & 0xff];
       subs.add(Explanation.match(b, "parameter b"));
       subs.add(Explanation.match(stats.avgdl, "avgFieldLength"));
       subs.add(Explanation.match(doclen, "fieldLength"));
@@ -321,13 +328,13 @@ public class BM25Similarity extends Similarity {
     }
   }
 
-  private Explanation explainScore(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms) throws IOException {
+  private Explanation explainScore(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms, float[] lengthCache) throws IOException {
     Explanation boostExpl = Explanation.match(stats.boost, "boost");
     List<Explanation> subs = new ArrayList<>();
     if (boostExpl.getValue() != 1.0f)
       subs.add(boostExpl);
     subs.add(stats.idf);
-    Explanation tfNormExpl = explainTFNorm(doc, freq, stats, norms);
+    Explanation tfNormExpl = explainTFNorm(doc, freq, stats, norms, lengthCache);
     subs.add(tfNormExpl);
     return Explanation.match(
       boostExpl.getValue() * stats.idf.getValue() * tfNormExpl.getValue(),
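Illustration (not part of the commit): the practical difference between the two encodings used above. The SmallFloat calls are the real ones from this change; the class name and values are made up for the demo.

    import org.apache.lucene.util.SmallFloat;

    // Round-trip a field length through both norm encodings.
    public class NormEncodingDemo {
      public static void main(String[] args) {
        int fieldLength = 42;
        // pre-7.0: the norm stores a lossy 1/sqrt(length) float
        byte oldNorm = SmallFloat.floatToByte315((float) (1 / Math.sqrt(fieldLength)));
        float oldDecoded = SmallFloat.byte315ToFloat(oldNorm);
        // 7.0+: the norm stores the (compressed) length itself
        byte newNorm = SmallFloat.intToByte4(fieldLength);
        int newDecoded = SmallFloat.byte4ToInt(newNorm);
        System.out.println("old decode: " + oldDecoded + ", new decode: " + newDecoded);
      }
    }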
@@ -17,91 +17,27 @@
 package org.apache.lucene.search.similarities;
 
 
 import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.TermStatistics;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.SmallFloat;
 
 /**
- * Expert: Default scoring implementation which {@link #encodeNormValue(float)
- * encodes} norm values as a single byte before being stored. At search time,
- * the norm byte value is read from the index
- * {@link org.apache.lucene.store.Directory directory} and
- * {@link #decodeNormValue(long) decoded} back to a float <i>norm</i> value.
- * This encoding/decoding, while reducing index size, comes with the price of
- * precision loss - it is not guaranteed that <i>decode(encode(x)) = x</i>. For
- * instance, <i>decode(encode(0.89)) = 0.875</i>.
- * <p>
- * Compression of norm values to a single byte saves memory at search time,
- * because once a field is referenced at search time, its norms - for all
- * documents - are maintained in memory.
- * <p>
- * The rationale supporting such lossy compression of norm values is that given
- * the difficulty (and inaccuracy) of users to express their true information
- * need by a query, only big differences matter. <br>
- * <br>
- * Last, note that search time is too late to modify this <i>norm</i> part of
- * scoring, e.g. by using a different {@link Similarity} for search.
+ * Expert: Historical scoring implementation. You might want to consider using
+ * {@link BM25Similarity} instead, which is generally considered superior to
+ * TF-IDF.
  */
 public class ClassicSimilarity extends TFIDFSimilarity {
 
-  /** Cache of decoded bytes. */
-  private static final float[] NORM_TABLE = new float[256];
-
-  static {
-    for (int i = 0; i < 256; i++) {
-      NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
-    }
-  }
-
   /** Sole constructor: parameter-free */
   public ClassicSimilarity() {}
 
-  /**
-   * Encodes a normalization factor for storage in an index.
-   * <p>
-   * The encoding uses a three-bit mantissa, a five-bit exponent, and the
-   * zero-exponent point at 15, thus representing values from around 7x10^9 to
-   * 2x10^-9 with about one significant decimal digit of accuracy. Zero is also
-   * represented. Negative numbers are rounded up to zero. Values too large to
-   * represent are rounded down to the largest representable value. Positive
-   * values too small to represent are rounded up to the smallest positive
-   * representable value.
-   *
-   * @see org.apache.lucene.util.SmallFloat
-   */
-  @Override
-  public final long encodeNormValue(float f) {
-    return SmallFloat.floatToByte315(f);
-  }
-
-  /**
-   * Decodes the norm value, assuming it is a single byte.
-   *
-   * @see #encodeNormValue(float)
-   */
-  @Override
-  public final float decodeNormValue(long norm) {
-    return NORM_TABLE[(int) (norm & 0xFF)]; // & 0xFF maps negative bytes to positive above 127
-  }
-
   /** Implemented as
-   *  <code>state.getBoost()*lengthNorm(numTerms)</code>, where
-   *  <code>numTerms</code> is {@link FieldInvertState#getLength()} if {@link
-   *  #setDiscountOverlaps} is false, else it's {@link
-   *  FieldInvertState#getLength()} - {@link
-   *  FieldInvertState#getNumOverlap()}.
+   *  <code>1/sqrt(length)</code>.
    *
    *  @lucene.experimental */
   @Override
-  public float lengthNorm(FieldInvertState state) {
-    final int numTerms;
-    if (discountOverlaps)
-      numTerms = state.getLength() - state.getNumOverlap();
-    else
-      numTerms = state.getLength();
+  public float lengthNorm(int numTerms) {
     return (float) (1.0 / Math.sqrt(numTerms));
   }
 
@@ -138,33 +74,6 @@ public class ClassicSimilarity extends TFIDFSimilarity {
   public float idf(long docFreq, long docCount) {
     return (float)(Math.log((docCount+1)/(double)(docFreq+1)) + 1.0);
   }
-
-  /**
-   * True if overlap tokens (tokens with a position of increment of zero) are
-   * discounted from the document's length.
-   */
-  protected boolean discountOverlaps = true;
-
-  /** Determines whether overlap tokens (Tokens with
-   *  0 position increment) are ignored when computing
-   *  norm.  By default this is true, meaning overlap
-   *  tokens do not count when computing norms.
-   *
-   *  @lucene.experimental
-   *
-   *  @see #computeNorm
-   */
-  public void setDiscountOverlaps(boolean v) {
-    discountOverlaps = v;
-  }
-
-  /**
-   * Returns true if overlap tokens are discounted from the document's length.
-   * @see #setDiscountOverlaps
-   */
-  public boolean getDiscountOverlaps() {
-    return discountOverlaps;
-  }
 
   @Override
   public String toString() {
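Illustration (not part of the commit): with lengthNorm now taking the raw term count, a custom TF-IDF subclass no longer touches FieldInvertState. A hypothetical subclass, mirroring the pattern the test changes below use:

    import org.apache.lucene.search.similarities.ClassicSimilarity;

    // Hypothetical: disable length normalization entirely, using the new
    // lengthNorm(int) signature that replaces lengthNorm(FieldInvertState).
    public class FlatLengthSimilarity extends ClassicSimilarity {
      @Override
      public float lengthNorm(int length) {
        return 1f;
      }
    }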
@@ -190,7 +190,8 @@ public abstract class SimilarityBase extends Similarity {
   }
 
   @Override
-  public SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
+  public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
+    int indexCreatedVersionMajor = context.reader().getMetaData().getCreatedVersionMajor();
     if (stats instanceof MultiSimilarity.MultiStats) {
       // a multi term query (e.g. phrase). return the summation,
       // scoring almost as if it were boolean query
@@ -198,12 +199,12 @@ public abstract class SimilarityBase extends Similarity {
       SimScorer subScorers[] = new SimScorer[subStats.length];
       for (int i = 0; i < subScorers.length; i++) {
         BasicStats basicstats = (BasicStats) subStats[i];
-        subScorers[i] = new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
+        subScorers[i] = new BasicSimScorer(basicstats, indexCreatedVersionMajor, context.reader().getNormValues(basicstats.field));
       }
       return new MultiSimilarity.MultiSimScorer(subScorers);
     } else {
       BasicStats basicstats = (BasicStats) stats;
-      return new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
+      return new BasicSimScorer(basicstats, indexCreatedVersionMajor, context.reader().getNormValues(basicstats.field));
     }
   }
 
@@ -216,40 +217,38 @@ public abstract class SimilarityBase extends Similarity {
 
   // ------------------------------ Norm handling ------------------------------
 
-  /** Norm to document length map. */
-  private static final float[] NORM_TABLE = new float[256];
+  /** Cache of decoded bytes. */
+  private static final float[] OLD_LENGTH_TABLE = new float[256];
+  private static final float[] LENGTH_TABLE = new float[256];
 
   static {
     for (int i = 1; i < 256; i++) {
-      float floatNorm = SmallFloat.byte315ToFloat((byte)i);
-      NORM_TABLE[i] = 1.0f / (floatNorm * floatNorm);
+      float f = SmallFloat.byte315ToFloat((byte)i);
+      OLD_LENGTH_TABLE[i] = 1.0f / (f*f);
     }
-    NORM_TABLE[0] = 1.0f / NORM_TABLE[255]; // otherwise inf
+    OLD_LENGTH_TABLE[0] = 1.0f / OLD_LENGTH_TABLE[255]; // otherwise inf
+
+    for (int i = 0; i < 256; i++) {
+      LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
+    }
   }
 
-  /** Encodes the document length in the same way as {@link TFIDFSimilarity}. */
+  /** Encodes the document length in the same way as {@link BM25Similarity}. */
   @Override
-  public long computeNorm(FieldInvertState state) {
-    final float numTerms;
+  public final long computeNorm(FieldInvertState state) {
+    final int numTerms;
     if (discountOverlaps)
       numTerms = state.getLength() - state.getNumOverlap();
     else
       numTerms = state.getLength();
-    return encodeNormValue(numTerms);
+    int indexCreatedVersionMajor = state.getIndexCreatedVersionMajor();
+    if (indexCreatedVersionMajor >= 7) {
+      return SmallFloat.intToByte4(numTerms);
+    } else {
+      return SmallFloat.floatToByte315((float) (1 / Math.sqrt(numTerms)));
+    }
   }
 
-  /** Decodes a normalization factor (document length) stored in an index.
-   * @see #encodeNormValue(float)
-   */
-  protected float decodeNormValue(byte norm) {
-    return NORM_TABLE[norm & 0xFF]; // & 0xFF maps negative bytes to positive above 127
-  }
-
-  /** Encodes the length to a byte via SmallFloat. */
-  protected byte encodeNormValue(float length) {
-    return SmallFloat.floatToByte315((float) (1 / Math.sqrt(length)));
-  }
-
 
   // ----------------------------- Static methods ------------------------------
 
   /** Returns the base two logarithm of {@code x}. */
@@ -266,35 +265,37 @@ public abstract class SimilarityBase extends Similarity {
    * {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
    * respectively.
    */
-  private class BasicSimScorer extends SimScorer {
+  final class BasicSimScorer extends SimScorer {
     private final BasicStats stats;
     private final NumericDocValues norms;
+    private final float[] normCache;
 
-    BasicSimScorer(BasicStats stats, NumericDocValues norms) throws IOException {
+    BasicSimScorer(BasicStats stats, int indexCreatedVersionMajor, NumericDocValues norms) throws IOException {
       this.stats = stats;
       this.norms = norms;
+      this.normCache = indexCreatedVersionMajor >= 7 ? LENGTH_TABLE : OLD_LENGTH_TABLE;
     }
 
-    private float getNormValue(int doc) throws IOException {
+    float getLengthValue(int doc) throws IOException {
       if (norms == null) {
         return 1F;
      }
       if (norms.advanceExact(doc)) {
-        return decodeNormValue((byte) norms.longValue());
+        return normCache[Byte.toUnsignedInt((byte) norms.longValue())];
       } else {
-        return decodeNormValue((byte) 0);
+        return 0;
       }
     }
 
     @Override
     public float score(int doc, float freq) throws IOException {
       // We have to supply something in case norms are omitted
-      return SimilarityBase.this.score(stats, freq, getNormValue(doc));
+      return SimilarityBase.this.score(stats, freq, getLengthValue(doc));
     }
 
     @Override
     public Explanation explain(int doc, Explanation freq) throws IOException {
-      return SimilarityBase.this.explain(stats, doc, freq, getNormValue(doc));
+      return SimilarityBase.this.explain(stats, doc, freq, getLengthValue(doc));
     }
 
     @Override
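Illustration (not part of the commit): SimilarityBase subclasses are unaffected API-wise, but the docLen they receive is now a decoded field length in both cases, per the tables above. A hypothetical subclass showing the contract:

    import org.apache.lucene.search.similarities.BasicStats;
    import org.apache.lucene.search.similarities.SimilarityBase;

    // Hypothetical: dampen term frequency by document length. docLen is an
    // approximation of the field length for both old and new indexes.
    public class LengthScaledSimilarity extends SimilarityBase {
      @Override
      protected float score(BasicStats stats, float freq, float docLen) {
        return freq / (1 + docLen);
      }

      @Override
      public String toString() {
        return "LengthScaledSimilarity";
      }
    }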
@@ -30,6 +30,7 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.TermStatistics;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.SmallFloat;
 
 
 /**
@@ -233,11 +234,6 @@ import org.apache.lucene.util.BytesRef;
  *    And this is exactly what normalizing the query vector <i>V(q)</i>
  *    provides: comparability (to a certain extent) of two or more queries.
  *    </li>
- *
- *    <li>Applying query normalization on the scores helps to keep the
- *    scores around the unit vector, hence preventing loss of score data
- *    because of floating point precision limitations.
- *    </li>
  *    </ul>
  *    </li>
  *
@@ -379,13 +375,49 @@ import org.apache.lucene.util.BytesRef;
  * @see IndexSearcher#setSimilarity(Similarity)
  */
 public abstract class TFIDFSimilarity extends Similarity {
 
+  /** Cache of decoded bytes. */
+  static final float[] OLD_NORM_TABLE = new float[256];
+
+  static {
+    for (int i = 0; i < 256; i++) {
+      OLD_NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
+    }
+  }
+
   /**
    * Sole constructor. (For invocation by subclass
    * constructors, typically implicit.)
    */
   public TFIDFSimilarity() {}
 
+  /**
+   * True if overlap tokens (tokens with a position of increment of zero) are
+   * discounted from the document's length.
+   */
+  protected boolean discountOverlaps = true;
+
+  /** Determines whether overlap tokens (Tokens with
+   *  0 position increment) are ignored when computing
+   *  norm.  By default this is true, meaning overlap
+   *  tokens do not count when computing norms.
+   *
+   *  @lucene.experimental
+   *
+   *  @see #computeNorm
+   */
+  public void setDiscountOverlaps(boolean v) {
+    discountOverlaps = v;
+  }
+
+  /**
+   * Returns true if overlap tokens are discounted from the document's length.
+   * @see #setDiscountOverlaps
+   */
+  public boolean getDiscountOverlaps() {
+    return discountOverlaps;
+  }
+
   /** Computes a score factor based on a term or phrase's frequency in a
    * document.  This value is multiplied by the {@link #idf(long, long)}
    * factor for each term in the query and these products are then summed to
@@ -471,30 +503,25 @@ public abstract class TFIDFSimilarity extends Similarity {
 
   /**
    * Compute an index-time normalization value for this field instance.
-   * <p>
-   * This value will be stored in a single byte lossy representation by
-   * {@link #encodeNormValue(float)}.
    *
-   * @param state statistics of the current field (such as length, boost, etc)
-   * @return an index-time normalization value
+   * @param length the number of terms in the field, optionally {@link #setDiscountOverlaps(boolean) discounting overlaps}
+   * @return a length normalization value
    */
-  public abstract float lengthNorm(FieldInvertState state);
+  public abstract float lengthNorm(int length);
 
   @Override
   public final long computeNorm(FieldInvertState state) {
-    float normValue = lengthNorm(state);
-    return encodeNormValue(normValue);
+    final int numTerms;
+    if (discountOverlaps)
+      numTerms = state.getLength() - state.getNumOverlap();
+    else
+      numTerms = state.getLength();
+    if (state.getIndexCreatedVersionMajor() >= 7) {
+      return SmallFloat.intToByte4(numTerms);
+    } else {
+      return SmallFloat.floatToByte315(lengthNorm(numTerms));
+    }
   }
 
-  /**
-   * Decodes a normalization factor stored in an index.
-   *
-   * @see #encodeNormValue(float)
-   */
-  public abstract float decodeNormValue(long norm);
-
-  /** Encodes a normalization factor for storage in an index. */
-  public abstract long encodeNormValue(float f);
-
   /** Computes the amount of a sloppy phrase match, based on an edit distance.
    * This value is summed for each sloppy phrase match in a document to form
@@ -529,24 +556,41 @@ public abstract class TFIDFSimilarity extends Similarity {
     final Explanation idf = termStats.length == 1
     ? idfExplain(collectionStats, termStats[0])
     : idfExplain(collectionStats, termStats);
-    return new IDFStats(collectionStats.field(), boost, idf);
+    float[] normTable = new float[256];
+    for (int i = 1; i < 256; ++i) {
+      int length = SmallFloat.byte4ToInt((byte) i);
+      float norm = lengthNorm(length);
+      normTable[i] = norm;
+    }
+    normTable[0] = 1f / normTable[255];
+    return new IDFStats(collectionStats.field(), boost, idf, normTable);
   }
 
   @Override
   public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
     IDFStats idfstats = (IDFStats) stats;
-    return new TFIDFSimScorer(idfstats, context.reader().getNormValues(idfstats.field));
+    final float[] normTable;
+    if (context.reader().getMetaData().getCreatedVersionMajor() >= 7) {
+      // the norms only encode the length, we need a translation table that depends on how lengthNorm is implemented
+      normTable = idfstats.normTable;
+    } else {
+      // the norm is directly encoded in the index
+      normTable = OLD_NORM_TABLE;
+    }
+    return new TFIDFSimScorer(idfstats, context.reader().getNormValues(idfstats.field), normTable);
   }
 
   private final class TFIDFSimScorer extends SimScorer {
     private final IDFStats stats;
     private final float weightValue;
     private final NumericDocValues norms;
+    private final float[] normTable;
 
-    TFIDFSimScorer(IDFStats stats, NumericDocValues norms) throws IOException {
+    TFIDFSimScorer(IDFStats stats, NumericDocValues norms, float[] normTable) throws IOException {
       this.stats = stats;
       this.weightValue = stats.queryWeight;
       this.norms = norms;
+      this.normTable = normTable;
     }
 
     @Override
@@ -556,13 +600,13 @@ public abstract class TFIDFSimilarity extends Similarity {
       if (norms == null) {
         return raw;
       } else {
-        long normValue;
+        float normValue;
         if (norms.advanceExact(doc)) {
-          normValue = norms.longValue();
+          normValue = normTable[(int) (norms.longValue() & 0xFF)];
         } else {
           normValue = 0;
         }
-        return raw * decodeNormValue(normValue); // normalize for field
+        return raw * normValue;  // normalize for field
       }
     }
 
@@ -578,35 +622,39 @@ public abstract class TFIDFSimilarity extends Similarity {
 
     @Override
     public Explanation explain(int doc, Explanation freq) throws IOException {
-      return explainScore(doc, freq, stats, norms);
+      return explainScore(doc, freq, stats, norms, normTable);
     }
   }
 
   /** Collection statistics for the TF-IDF model. The only statistic of interest
    * to this model is idf. */
-  private static class IDFStats extends SimWeight {
+  static class IDFStats extends SimWeight {
     private final String field;
     /** The idf and its explanation */
     private final Explanation idf;
     private final float boost;
     private final float queryWeight;
+    final float[] normTable;
 
-    public IDFStats(String field, float boost, Explanation idf) {
+    public IDFStats(String field, float boost, Explanation idf, float[] normTable) {
       // TODO: Validate?
       this.field = field;
       this.idf = idf;
       this.boost = boost;
       this.queryWeight = boost * idf.getValue();
+      this.normTable = normTable;
     }
   }
 
-  private Explanation explainField(int doc, Explanation freq, IDFStats stats, NumericDocValues norms) throws IOException {
+  private Explanation explainField(int doc, Explanation freq, IDFStats stats, NumericDocValues norms, float[] normTable) throws IOException {
     Explanation tfExplanation = Explanation.match(tf(freq.getValue()), "tf(freq="+freq.getValue()+"), with freq of:", freq);
     float norm;
-    if (norms != null && norms.advanceExact(doc)) {
-      norm = decodeNormValue(norms.longValue());
-    } else {
+    if (norms == null) {
+      norm = 1f;
+    } else if (norms.advanceExact(doc) == false) {
       norm = 0f;
+    } else {
+      norm = normTable[(int) (norms.longValue() & 0xFF)];
     }
 
     Explanation fieldNormExpl = Explanation.match(
@@ -619,9 +667,9 @@ public abstract class TFIDFSimilarity extends Similarity {
         tfExplanation, stats.idf, fieldNormExpl);
   }
 
-  private Explanation explainScore(int doc, Explanation freq, IDFStats stats, NumericDocValues norms) throws IOException {
+  private Explanation explainScore(int doc, Explanation freq, IDFStats stats, NumericDocValues norms, float[] normTable) throws IOException {
     Explanation queryExpl = Explanation.match(stats.boost, "boost");
-    Explanation fieldExpl = explainField(doc, freq, stats, norms);
+    Explanation fieldExpl = explainField(doc, freq, stats, norms, normTable);
     if (stats.boost == 1f) {
       return fieldExpl;
     }
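Illustration (not part of the commit): why computeWeight above builds a 256-entry table. For 7.0+ indexes a norm byte encodes a length, so lengthNorm must be applied at search time; precomputing it for every possible byte makes scoring a single array lookup. A sketch using ClassicSimilarity's 1/sqrt(length) as the lengthNorm:

    import org.apache.lucene.util.SmallFloat;

    // Sketch of the per-query norm table built in computeWeight above.
    static float[] buildNormTable() {
      float[] normTable = new float[256];
      for (int i = 1; i < 256; ++i) {
        int length = SmallFloat.byte4ToInt((byte) i); // decoded field length
        normTable[i] = (float) (1.0 / Math.sqrt(length));
      }
      normTable[0] = 1f / normTable[255]; // length 0 would otherwise divide by zero
      return normTable;
    }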
@@ -97,31 +97,74 @@ public class SmallFloat {
     return Float.intBitsToFloat(bits);
   }
 
-
-  /** floatToByte(b, mantissaBits=5, zeroExponent=2)
-   * <br>smallest nonzero value = 0.033203125
-   * <br>largest value = 1984.0
-   * <br>epsilon = 0.03125
-   */
-  public static byte floatToByte52(float f) {
-    int bits = Float.floatToRawIntBits(f);
-    int smallfloat = bits >> (24-5);
-    if (smallfloat <= (63-2)<<5) {
-      return (bits<=0) ? (byte)0 : (byte)1;
-    }
-    if (smallfloat >= ((63-2)<<5) + 0x100) {
-      return -1;
-    }
-    return (byte)(smallfloat - ((63-2)<<5));
-  }
-
-  /** byteToFloat(b, mantissaBits=5, zeroExponent=2) */
-  public static float byte52ToFloat(byte b) {
-    // on Java1.5 & 1.6 JVMs, prebuilding a decoding array and doing a lookup
-    // is only a little bit faster (anywhere from 0% to 7%)
-    if (b == 0) return 0.0f;
-    int bits = (b&0xff) << (24-5);
-    bits += (63-2) << 24;
-    return Float.intBitsToFloat(bits);
+  /** Float-like encoding for positive longs that preserves ordering and 4 significant bits. */
+  public static int longToInt4(long i) {
+    if (i < 0) {
+      throw new IllegalArgumentException("Only supports positive values, got " + i);
+    }
+    int numBits = 64 - Long.numberOfLeadingZeros(i);
+    if (numBits < 4) {
+      // subnormal value
+      return Math.toIntExact(i);
+    } else {
+      // normal value
+      int shift = numBits - 4;
+      // only keep the 5 most significant bits
+      int encoded = Math.toIntExact(i >>> shift);
+      // clear the most significant bit, which is implicit
+      encoded &= 0x07;
+      // encode the shift, adding 1 because 0 is reserved for subnormal values
+      encoded |= (shift + 1) << 3;
+      return encoded;
+    }
+  }
+
+  /**
+   * Decode values encoded with {@link #longToInt4(long)}.
+   */
+  public static final long int4ToLong(int i) {
+    long bits = i & 0x07;
+    int shift = (i >>> 3) - 1;
+    long decoded;
+    if (shift == -1) {
+      // subnormal value
+      decoded = bits;
+    } else {
+      // normal value
+      decoded = (bits | 0x08) << shift;
+    }
+    return decoded;
+  }
+
+  private static final int MAX_INT4 = longToInt4(Integer.MAX_VALUE);
+  private static final int NUM_FREE_VALUES = 255 - MAX_INT4;
+
+  /**
+   * Encode an integer to a byte. It is built upon {@link #longToInt4(long)}
+   * and leverages the fact that {@code longToInt4(Integer.MAX_VALUE)} is
+   * less than 255 to encode low values more accurately.
+   */
+  public static byte intToByte4(int i) {
+    if (i < 0) {
+      throw new IllegalArgumentException("Only supports positive values, got " + i);
+    }
+    if (i < NUM_FREE_VALUES) {
+      return (byte) i;
+    } else {
+      return (byte) (NUM_FREE_VALUES + longToInt4(i - NUM_FREE_VALUES));
+    }
+  }
+
+  /**
+   * Decode values that have been encoded with {@link #intToByte4(int)}.
+   */
+  public static int byte4ToInt(byte b) {
+    int i = Byte.toUnsignedInt(b);
+    if (i < NUM_FREE_VALUES) {
+      return i;
+    } else {
+      long decoded = NUM_FREE_VALUES + int4ToLong(i - NUM_FREE_VALUES);
+      return Math.toIntExact(decoded);
+    }
   }
 }
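Illustration (not part of the commit; the class name is made up): a round trip through the new encoding. Small lengths (below NUM_FREE_VALUES) survive exactly; larger values keep 4 significant bits, so the decode is a close approximation:

    import org.apache.lucene.util.SmallFloat;

    public class Byte4Demo {
      public static void main(String[] args) {
        for (int length : new int[] {1, 11, 111, 1111, 11111}) {
          byte b = SmallFloat.intToByte4(length);
          System.out.println(length + " -> " + SmallFloat.byte4ToInt(b));
        }
      }
    }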
@@ -2441,7 +2441,7 @@ public class TestIndexSorting extends LuceneTestCase {
         assertTrue(sparseValues.advanceExact(docID));
         assertTrue(sparseBinaryValues.advanceExact(docID));
         assertTrue(normsValues.advanceExact(docID));
-        assertEquals(124, normsValues.longValue());
+        assertEquals(1, normsValues.longValue());
         assertEquals(127-docID, (int) sparseValues.longValue());
         assertEquals(new BytesRef(Integer.toString(127-docID)), sparseBinaryValues.binaryValue());
       } else {
@@ -17,6 +17,7 @@
 package org.apache.lucene.index;
 
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -26,7 +27,9 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.search.similarities.TFIDFSimilarity;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
@@ -35,12 +38,12 @@ import org.apache.lucene.util.TestUtil;
 /**
  * Tests the maxTermFrequency statistic in FieldInvertState
  */
-public class TestMaxTermFrequency extends LuceneTestCase { 
+public class TestMaxTermFrequency extends LuceneTestCase {
   Directory dir;
   IndexReader reader;
   /* expected maxTermFrequency values for our documents */
   ArrayList<Integer> expected = new ArrayList<>();
 
   @Override
   public void setUp() throws Exception {
     super.setUp();
@@ -59,14 +62,14 @@ public class TestMaxTermFrequency extends LuceneTestCase {
     reader = writer.getReader();
     writer.close();
   }
-  
+
   @Override
   public void tearDown() throws Exception {
     reader.close();
     dir.close();
     super.tearDown();
   }
-  
+
   public void test() throws Exception {
     NumericDocValues fooNorms = MultiDocValues.getNormValues(reader, "foo");
     for (int i = 0; i < reader.maxDoc(); i++) {
@@ -95,30 +98,42 @@ public class TestMaxTermFrequency extends LuceneTestCase {
     Collections.shuffle(terms, random());
     return Arrays.toString(terms.toArray(new String[terms.size()]));
   }
 
   /**
    * Simple similarity that encodes maxTermFrequency directly as a byte
    */
-  static class TestSimilarity extends TFIDFSimilarity {
+  static class TestSimilarity extends Similarity {
 
     @Override
-    public float lengthNorm(FieldInvertState state) {
+    public long computeNorm(FieldInvertState state) {
       return state.getMaxTermFrequency();
     }
 
     @Override
-    public long encodeNormValue(float f) {
-      return (byte) f;
+    public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+      return new SimWeight() {};
     }
 
     @Override
-    public float decodeNormValue(long norm) {
-      return norm;
+    public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
+      return new SimScorer() {
+
+        @Override
+        public float score(int doc, float freq) throws IOException {
+          return 0;
+        }
+
+        @Override
+        public float computeSlopFactor(int distance) {
+          return 0;
+        }
+
+        @Override
+        public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
+          return 0;
+        }
+      };
     }
-
-    @Override public float tf(float freq) { return 0; }
-    @Override public float idf(long docFreq, long docCount) { return 0; }
-    @Override public float sloppyFreq(int distance) { return 0; }
-    @Override public float scorePayload(int doc, int start, int end, BytesRef payload) { return 0; }
   }
 }
@@ -32,13 +32,11 @@ import org.apache.lucene.search.TermStatistics;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
 import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
 import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.similarities.TFIDFSimilarity;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 
 /**
@@ -49,67 +47,6 @@ import org.apache.lucene.util.TestUtil;
 @Slow
 public class TestNorms extends LuceneTestCase {
   static final String BYTE_TEST_FIELD = "normsTestByte";
-
-  static class CustomNormEncodingSimilarity extends TFIDFSimilarity {
-
-    @Override
-    public long encodeNormValue(float f) {
-      return (long) f;
-    }
-
-    @Override
-    public float decodeNormValue(long norm) {
-      return norm;
-    }
-
-    @Override
-    public float lengthNorm(FieldInvertState state) {
-      return state.getLength();
-    }
-
-    @Override public float tf(float freq) { return 0; }
-    @Override public float idf(long docFreq, long docCount) { return 0; }
-    @Override public float sloppyFreq(int distance) { return 0; }
-    @Override public float scorePayload(int doc, int start, int end, BytesRef payload) { return 0; }
-  }
-
-  // LUCENE-1260
-  public void testCustomEncoder() throws Exception {
-    Directory dir = newDirectory();
-    MockAnalyzer analyzer = new MockAnalyzer(random());
-
-    IndexWriterConfig config = newIndexWriterConfig(analyzer);
-    config.setSimilarity(new CustomNormEncodingSimilarity());
-    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
-    Document doc = new Document();
-    Field foo = newTextField("foo", "", Field.Store.NO);
-    Field bar = newTextField("bar", "", Field.Store.NO);
-    doc.add(foo);
-    doc.add(bar);
-
-    for (int i = 0; i < 100; i++) {
-      bar.setStringValue("singleton");
-      writer.addDocument(doc);
-    }
-
-    IndexReader reader = writer.getReader();
-    writer.close();
-
-    NumericDocValues fooNorms = MultiDocValues.getNormValues(reader, "foo");
-    for (int i = 0; i < reader.maxDoc(); i++) {
-      assertEquals(i, fooNorms.nextDoc());
-      assertEquals(0, fooNorms.longValue());
-    }
-
-    NumericDocValues barNorms = MultiDocValues.getNormValues(reader, "bar");
-    for (int i = 0; i < reader.maxDoc(); i++) {
-      assertEquals(i, barNorms.nextDoc());
-      assertEquals(1, barNorms.longValue());
-    }
-
-    reader.close();
-    dir.close();
-  }
-
   public void testMaxByteNorms() throws IOException {
     Directory dir = newFSDirectory(createTempDir("TestNorms.testMaxByteNorms"));
@@ -44,9 +44,7 @@ import org.apache.lucene.util.LuceneTestCase;
 public class TestOmitTf extends LuceneTestCase {
 
   public static class SimpleSimilarity extends TFIDFSimilarity {
-    @Override public float decodeNormValue(long norm) { return norm; }
-    @Override public long encodeNormValue(float f) { return (long) f; }
-    @Override public float lengthNorm(FieldInvertState state) { return 1; }
+    @Override public float lengthNorm(int length) { return 1; }
    @Override public float tf(float freq) { return freq; }
     @Override public float sloppyFreq(int distance) { return 2.0f; }
     @Override public float idf(long docFreq, long docCount) { return 1.0f; }
@@ -30,7 +30,6 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -72,7 +71,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
   }
 
   @Override
-  public float lengthNorm(FieldInvertState state) {
+  public float lengthNorm(int length) {
     // Disable length norm
     return 1;
   }
@@ -33,6 +33,7 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.FieldValueHitQueue.Entry;
+import org.apache.lucene.search.similarities.BM25Similarity;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -63,7 +64,7 @@ public class TestElevationComparator extends LuceneTestCase {
     writer.close();
 
     IndexSearcher searcher = newSearcher(r);
-    searcher.setSimilarity(new ClassicSimilarity());
+    searcher.setSimilarity(new BM25Similarity());
 
     runTest(searcher, true);
     runTest(searcher, false);
@@ -98,11 +99,11 @@ public class TestElevationComparator extends LuceneTestCase {
     assertEquals(3, topDocs.scoreDocs[1].doc);
 
     if (reversed) {
-      assertEquals(2, topDocs.scoreDocs[2].doc);
-      assertEquals(1, topDocs.scoreDocs[3].doc);
-    } else {
       assertEquals(1, topDocs.scoreDocs[2].doc);
       assertEquals(2, topDocs.scoreDocs[3].doc);
+    } else {
+      assertEquals(2, topDocs.scoreDocs[2].doc);
+      assertEquals(1, topDocs.scoreDocs[3].doc);
     }
 
    /*
@@ -37,6 +37,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.similarities.BM25Similarity;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
@@ -309,7 +310,7 @@ public class TestPhraseQuery extends LuceneTestCase {
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
         newIndexWriterConfig(new MockAnalyzer(random()))
           .setMergePolicy(newLogMergePolicy())
-          .setSimilarity(new ClassicSimilarity()));
+          .setSimilarity(new BM25Similarity()));
 
     Document doc = new Document();
     doc.add(newTextField("field", "foo firstname lastname foo", Field.Store.YES));
@@ -335,9 +336,9 @@ public class TestPhraseQuery extends LuceneTestCase {
     // each other get a higher score:
     assertEquals(1.0, hits[0].score, 0.01);
     assertEquals(0, hits[0].doc);
-    assertEquals(0.62, hits[1].score, 0.01);
+    assertEquals(0.63, hits[1].score, 0.01);
     assertEquals(1, hits[1].doc);
-    assertEquals(0.43, hits[2].score, 0.01);
+    assertEquals(0.47, hits[2].score, 0.01);
     assertEquals(2, hits[2].doc);
     QueryUtils.check(random(), query,searcher);
     reader.close();
@@ -26,6 +26,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
@@ -49,9 +50,14 @@ public class TestQueryRescorer extends LuceneTestCase {
     return searcher;
   }
 
+  public static IndexWriterConfig newIndexWriterConfig() {
+    // We rely on more tokens = lower score:
+    return LuceneTestCase.newIndexWriterConfig().setSimilarity(new ClassicSimilarity());
+  }
+
   public void testBasic() throws Exception {
     Directory dir = newDirectory();
-    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
 
     Document doc = new Document();
     doc.add(newStringField("id", "0", Field.Store.YES));
@@ -106,7 +112,7 @@ public class TestQueryRescorer extends LuceneTestCase {
   // Test LUCENE-5682
   public void testNullScorerTermQuery() throws Exception {
     Directory dir = newDirectory();
-    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
 
     Document doc = new Document();
     doc.add(newStringField("id", "0", Field.Store.YES));
@@ -145,7 +151,7 @@ public class TestQueryRescorer extends LuceneTestCase {
 
   public void testCustomCombine() throws Exception {
     Directory dir = newDirectory();
-    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
 
     Document doc = new Document();
     doc.add(newStringField("id", "0", Field.Store.YES));
@@ -196,7 +202,7 @@ public class TestQueryRescorer extends LuceneTestCase {
 
   public void testExplain() throws Exception {
     Directory dir = newDirectory();
-    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
 
     Document doc = new Document();
     doc.add(newStringField("id", "0", Field.Store.YES));
@@ -271,7 +277,7 @@ public class TestQueryRescorer extends LuceneTestCase {
 
   public void testMissingSecondPassScore() throws Exception {
     Directory dir = newDirectory();
-    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
 
     Document doc = new Document();
     doc.add(newStringField("id", "0", Field.Store.YES));
@@ -325,7 +331,7 @@ public class TestQueryRescorer extends LuceneTestCase {
   public void testRandom() throws Exception {
     Directory dir = newDirectory();
     int numDocs = atLeast(1000);
-    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
 
     final int[] idToNum = new int[numDocs];
     int maxValue = TestUtil.nextInt(random(), 10, 1000000);
@@ -17,20 +17,18 @@
 package org.apache.lucene.search;
 
 
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.util.LuceneTestCase;
-
 import java.io.IOException;
 
-import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
 
 /** Similarity unit test.
  *
@@ -39,7 +37,7 @@ import org.apache.lucene.document.Document;
 public class TestSimilarity extends LuceneTestCase {
 
   public static class SimpleSimilarity extends ClassicSimilarity {
-    @Override public float lengthNorm(FieldInvertState state) { return 1; }
+    @Override public float lengthNorm(int length) { return 1; }
     @Override public float tf(float freq) { return freq; }
     @Override public float sloppyFreq(int distance) { return 2.0f; }
     @Override public float idf(long docFreq, long docCount) { return 1.0f; }
@@ -17,19 +17,21 @@
 package org.apache.lucene.search;
 
 import java.io.IOException;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
 import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.similarities.TFIDFSimilarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
@@ -38,7 +40,7 @@ public class TestSimilarityProvider extends LuceneTestCase {
   private Directory directory;
   private DirectoryReader reader;
   private IndexSearcher searcher;
-  
+
   @Override
   public void setUp() throws Exception {
     super.setUp();
@@ -51,7 +53,7 @@ public class TestSimilarityProvider extends LuceneTestCase {
     doc.add(field);
     Field field2 = newTextField("bar", "", Field.Store.NO);
     doc.add(field2);
-    
+
     field.setStringValue("quick brown fox");
     field2.setStringValue("quick brown fox");
     iw.addDocument(doc);
@@ -63,14 +65,14 @@ public class TestSimilarityProvider extends LuceneTestCase {
     searcher = newSearcher(reader);
     searcher.setSimilarity(sim);
   }
-  
+
   @Override
   public void tearDown() throws Exception {
     reader.close();
     directory.close();
     super.tearDown();
   }
-  
+
   public void testBasics() throws Exception {
     // sanity check of norms writer
     // TODO: generalize
@@ -81,7 +83,7 @@ public class TestSimilarityProvider extends LuceneTestCase {
       assertEquals(i, barNorms.nextDoc());
       assertFalse(fooNorms.longValue() == barNorms.longValue());
     }
-    
+
     // sanity check of searching
     TopDocs foodocs = searcher.search(new TermQuery(new Term("foo", "brown")), 10);
     assertTrue(foodocs.totalHits > 0);
@@ -89,11 +91,11 @@ public class TestSimilarityProvider extends LuceneTestCase {
     assertTrue(bardocs.totalHits > 0);
     assertTrue(foodocs.scoreDocs[0].score < bardocs.scoreDocs[0].score);
   }
-  
+
   private static class ExampleSimilarityProvider extends PerFieldSimilarityWrapper {
     private Similarity sim1 = new Sim1();
     private Similarity sim2 = new Sim2();
-    
+
     @Override
     public Similarity get(String field) {
       if (field.equals("foo")) {
@@ -103,80 +105,73 @@ public class TestSimilarityProvider extends LuceneTestCase {
       }
     }
   }
 
-  private static class Sim1 extends TFIDFSimilarity {
-
-    @Override
-    public long encodeNormValue(float f) {
-      return (long) f;
-    }
-
-    @Override
-    public float decodeNormValue(long norm) {
-      return norm;
-    }
+  private static class Sim1 extends Similarity {
 
     @Override
-    public float lengthNorm(FieldInvertState state) {
-      return 1f;
+    public long computeNorm(FieldInvertState state) {
+      return 1;
     }
 
     @Override
-    public float sloppyFreq(int distance) {
-      return 1f;
+    public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+      return new SimWeight() {};
     }
 
     @Override
-    public float tf(float freq) {
-      return 1f;
-    }
-
-    @Override
-    public float idf(long docFreq, long docCount) {
-      return 1f;
-    }
-
-    @Override
-    public float scorePayload(int doc, int start, int end, BytesRef payload) {
-      return 1f;
+    public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
+      return new SimScorer() {
+
+        @Override
+        public float score(int doc, float freq) throws IOException {
+          return 1;
+        }
+
+        @Override
+        public float computeSlopFactor(int distance) {
+          return 1;
+        }
+
+        @Override
+        public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
+          return 1;
+        }
+      };
     }
   }
 
-  private static class Sim2 extends TFIDFSimilarity {
-
-    @Override
-    public long encodeNormValue(float f) {
-      return (long) f;
-    }
-
-    @Override
-    public float decodeNormValue(long norm) {
-      return norm;
-    }
+  private static class Sim2 extends Similarity {
 
     @Override
-    public float lengthNorm(FieldInvertState state) {
-      return 10f;
+    public long computeNorm(FieldInvertState state) {
+      return 10;
    }
 
     @Override
-    public float sloppyFreq(int distance) {
-      return 10f;
+    public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+      return new SimWeight() {};
     }
 
     @Override
-    public float tf(float freq) {
-      return 10f;
-    }
-
-    @Override
-    public float idf(long docFreq, long docCount) {
-      return 10f;
-    }
-
-    @Override
-    public float scorePayload(int doc, int start, int end, BytesRef payload) {
-      return 1f;
+    public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
+      return new SimScorer() {
+
+        @Override
+        public float score(int doc, float freq) throws IOException {
+          return 10;
+        }
+
+        @Override
+        public float computeSlopFactor(int distance) {
+          return 1;
+        }
+
+        @Override
+        public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
+          return 1;
+        }
+      };
    }
   }
 }
|
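The rewritten Sim1/Sim2 above show the full surface a Similarity needs once norms are plain longs: computeNorm(FieldInvertState), computeWeight(...) and simScorer(...). A minimal self-contained sketch in the same style (ConstantSimilarity and its fixed score value are illustrative, not part of this patch):

import java.io.IOException;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;

/** Sketch: a Similarity that ignores norms and statistics and scores every hit the same. */
final class ConstantSimilarity extends Similarity {
  private final float value;

  ConstantSimilarity(float value) {
    this.value = value;
  }

  @Override
  public long computeNorm(FieldInvertState state) {
    return 1; // one norm bucket; field length plays no role here
  }

  @Override
  public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    return new SimWeight() {}; // no per-query state to carry
  }

  @Override
  public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
    return new SimScorer() {
      @Override
      public float score(int doc, float freq) throws IOException {
        return value; // constant score, regardless of freq and norms
      }

      @Override
      public float computeSlopFactor(int distance) {
        return 1;
      }

      @Override
      public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
        return 1;
      }
    };
  }
}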
@@ -42,7 +42,7 @@ public class TestSortRescorer extends LuceneTestCase {
  public void setUp() throws Exception {
    super.setUp();
    dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, newIndexWriterConfig().setSimilarity(new ClassicSimilarity()));

    Document doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
@@ -20,19 +20,6 @@ import org.apache.lucene.util.LuceneTestCase;

public class TestAxiomaticSimilarity extends LuceneTestCase {

  public void testSaneNormValues() {
    Axiomatic sim = new AxiomaticF2EXP();
    for (int i = 0; i < 256; i++) {
      float len = sim.decodeNormValue((byte) i);
      assertFalse("negative len: " + len + ", byte=" + i, len < 0.0f);
      assertFalse("inf len: " + len + ", byte=" + i, Float.isInfinite(len));
      assertFalse("nan len for byte=" + i, Float.isNaN(len));
      if (i > 0) {
        assertTrue("len is not decreasing: " + len + ",byte=" + i, len < sim.decodeNormValue((byte) (i - 1)));
      }
    }
  }

  public void testIllegalS() {
    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
      new AxiomaticF2EXP(Float.POSITIVE_INFINITY, 0.1f);
@@ -17,23 +17,27 @@
 */
package org.apache.lucene.search.similarities;

import java.io.IOException;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;

public class TestBM25Similarity extends LuceneTestCase {

  public void testSaneNormValues() {
    BM25Similarity sim = new BM25Similarity();
    for (int i = 0; i < 256; i++) {
      float len = sim.decodeNormValue((byte) i);
      assertFalse("negative len: " + len + ", byte=" + i, len < 0.0f);
      assertFalse("inf len: " + len + ", byte=" + i, Float.isInfinite(len));
      assertFalse("nan len for byte=" + i, Float.isNaN(len));
      if (i > 0) {
        assertTrue("len is not decreasing: " + len + ",byte=" + i, len < sim.decodeNormValue((byte)(i-1)));
      }
    }
  }

  public void testIllegalK1() {
    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
      new BM25Similarity(Float.POSITIVE_INFINITY, 0.75f);

@@ -72,4 +76,44 @@ public class TestBM25Similarity extends LuceneTestCase {
    });
    assertTrue(expected.getMessage().contains("illegal b value"));
  }

  public void testLengthEncodingBackwardCompatibility() throws IOException {
    Similarity similarity = new BM25Similarity();
    for (int indexCreatedVersionMajor : new int[] { Version.LUCENE_6_0_0.major, Version.LATEST.major}) {
      for (int length : new int[] {1, 2, 4}) { // these length values are encoded accurately in both cases
        Directory dir = newDirectory();
        // set the version on the directory
        new SegmentInfos(indexCreatedVersionMajor).commit(dir);
        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(similarity));
        Document doc = new Document();
        String value = IntStream.range(0, length).mapToObj(i -> "b").collect(Collectors.joining(" "));
        doc.add(new TextField("foo", value, Store.NO));
        w.addDocument(doc);
        IndexReader reader = DirectoryReader.open(w);
        IndexSearcher searcher = newSearcher(reader);
        searcher.setSimilarity(similarity);
        Explanation expl = searcher.explain(new TermQuery(new Term("foo", "b")), 0);
        Explanation docLen = findExplanation(expl, "fieldLength");
        assertNotNull(docLen);
        assertEquals(docLen.toString(), length, (int) docLen.getValue());
        w.close();
        reader.close();
        dir.close();
      }
    }
  }

  private static Explanation findExplanation(Explanation expl, String text) {
    if (expl.getDescription().equals(text)) {
      return expl;
    } else {
      for (Explanation sub : expl.getDetails()) {
        Explanation match = findExplanation(sub, text);
        if (match != null) {
          return match;
        }
      }
    }
    return null;
  }
}
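The comment on the length loop above is the crux of the backward-compatibility story: 1, 2 and 4 are lengths that round-trip exactly through both the pre-7.0 byte-encoded norms and the new SmallFloat integer encoding exercised by TestSmallFloat further down. A hedged sanity check using the SmallFloat methods this patch introduces:

import org.apache.lucene.util.SmallFloat;

public class LengthEncodingCheck {
  public static void main(String[] args) {
    for (int length : new int[] {1, 2, 4}) {
      // values below 2^4 keep all their bits under the 4-bit mantissa scheme
      int roundTripped = SmallFloat.byte4ToInt(SmallFloat.intToByte4(length));
      System.out.println(length + " -> " + roundTripped); // prints 1, 2, 4 unchanged
    }
  }
}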
@@ -34,6 +34,7 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;

public class TestBooleanSimilarity extends LuceneTestCase {

@@ -105,8 +106,8 @@ public class TestBooleanSimilarity extends LuceneTestCase {
    for (int iter = 0; iter < 100; ++iter) {
      final int length = TestUtil.nextInt(random(), 1, 100);
      final int position = random().nextInt(length);
      final int numOverlaps = random().nextInt(50);
      FieldInvertState state = new FieldInvertState("foo", position, length, numOverlaps, 100);
      final int numOverlaps = random().nextInt(length);
      FieldInvertState state = new FieldInvertState(Version.LATEST.major, "foo", position, length, numOverlaps, 100);
      assertEquals(
          sim2.computeNorm(state),
          sim1.computeNorm(state),
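FieldInvertState now carries the major version the index was created with, which is how computeNorm can pick the right encoding per segment. A hedged sketch of computing a norm by hand with the new constructor (argument order taken from the test above; BM25Similarity stands in for any norm-producing Similarity):

// versionMajor, field, position, length, numOverlap, offset
FieldInvertState state = new FieldInvertState(Version.LATEST.major, "foo", 0, 5, 1, 100);
long norm = new BM25Similarity().computeNorm(state);
// with the default discountOverlaps=true this encodes an effective length of 5 - 1 = 4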
@@ -19,24 +19,34 @@ package org.apache.lucene.search.similarities;

import java.io.IOException;
import java.util.Arrays;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.TFIDFSimilarity.IDFStats;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;

public class TestClassicSimilarity extends LuceneTestCase {
  private Directory directory;

@@ -63,14 +73,6 @@ public class TestClassicSimilarity extends LuceneTestCase {
    IOUtils.close(indexReader, directory);
    super.tearDown();
  }

  // Javadocs give this as an example so we test to make sure it's correct:
  public void testPrecisionLoss() throws Exception {
    ClassicSimilarity sim = new ClassicSimilarity();
    float v = sim.decodeNormValue(sim.encodeNormValue(.89f));
    assertEquals(0.875f, v, 0.0001f);
  }

  public void testHit() throws IOException {
    Query query = new TermQuery(new Term("test", "hit"));

@@ -159,16 +161,83 @@ public class TestClassicSimilarity extends LuceneTestCase {
    assertTrue(topDocs.scoreDocs[0].score != 0);
  }

  public void testSaneNormValues() {
  public void testSaneNormValues() throws IOException {
    ClassicSimilarity sim = new ClassicSimilarity();
    for (int i = 0; i < 256; i++) {
      float boost = sim.decodeNormValue((byte) i);
      float boost = TFIDFSimilarity.OLD_NORM_TABLE[i];
      assertFalse("negative boost: " + boost + ", byte=" + i, boost < 0.0f);
      assertFalse("inf boost: " + boost + ", byte=" + i, Float.isInfinite(boost));
      assertFalse("nan boost for byte=" + i, Float.isNaN(boost));
      if (i > 0) {
        assertTrue("boost is not increasing: " + boost + ",byte=" + i, boost > sim.decodeNormValue((byte)(i-1)));
        assertTrue("boost is not increasing: " + boost + ",byte=" + i, boost > TFIDFSimilarity.OLD_NORM_TABLE[i-1]);
      }
    }

    TFIDFSimilarity.IDFStats stats = (IDFStats) sim.computeWeight(1f, new IndexSearcher(new MultiReader()).collectionStatistics("foo"));
    for (int i = 0; i < 256; i++) {
      float boost = stats.normTable[i];
      assertFalse("negative boost: " + boost + ", byte=" + i, boost < 0.0f);
      assertFalse("inf boost: " + boost + ", byte=" + i, Float.isInfinite(boost));
      assertFalse("nan boost for byte=" + i, Float.isNaN(boost));
      if (i > 0) {
        assertTrue("boost is not decreasing: " + boost + ",byte=" + i, boost < stats.normTable[i-1]);
      }
    }
  }

  public void testNormEncodingBackwardCompatibility() throws IOException {
    Similarity similarity = new ClassicSimilarity();
    for (int indexCreatedVersionMajor : new int[] { Version.LUCENE_6_0_0.major, Version.LATEST.major}) {
      for (int length : new int[] {1, 4, 16 }) { // these length values are encoded accurately in both cases
        Directory dir = newDirectory();
        // set the version on the directory
        new SegmentInfos(indexCreatedVersionMajor).commit(dir);
        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(similarity));
        Document doc = new Document();
        String value = IntStream.range(0, length).mapToObj(i -> "b").collect(Collectors.joining(" "));
        doc.add(new TextField("foo", value, Store.NO));
        w.addDocument(doc);
        IndexReader reader = DirectoryReader.open(w);
        IndexSearcher searcher = newSearcher(reader);
        searcher.setSimilarity(similarity);
        Explanation expl = searcher.explain(new TermQuery(new Term("foo", "b")), 0);
        Explanation fieldNorm = findExplanation(expl, "fieldNorm");
        assertNotNull(fieldNorm);
        assertEquals(fieldNorm.toString(), 1/Math.sqrt(length), fieldNorm.getValue(), 0f);
        w.close();
        reader.close();
        dir.close();
      }
    }
  }

  private static Explanation findExplanation(Explanation expl, String text) {
    if (expl.getDescription().startsWith(text)) {
      return expl;
    } else {
      for (Explanation sub : expl.getDetails()) {
        Explanation match = findExplanation(sub, text);
        if (match != null) {
          return match;
        }
      }
    }
    return null;
  }

  public void testSameNormsAsBM25() {
    ClassicSimilarity sim1 = new ClassicSimilarity();
    BM25Similarity sim2 = new BM25Similarity();
    sim2.setDiscountOverlaps(true);
    for (int iter = 0; iter < 100; ++iter) {
      final int length = TestUtil.nextInt(random(), 1, 1000);
      final int position = random().nextInt(length);
      final int numOverlaps = random().nextInt(length);
      FieldInvertState state = new FieldInvertState(Version.LATEST.major, "foo", position, length, numOverlaps, 100);
      assertEquals(
          sim2.computeNorm(state),
          sim1.computeNorm(state),
          0f);
    }
  }
}
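The 0f delta in testNormEncodingBackwardCompatibility above is intentional: for lengths 1, 4 and 16 the expected fieldNorm 1/sqrt(length) is a power of two, so both the old byte-quantized encoding and the new one hold it exactly. Worked out:

// 1/sqrt(1)  = 1.0
// 1/sqrt(4)  = 0.5
// 1/sqrt(16) = 0.25
for (int length : new int[] {1, 4, 16}) {
  System.out.println(length + " -> " + 1 / Math.sqrt(length));
}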
@@ -20,16 +20,23 @@ package org.apache.lucene.search.similarities;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;

@@ -37,9 +44,13 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.Similarity.SimWeight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;

/**
 * Tests the {@link SimilarityBase}-based Similarities. Contains unit tests and

@@ -586,11 +597,11 @@ public class TestSimilarityBase extends LuceneTestCase {

  // LUCENE-5221
  public void testDiscountOverlapsBoost() throws IOException {
    ClassicSimilarity expected = new ClassicSimilarity();
    BM25Similarity expected = new BM25Similarity();
    SimilarityBase actual = new DFRSimilarity(new BasicModelIne(), new AfterEffectB(), new NormalizationH2());
    expected.setDiscountOverlaps(false);
    actual.setDiscountOverlaps(false);
    FieldInvertState state = new FieldInvertState("foo");
    FieldInvertState state = new FieldInvertState(Version.LATEST.major, "foo");
    state.setLength(5);
    state.setNumOverlap(2);
    assertEquals(expected.computeNorm(state), actual.computeNorm(state));

@@ -598,64 +609,32 @@ public class TestSimilarityBase extends LuceneTestCase {
    actual.setDiscountOverlaps(true);
    assertEquals(expected.computeNorm(state), actual.computeNorm(state));
  }

  public void testSaneNormValues() {
    for (SimilarityBase sim : sims) {
      for (int i = 0; i < 256; i++) {
        float len = sim.decodeNormValue((byte) i);
        assertFalse("negative len: " + len + ", byte=" + i + ", sim=" + sim, len < 0.0f);
        assertFalse("inf len: " + len + ", byte=" + i + ", sim=" + sim, Float.isInfinite(len));
        assertFalse("nan len for byte=" + i + ", sim=" + sim, Float.isNaN(len));
        if (i > 0) {
          assertTrue("len is not decreasing: " + len + ",byte=" + i + ",sim=" + sim, len < sim.decodeNormValue((byte)(i-1)));
        }
      }
    }
  }

  /**
   * make sure the similarity does not go crazy when tested against all possible norm values.
   */
  public void testCrazyIndexTimeBoosts() throws Exception {
    long avgLength = 750;
    long docCount = 500000;
    long numTokens = docCount * avgLength;

    CollectionStatistics collectionStats = new CollectionStatistics("body", docCount, docCount, numTokens, numTokens);

    long docFreq = 2000;
    long totalTermFreq = 2000 * avgLength;

    TermStatistics termStats = new TermStatistics(new BytesRef("term"), docFreq, totalTermFreq);

    for (SimilarityBase sim : sims) {
      if (sim instanceof IBSimilarity) {
        if (((IBSimilarity)sim).getDistribution() instanceof DistributionSPL) {
          // score goes infinite for tiny doc lengths and negative for huge doc lengths
          // TODO: fix this
          continue;
        }
      } else if (sim instanceof DFRSimilarity) {
        BasicModel model = ((DFRSimilarity)sim).getBasicModel();
        if (model instanceof BasicModelD || model instanceof BasicModelP) {
          // score goes NaN for tiny doc lengths
          // TODO: fix this
          continue;
        } else if (model instanceof BasicModelBE) {
          // score goes negative infinity for tiny doc lengths
          // TODO: fix this
          continue;
        }
      }
      BasicStats stats = (BasicStats) sim.computeWeight(1f, collectionStats, termStats);
      for (float tf = 1.0f; tf <= 10.0f; tf += 1.0f) {
        for (int i = 0; i < 256; i++) {
          float len = sim.decodeNormValue((byte) i);
          float score = sim.score(stats, tf, len);
          assertFalse("negative score for " + sim + ", len=" + len + ",score=" + score, score < 0.0f);
          assertFalse("inf score for " + sim + ", len=" + len, Float.isInfinite(score));
          assertFalse("nan score for " + sim + ", len=" + len, Float.isNaN(score));
        }

  public void testLengthEncodingBackwardCompatibility() throws IOException {
    Similarity similarity = RandomPicks.randomFrom(random(), sims);
    for (int indexCreatedVersionMajor : new int[] { Version.LUCENE_6_0_0.major, Version.LATEST.major}) {
      for (int length : new int[] {1, 2, 4}) { // these length values are encoded accurately in both cases
        Directory dir = newDirectory();
        // set the version on the directory
        new SegmentInfos(indexCreatedVersionMajor).commit(dir);
        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(similarity));
        Document doc = new Document();
        String value = IntStream.range(0, length).mapToObj(i -> "b").collect(Collectors.joining(" "));
        doc.add(new TextField("foo", value, Store.NO));
        w.addDocument(doc);
        IndexReader reader = DirectoryReader.open(w);
        IndexSearcher searcher = newSearcher(reader);
        searcher.setSimilarity(similarity);
        Term term = new Term("foo", "b");
        TermContext context = TermContext.build(reader.getContext(), term);
        SimWeight simWeight = similarity.computeWeight(1f, searcher.collectionStatistics("foo"), searcher.termStatistics(term, context));
        SimilarityBase.BasicSimScorer simScorer = (SimilarityBase.BasicSimScorer) similarity.simScorer(simWeight, reader.leaves().get(0));
        float docLength = simScorer.getLengthValue(0);
        assertEquals(length, (int) docLength);

        w.close();
        reader.close();
        dir.close();
      }
    }
  }
@@ -16,6 +16,8 @@
 */
package org.apache.lucene.util;

import java.util.Arrays;

public class TestSmallFloat extends LuceneTestCase {

  // original lucene byteToFloat

@@ -87,10 +89,6 @@ public class TestSmallFloat extends LuceneTestCase {
      float f3 = SmallFloat.byte315ToFloat((byte)i);
      assertEquals(f1,f2,0.0);
      assertEquals(f2,f3,0.0);

      float f4 = SmallFloat.byteToFloat((byte)i,5,2);
      float f5 = SmallFloat.byte52ToFloat((byte)i);
      assertEquals(f4,f5,0.0);
    }
  }

@@ -121,10 +119,51 @@ public class TestSmallFloat extends LuceneTestCase {
      byte b3 = SmallFloat.floatToByte315(f);
      assertEquals(b1,b2);
      assertEquals(b2,b3);
    }
  }

      byte b4 = SmallFloat.floatToByte(f,5,2);
      byte b5 = SmallFloat.floatToByte52(f);
      assertEquals(b4,b5);
  public void testInt4() {
    for (int i = 0; i <= 16; ++i) {
      // all values in 0-16 are encoded accurately
      assertEquals(i, SmallFloat.int4ToLong(SmallFloat.longToInt4(i)));
    }
    final int maxEncoded = SmallFloat.longToInt4(Long.MAX_VALUE);
    for (int i = 1; i < maxEncoded; ++i) {
      assertTrue(SmallFloat.int4ToLong(i) > SmallFloat.int4ToLong(i - 1));
    }
    final int iters = atLeast(1000);
    for (int iter = 0; iter < iters; ++iter) {
      final long l = TestUtil.nextLong(random(), 0, 1L << TestUtil.nextInt(random(), 5, 61));
      int numBits = 64 - Long.numberOfLeadingZeros(l);
      long expected = l;
      if (numBits > 4) {
        long mask = ~0L << (numBits - 4);
        expected &= mask;
      }
      long l2 = SmallFloat.int4ToLong(SmallFloat.longToInt4(l));
      assertEquals(expected, l2);
    }
  }

  public void testByte4() {
    int[] decoded = new int[256];
    for (int b = 0; b < 256; ++b) {
      decoded[b] = SmallFloat.byte4ToInt((byte) b);
      assertEquals((byte) b, SmallFloat.intToByte4(decoded[b]));
    }
    for (int i = 1; i < 256; ++i) {
      assertTrue(decoded[i] > decoded[i-1]);
    }
    assertEquals((byte) 255, SmallFloat.intToByte4(Integer.MAX_VALUE));
    final int iters = atLeast(1000);
    for (int iter = 0; iter < iters; ++iter) {
      final int i = random().nextInt(1 << TestUtil.nextInt(random(), 5, 30));
      int idx = Arrays.binarySearch(decoded, i);
      if (idx < 0) {
        idx = -2 - idx;
      }
      assertTrue(decoded[idx] <= i);
      assertEquals((byte) idx, SmallFloat.intToByte4(i));
    }
  }

@@ -146,5 +185,4 @@ public class TestSmallFloat extends LuceneTestCase {
    }
  }
  ***/

}
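The expected-value computation inside testInt4 is the contract of the new length encoding in one place: everything past the four most significant bits is dropped. A worked example for l = 1000:

long l = 1000;                                    // binary 1111101000, 10 significant bits
int numBits = 64 - Long.numberOfLeadingZeros(l);  // 10
long mask = ~0L << (numBits - 4);                 // keep only the top 4 bits
long expected = l & mask;                         // binary 1111000000 = 960
// so SmallFloat.int4ToLong(SmallFloat.longToInt4(1000)) == 960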
@@ -44,7 +44,7 @@ public class TestExpressionRescorer extends LuceneTestCase {
  public void setUp() throws Exception {
    super.setUp();
    dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, newIndexWriterConfig().setSimilarity(new ClassicSimilarity()));

    Document doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
@@ -72,6 +72,8 @@ import org.apache.lucene.search.PhraseQuery.Builder;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;

@@ -147,7 +149,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
    CustomScoreQuery query = new CustomScoreQuery(termQuery);

    searcher = newSearcher(reader);
    TopDocs hits = searcher.search(query, 10);
    TopDocs hits = searcher.search(query, 10, new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE));
    assertEquals(2, hits.totalHits);
    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
    Highlighter highlighter = new Highlighter(scorer);

@@ -199,7 +201,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
    query.add(new Term(FIELD_NAME, "very"));

    searcher = newSearcher(reader);
    TopDocs hits = searcher.search(query, 10);
    TopDocs hits = searcher.search(query, 10, new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE));
    assertEquals(2, hits.totalHits);
    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
    Highlighter highlighter = new Highlighter(scorer);

@@ -271,7 +273,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
    };

    searcher = newSearcher(reader);
    TopDocs hits = searcher.search(query, 10);
    TopDocs hits = searcher.search(query, 10, new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE));
    assertEquals(2, hits.totalHits);
    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
    Highlighter highlighter = new Highlighter(scorer);
@@ -892,7 +892,7 @@ public class MemoryIndex {

    NumericDocValues getNormDocValues() {
      if (norm == null) {
        FieldInvertState invertState = new FieldInvertState(fieldInfo.name, fieldInfo.number,
        FieldInvertState invertState = new FieldInvertState(Version.LATEST.major, fieldInfo.name, fieldInfo.number,
            numTokens, numOverlapTokens, 0);
        final long value = normSimilarity.computeNorm(invertState);
        if (DEBUG) System.err.println("MemoryIndexReader.norms: " + fieldInfo.name + ":" + value + ":" + numTokens);
@@ -50,6 +50,7 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SortedDocValues;

@@ -57,13 +58,16 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

@@ -145,32 +149,32 @@ public class TestMemoryIndex extends LuceneTestCase {

    assertEquals(reader.getTermVectors(0).size(), 1);
  }

  public void testReaderConsistency() throws IOException {
    Analyzer analyzer = new MockPayloadAnalyzer();

    // defaults
    MemoryIndex mi = new MemoryIndex();
    mi.addField("field", "some terms be here", analyzer);
    TestUtil.checkReader(mi.createSearcher().getIndexReader());

    // all combinations of offsets/payloads options
    mi = new MemoryIndex(true, true);
    mi.addField("field", "some terms be here", analyzer);
    TestUtil.checkReader(mi.createSearcher().getIndexReader());

    mi = new MemoryIndex(true, false);
    mi.addField("field", "some terms be here", analyzer);
    TestUtil.checkReader(mi.createSearcher().getIndexReader());

    mi = new MemoryIndex(false, true);
    mi.addField("field", "some terms be here", analyzer);
    TestUtil.checkReader(mi.createSearcher().getIndexReader());

    mi = new MemoryIndex(false, false);
    mi.addField("field", "some terms be here", analyzer);
    TestUtil.checkReader(mi.createSearcher().getIndexReader());

    analyzer.close();
  }

@@ -187,11 +191,23 @@ public class TestMemoryIndex extends LuceneTestCase {
    float n1 = norms.longValue();

    // Norms are re-computed when we change the Similarity
    mi.setSimilarity(new ClassicSimilarity() {
    mi.setSimilarity(new Similarity() {

      @Override
      public float lengthNorm(FieldInvertState state) {
      public long computeNorm(FieldInvertState state) {
        return 74;
      }

      @Override
      public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
        throw new UnsupportedOperationException();
      }

      @Override
      public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
        throw new UnsupportedOperationException();
      }

    });
    norms = reader.getNormValues("f1");
    assertEquals(0, norms.nextDoc());
@@ -17,7 +17,6 @@
package org.apache.lucene.misc;

import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.index.FieldInvertState;

/**
 * <p>

@@ -86,8 +85,7 @@ public class SweetSpotSimilarity extends ClassicSimilarity {
   * Sets the default function variables used by lengthNorm when no field
   * specific variables have been set.
   *
   * @see #computeLengthNorm
   * @see #lengthNorm
   */
  public void setLengthNormFactors(int min, int max, float steepness, boolean discountOverlaps) {
    this.ln_min = min;

@@ -94,25 +93,6 @@ public class SweetSpotSimilarity extends ClassicSimilarity {
    this.ln_steep = steepness;
    this.discountOverlaps = discountOverlaps;
  }

  /**
   * Implemented as <code> state.getBoost() *
   * computeLengthNorm(numTokens) </code> where
   * numTokens does not count overlap tokens if
   * discountOverlaps is true by default or true for this
   * specific field.
   */
  @Override
  public float lengthNorm(FieldInvertState state) {
    final int numTokens;

    if (discountOverlaps)
      numTokens = state.getLength() - state.getNumOverlap();
    else
      numTokens = state.getLength();

    return computeLengthNorm(numTokens);
  }

  /**
   * Implemented as:

@@ -133,7 +113,8 @@ public class SweetSpotSimilarity extends ClassicSimilarity {
   * @see #setLengthNormFactors
   * @see <a href="doc-files/ss.computeLengthNorm.svg">An SVG visualization of this function</a>
   */
  public float computeLengthNorm(int numTerms) {
  @Override
  public float lengthNorm(int numTerms) {
    final int l = ln_min;
    final int h = ln_max;
    final float s = ln_steep;
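After this change SweetSpotSimilarity no longer looks at FieldInvertState itself: it overrides the int-based lengthNorm hook, and the token count (overlap-discounted, if configured) is computed before the hook is called. A short usage sketch, with the plateau values the tests below use:

SweetSpotSimilarity ss = new SweetSpotSimilarity();
ss.setLengthNormFactors(3, 10, 0.5f, true); // lengths 3..10 form the sweet spot
float inPlateau = ss.lengthNorm(5);  // 1.0f inside the plateau
float outside = ss.lengthNorm(50);   // < 1.0f, degrading with distance from the plateau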
@@ -16,27 +16,62 @@
 */
package org.apache.lucene.misc;

import java.io.IOException;
import java.util.Collections;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.index.FieldInvertState;

/**
 * Test of the SweetSpotSimilarity
 */
public class SweetSpotSimilarityTest extends LuceneTestCase {

  public static float computeAndDecodeNorm(SweetSpotSimilarity decode, Similarity encode, FieldInvertState state) {
    return decode.decodeNormValue(computeAndGetNorm(encode, state));
  }

  public static byte computeAndGetNorm(Similarity s, FieldInvertState state) {
    return (byte) s.computeNorm(state);
  private static float computeNorm(Similarity sim, String field, int length) throws IOException {
    String value = IntStream.range(0, length).mapToObj(i -> "a").collect(Collectors.joining(" "));
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim));
    w.addDocument(Collections.singleton(newTextField(field, value, Store.NO)));
    DirectoryReader reader = DirectoryReader.open(w);
    w.close();
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(sim);
    Explanation expl = searcher.explain(new TermQuery(new Term(field, "a")), 0);
    reader.close();
    dir.close();
    Explanation norm = findExplanation(expl, "fieldNorm");
    assertNotNull(norm);
    return norm.getValue();
  }

  public void testSweetSpotComputeNorm() {
  private static Explanation findExplanation(Explanation expl, String text) {
    if (expl.getDescription().startsWith(text)) {
      return expl;
    } else {
      for (Explanation sub : expl.getDetails()) {
        Explanation match = findExplanation(sub, text);
        if (match != null) {
          return match;
        }
      }
    }
    return null;
  }

  public void testSweetSpotComputeNorm() throws IOException {

    final SweetSpotSimilarity ss = new SweetSpotSimilarity();
    ss.setLengthNormFactors(1,1,0.5f,true);

@@ -46,12 +81,10 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {

    // base case, should degrade
    FieldInvertState invertState = new FieldInvertState("bogus");
    for (int i = 1; i < 1000; i++) {
      invertState.setLength(i);
      assertEquals("base case: i="+i,
                   computeAndGetNorm(d, invertState),
                   computeAndGetNorm(s, invertState),
                   computeNorm(d, "bogus", i),
                   computeNorm(s, "bogus", i),
                   0.0f);
    }

@@ -60,22 +93,19 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
    ss.setLengthNormFactors(3,10,0.5f,true);

    for (int i = 3; i <=10; i++) {
      invertState.setLength(i);
      assertEquals("3,10: spot i="+i,
                   1.0f,
                   computeAndDecodeNorm(ss, ss, invertState),
                   computeNorm(ss, "bogus", i),
                   0.0f);
    }

    for (int i = 10; i < 1000; i++) {
      invertState.setLength(i-9);
      final byte normD = computeAndGetNorm(d, invertState);
      invertState.setLength(i);
      final byte normS = computeAndGetNorm(s, invertState);
      final float normD = computeNorm(d, "bogus", i - 9);
      final float normS = computeNorm(s, "bogus", i);
      assertEquals("3,10: 10<x : i="+i,
                   normD,
                   normS,
                   0.0f);
                   0.01f);
    }

@@ -106,78 +136,60 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
      }
    };

    invertState = new FieldInvertState("foo");
    for (int i = 3; i <=10; i++) {
      invertState.setLength(i);
      assertEquals("f: 3,10: spot i="+i,
                   1.0f,
                   computeAndDecodeNorm(ss, sp, invertState),
                   computeNorm(sp, "foo", i),
                   0.0f);
    }

    for (int i = 10; i < 1000; i++) {
      invertState.setLength(i-9);
      final byte normD = computeAndGetNorm(d, invertState);
      invertState.setLength(i);
      final byte normS = computeAndGetNorm(sp, invertState);
      final float normD = computeNorm(d, "foo", i-9);
      final float normS = computeNorm(sp, "foo", i);
      assertEquals("f: 3,10: 10<x : i="+i,
                   normD,
                   normS,
                   0.0f);
                   0.01f);
    }

    invertState = new FieldInvertState("bar");
    for (int i = 8; i <=13; i++) {
      invertState.setLength(i);
      assertEquals("f: 8,13: spot i="+i,
                   1.0f,
                   computeAndDecodeNorm(ss, sp, invertState),
                   0.0f);
                   computeNorm(sp, "bar", i),
                   0.01f);
    }

    invertState = new FieldInvertState("yak");
    for (int i = 6; i <=9; i++) {
      invertState.setLength(i);
      assertEquals("f: 6,9: spot i="+i,
                   1.0f,
                   computeAndDecodeNorm(ss, sp, invertState),
                   0.0f);
                   computeNorm(sp, "yak", i),
                   0.01f);
    }

    invertState = new FieldInvertState("bar");
    for (int i = 13; i < 1000; i++) {
      invertState.setLength(i-12);
      final byte normD = computeAndGetNorm(d, invertState);
      invertState.setLength(i);
      final byte normS = computeAndGetNorm(sp, invertState);
      final float normD = computeNorm(d, "bar", i-12);
      final float normS = computeNorm(sp, "bar", i);
      assertEquals("f: 8,13: 13<x : i="+i,
                   normD,
                   normS,
                   0.0f);
                   0.01f);
    }

    invertState = new FieldInvertState("yak");
    for (int i = 9; i < 1000; i++) {
      invertState.setLength(i-8);
      final byte normD = computeAndGetNorm(d, invertState);
      invertState.setLength(i);
      final byte normS = computeAndGetNorm(sp, invertState);
      final float normD = computeNorm(d, "yak", i-8);
      final float normS = computeNorm(sp, "yak", i);
      assertEquals("f: 6,9: 9<x : i="+i,
                   normD,
                   normS,
                   0.0f);
                   0.01f);
    }

    // steepness

    for (int i = 9; i < 1000; i++) {
      invertState = new FieldInvertState("a");
      invertState.setLength(i);
      final byte normSS = computeAndGetNorm(sp, invertState);
      invertState = new FieldInvertState("b");
      invertState.setLength(i);
      final byte normS = computeAndGetNorm(sp, invertState);
      final float normSS = computeNorm(sp, "a", i);
      final float normS = computeNorm(sp, "b", i);
      assertTrue("s: i="+i+" : a="+normSS+
                 " < b="+normS,
                 normSS < normS);
@@ -20,19 +20,24 @@ import java.io.IOException;
import java.util.Map;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.FloatDocValues;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.Similarity.SimWeight;

/**
 * Function that returns {@link TFIDFSimilarity#decodeNormValue(long)}
 * for every document.
 * Function that returns the decoded norm for every document.
 * <p>
 * Note that the configured Similarity for the field must be
 * a subclass of {@link TFIDFSimilarity}
 * a subclass of {@link TFIDFSimilarity} and the contribution of
 * the TF needs to be 1 when the freq is 1 and the contribution
 * of the IDF needs to be 1 when docFreq == docCount == 1.
 * @lucene.internal */
public class NormValueSource extends ValueSource {
  protected final String field;

@@ -61,11 +66,12 @@ public class NormValueSource extends ValueSource {
    if (similarity == null) {
      throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
    }
    final NumericDocValues norms = readerContext.reader().getNormValues(field);

    if (norms == null) {
      return new ConstDoubleDocValues(0.0, this);
    }
    // Only works if the contribution of the tf is 1 when the freq is 1 and contribution of the idf
    // is 1 when docCount == docFreq == 1
    final SimWeight simWeight = similarity.computeWeight(1f,
        new CollectionStatistics(field, 1, 1, 1, 1),
        new TermStatistics(new BytesRef("bogus"), 1, 1));
    final SimScorer simScorer = similarity.simScorer(simWeight, readerContext);

    return new FloatDocValues(this) {
      int lastDocID = -1;

@@ -74,16 +80,8 @@ public class NormValueSource extends ValueSource {
        if (docID < lastDocID) {
          throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " docID=" + docID);
        }
        if (docID > norms.docID()) {
          norms.advance(docID);
        }
        long norm;
        if (docID == norms.docID()) {
          norm = norms.longValue();
        } else {
          norm = 0;
        }
        return similarity.decodeNormValue(norm);
        lastDocID = docID;
        return simScorer.score(docID, 1f);
      }
    };
  }
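The rewritten getValues recovers the decoded norm without calling TFIDFSimilarity.decodeNormValue: against CollectionStatistics(field, 1, 1, 1, 1) and a docFreq == docCount == 1 term, a similarity meeting the documented conditions contributes 1 for both tf and idf, so score(docID, 1f) is the norm itself. Worked out for ClassicSimilarity (hedged: assumes the 7.x idf of 1 + log((docCount+1)/(docFreq+1))):

// tf(1)    = sqrt(1)                    = 1
// idf(1,1) = 1 + log((1 + 1) / (1 + 1)) = 1
// score    = tf * idf * decodedNorm     = decodedNorm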
@@ -21,7 +21,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;

@@ -33,10 +32,9 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;
import org.junit.BeforeClass;

@@ -48,7 +46,7 @@ public class TestLongNormValueSource extends LuceneTestCase {
  static IndexSearcher searcher;
  static Analyzer analyzer;

  private static Similarity sim = new PreciseClassicSimilarity();
  private static Similarity sim = new ClassicSimilarity();

  @BeforeClass
  public static void beforeClass() throws Exception {

@@ -116,114 +114,3 @@ public class TestLongNormValueSource extends LuceneTestCase {
    CheckHits.checkExplanations(q, "", searcher);
  }
}


/** Encodes norm as 4-byte float. */
class PreciseClassicSimilarity extends TFIDFSimilarity {

  /** Sole constructor: parameter-free */
  public PreciseClassicSimilarity() {}

  /**
   * Encodes a normalization factor for storage in an index.
   * <p>
   * The encoding uses a three-bit mantissa, a five-bit exponent, and the
   * zero-exponent point at 15, thus representing values from around 7x10^9 to
   * 2x10^-9 with about one significant decimal digit of accuracy. Zero is also
   * represented. Negative numbers are rounded up to zero. Values too large to
   * represent are rounded down to the largest representable value. Positive
   * values too small to represent are rounded up to the smallest positive
   * representable value.
   *
   * @see org.apache.lucene.util.SmallFloat
   */
  @Override
  public final long encodeNormValue(float f) {
    return Float.floatToIntBits(f);
  }

  /**
   * Decodes the norm value, assuming it is a single byte.
   *
   * @see #encodeNormValue(float)
   */
  @Override
  public final float decodeNormValue(long norm) {
    return Float.intBitsToFloat((int)norm);
  }

  /** Implemented as
   * <code>state.getBoost()*lengthNorm(numTerms)</code>, where
   * <code>numTerms</code> is {@link org.apache.lucene.index.FieldInvertState#getLength()} if {@link
   * #setDiscountOverlaps} is false, else it's {@link
   * org.apache.lucene.index.FieldInvertState#getLength()} - {@link
   * org.apache.lucene.index.FieldInvertState#getNumOverlap()}.
   *
   * @lucene.experimental */
  @Override
  public float lengthNorm(FieldInvertState state) {
    final int numTerms;
    if (discountOverlaps) {
      numTerms = state.getLength() - state.getNumOverlap();
    } else {
      numTerms = state.getLength();
    }
    return (float) (1.0 / Math.sqrt(numTerms));
  }

  /** Implemented as <code>sqrt(freq)</code>. */
  @Override
  public float tf(float freq) {
    return (float)Math.sqrt(freq);
  }

  /** Implemented as <code>1 / (distance + 1)</code>. */
  @Override
  public float sloppyFreq(int distance) {
    return 1.0f / (distance + 1);
  }

  /** The default implementation returns <code>1</code> */
  @Override
  public float scorePayload(int doc, int start, int end, BytesRef payload) {
    return 1;
  }

  /** Implemented as <code>log(docCount/(docFreq+1)) + 1</code>. */
  @Override
  public float idf(long docFreq, long docCount) {
    return (float)(Math.log(docCount/(double)(docFreq+1)) + 1.0);
  }

  /**
   * True if overlap tokens (tokens with a position increment of zero) are
   * discounted from the document's length.
   */
  protected boolean discountOverlaps = true;

  /** Determines whether overlap tokens (Tokens with
   * 0 position increment) are ignored when computing
   * norm. By default this is true, meaning overlap
   * tokens do not count when computing norms.
   *
   * @lucene.experimental
   *
   * @see #computeNorm
   */
  public void setDiscountOverlaps(boolean v) {
    discountOverlaps = v;
  }

  /**
   * Returns true if overlap tokens are discounted from the document's length.
   * @see #setDiscountOverlaps
   */
  public boolean getDiscountOverlaps() {
    return discountOverlaps;
  }

  @Override
  public String toString() {
    return "DefaultSimilarity";
  }
}
@@ -367,7 +367,7 @@ public class TestValueSources extends LuceneTestCase {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new ClassicSimilarity());
    ValueSource vs = new NormValueSource("byte");
    assertHits(new FunctionQuery(vs), new float[] { 0f, 0f });
    assertHits(new FunctionQuery(vs), new float[] { 1f, 1f });

    // regardless of whether norms exist, value source exists == 0
    assertAllExist(vs);
@@ -26,7 +26,6 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;

@@ -143,9 +142,9 @@ public class TestPayloadScoreQuery extends LuceneTestCase {
    // check includeSpanScore makes a difference here
    searcher.setSimilarity(new MultiplyingSimilarity());
    try {
      checkQuery(q, new MaxPayloadFunction(), new int[]{ 122, 222 }, new float[]{ 41.802513122558594f, 34.13160705566406f });
      checkQuery(q, new MinPayloadFunction(), new int[]{ 222, 122 }, new float[]{ 34.13160705566406f, 20.901256561279297f });
      checkQuery(q, new AveragePayloadFunction(), new int[] { 122, 222 }, new float[]{ 38.3189697265625f, 34.13160705566406f });
      checkQuery(q, new MaxPayloadFunction(), new int[]{ 122, 222 }, new float[]{ 20.901256561279297f, 17.06580352783203f });
      checkQuery(q, new MinPayloadFunction(), new int[]{ 222, 122 }, new float[]{ 17.06580352783203f, 10.450628280639648f });
      checkQuery(q, new AveragePayloadFunction(), new int[] { 122, 222 }, new float[]{ 19.15948486328125f, 17.06580352783203f });
      checkQuery(q, new MaxPayloadFunction(), false, new int[]{122, 222}, new float[]{4.0f, 4.0f});
      checkQuery(q, new MinPayloadFunction(), false, new int[]{222, 122}, new float[]{4.0f, 2.0f});
      checkQuery(q, new AveragePayloadFunction(), false, new int[]{222, 122}, new float[]{4.0f, 3.666666f});

@@ -298,7 +297,7 @@ public class TestPayloadScoreQuery extends LuceneTestCase {
    //Make everything else 1 so we see the effect of the payload
    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    @Override
    public float lengthNorm(FieldInvertState state) {
    public float lengthNorm(int length) {
      return 1;
    }
@@ -25,7 +25,6 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;

@@ -268,7 +267,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
    //Make everything else 1 so we see the effect of the payload
    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    @Override
    public float lengthNorm(FieldInvertState state) {
    public float lengthNorm(int length) {
      return 1;
    }
@@ -31,7 +31,7 @@ import java.util.Random;
 * for the same field.
 */
public class RandomSimilarity extends PerFieldSimilarityWrapper {
  final ClassicSimilarity defaultSim = new ClassicSimilarity();
  final BM25Similarity defaultSim = new BM25Similarity();
  final List<Similarity> knownSims;
  Map<String,Similarity> previousMappings = new HashMap<>();
  final int perFieldSeed;
@@ -86,8 +86,8 @@ public class DisMaxRequestHandlerTest extends SolrTestCaseJ4 {
        req("cool stuff")
        ,"//*[@numFound='3']"
        ,"//result/doc[1]/int[@name='id'][.='42']"
        ,"//result/doc[2]/int[@name='id'][.='8675309']"
        ,"//result/doc[3]/int[@name='id'][.='666']"
        ,"//result/doc[2]/int[@name='id'][.='666']"
        ,"//result/doc[3]/int[@name='id'][.='8675309']"
        );

    assertQ("multi qf",
@@ -97,8 +97,8 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
            CommonParams.FL, "id, score, [elevated]")
        , "//*[@numFound='3']"
        , "//result/doc[1]/float[@name='id'][.='7.0']"
        , "//result/doc[2]/float[@name='id'][.='8.0']"
        , "//result/doc[3]/float[@name='id'][.='9.0']",
        , "//result/doc[2]/float[@name='id'][.='9.0']"
        , "//result/doc[3]/float[@name='id'][.='8.0']",
        "//result/doc[1]/bool[@name='[elevated]'][.='true']",
        "//result/doc[2]/bool[@name='[elevated]'][.='false']",
        "//result/doc[3]/bool[@name='[elevated]'][.='false']"
@@ -49,6 +49,6 @@ public class TestPayloadScoreQParserPlugin extends SolrTestCaseJ4 {

    // TODO: fix this includeSpanScore test to be less brittle - score result is score of "A" (via BM25) multiplied by 1.0 (payload value)
    assertQ(req("fl","*,score", "q", "{!payload_score f=vals_dpf v=A func=min}"), "//float[@name='score']='1.0'");
    assertQ(req("fl","*,score", "q", "{!payload_score f=vals_dpf v=A func=min includeSpanScore=true}"), "//float[@name='score']='0.25811607'");
    assertQ(req("fl","*,score", "q", "{!payload_score f=vals_dpf v=A func=min includeSpanScore=true}"), "//float[@name='score']='0.2876821'");
  }
}
@@ -65,9 +65,9 @@ public class SortByFunctionTest extends AbstractSolrTestCase {
    assertQ(req("fl", "id,score", "q", "f_t:ipod", "sort", "score desc"),
        "//*[@numFound='4']",
        "//result/doc[1]/int[@name='id'][.='1']",
        "//result/doc[2]/int[@name='id'][.='4']",
        "//result/doc[3]/int[@name='id'][.='2']",
        "//result/doc[4]/int[@name='id'][.='3']"
        "//result/doc[2]/int[@name='id'][.='2']",
        "//result/doc[3]/int[@name='id'][.='3']",
        "//result/doc[4]/int[@name='id'][.='4']"
    );

@@ -25,7 +25,6 @@ import java.util.Arrays;
import java.util.List;
import java.util.Random;

import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.solr.SolrTestCaseJ4;

@@ -431,12 +430,8 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
    assertQ(req("fl","*,score","q", "{!func}tf(a_tfidf,cow)", "fq","id:6"),
        "//float[@name='score']='" + similarity.tf(5) + "'");

    FieldInvertState state = new FieldInvertState("a_tfidf");
    state.setLength(4);
    long norm = similarity.computeNorm(state);
    float nrm = similarity.decodeNormValue((byte) norm);
    assertQ(req("fl","*,score","q", "{!func}norm(a_tfidf)", "fq","id:2"),
        "//float[@name='score']='" + nrm + "'"); // sqrt(4)==2 and is exactly representable when quantized to a byte
        "//float[@name='score']='0.5'"); // 1/sqrt(4)==1/2==0.5

  }
@@ -16,8 +16,22 @@
  */
 package org.apache.solr.search.similarities;
 
+import java.io.IOException;
+import java.util.Collections;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.misc.SweetSpotSimilarity;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.store.Directory;
 import org.junit.BeforeClass;
 
 /**
@@ -28,7 +42,38 @@ public class TestSweetSpotSimilarityFactory extends BaseSimilarityTestCase {
   public static void beforeClass() throws Exception {
     initCore("solrconfig-basic.xml","schema-sweetspot.xml");
   }
+
+  private static float computeNorm(Similarity sim, int length) throws IOException {
+    String value = IntStream.range(0, length).mapToObj(i -> "a").collect(Collectors.joining(" "));
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim));
+    w.addDocument(Collections.singleton(newTextField("foo", value, Store.NO)));
+    DirectoryReader reader = DirectoryReader.open(w);
+    w.close();
+    IndexSearcher searcher = new IndexSearcher(reader);
+    searcher.setSimilarity(sim);
+    Explanation expl = searcher.explain(new TermQuery(new Term("foo", "a")), 0);
+    reader.close();
+    dir.close();
+    Explanation norm = findExplanation(expl, "fieldNorm");
+    assertNotNull(norm);
+    return norm.getValue();
+  }
+
+  private static Explanation findExplanation(Explanation expl, String text) {
+    if (expl.getDescription().startsWith(text)) {
+      return expl;
+    } else {
+      for (Explanation sub : expl.getDetails()) {
+        Explanation match = findExplanation(sub, text);
+        if (match != null) {
+          return match;
+        }
+      }
+    }
+    return null;
+  }
 
   /** default parameters */
   public void testDefaults() throws Exception {
     SweetSpotSimilarity sim = getSimilarity("text", SweetSpotSimilarity.class);
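Note: the helper added above measures the norm a query actually observes, rather than calling the similarity directly: it indexes one synthetic document whose "foo" field holds `length` copies of the term "a", runs explain, and walks the Explanation tree for the "fieldNorm" factor. A hedged usage sketch, with expected values taken from the default-parameter assertions later in this file:

    // Usage sketch (inside this test class, reusing the helper above):
    Similarity sim = new SweetSpotSimilarity(); // assumes library defaults
    assertEquals(1.00f, computeNorm(sim, 1), 0.0f); // full norm for a 1-term field
    assertEquals(0.50f, computeNorm(sim, 4), 0.0f); // 1/sqrt(4)
    assertEquals(0.25f, computeNorm(sim, 16), 0.0f); // 1/sqrt(16)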
@@ -40,9 +85,9 @@ public class TestSweetSpotSimilarityFactory extends BaseSimilarityTestCase {
     }
 
     // default norm sanity check
-    assertEquals("norm 1", 1.00F, sim.computeLengthNorm(1), 0.0F);
-    assertEquals("norm 4", 0.50F, sim.computeLengthNorm(4), 0.0F);
-    assertEquals("norm 16", 0.25F, sim.computeLengthNorm(16), 0.0F);
+    assertEquals("norm 1", 1.00F, computeNorm(sim, 1), 0.0F);
+    assertEquals("norm 4", 0.50F, computeNorm(sim, 4), 0.0F);
+    assertEquals("norm 16", 0.25F, computeNorm(sim, 16), 0.0F);
   }
 
   /** baseline with parameters */
@@ -65,17 +110,17 @@ public class TestSweetSpotSimilarityFactory extends BaseSimilarityTestCase {
 
     // norms: plateau from 3-5
     assertEquals("norm 1 == 7",
-                 sim.computeLengthNorm(1), sim.computeLengthNorm(7), 0.0F);
+                 computeNorm(sim, 1), computeNorm(sim, 7), 0.0F);
     assertEquals("norm 2 == 6",
-                 sim.computeLengthNorm(1), sim.computeLengthNorm(7), 0.0F);
-    assertEquals("norm 3", 1.00F, sim.computeLengthNorm(3), 0.0F);
-    assertEquals("norm 4", 1.00F, sim.computeLengthNorm(4), 0.0F);
-    assertEquals("norm 5", 1.00F, sim.computeLengthNorm(5), 0.0F);
-    assertTrue("norm 6 too high: " + sim.computeLengthNorm(6),
-               sim.computeLengthNorm(6) < 1.0F);
+                 computeNorm(sim, 2), computeNorm(sim, 6), 0.0F);
+    assertEquals("norm 3", 1.00F, computeNorm(sim, 3), 0.0F);
+    assertEquals("norm 4", 1.00F, computeNorm(sim, 4), 0.0F);
+    assertEquals("norm 5", 1.00F, computeNorm(sim, 5), 0.0F);
+    assertTrue("norm 6 too high: " + computeNorm(sim, 6),
+               computeNorm(sim, 6) < 1.0F);
     assertTrue("norm 7 higher than norm 6",
-               sim.computeLengthNorm(7) < sim.computeLengthNorm(6));
-    assertEquals("norm 20", 0.25F, sim.computeLengthNorm(20), 0.0F);
+               computeNorm(sim, 7) < computeNorm(sim, 6));
+    assertEquals("norm 20", 0.25F, computeNorm(sim, 20), 0.0F);
   }
 
   /** hyperbolic with parameters */
@@ -92,16 +137,16 @@ public class TestSweetSpotSimilarityFactory extends BaseSimilarityTestCase {
     assertEquals("MID tf", 3.3F+(7.7F - 3.3F)/2.0F, sim.tf(5), 0.00001F);
 
     // norms: plateau from 1-5, shallow slope
-    assertEquals("norm 1", 1.00F, sim.computeLengthNorm(1), 0.0F);
-    assertEquals("norm 2", 1.00F, sim.computeLengthNorm(2), 0.0F);
-    assertEquals("norm 3", 1.00F, sim.computeLengthNorm(3), 0.0F);
-    assertEquals("norm 4", 1.00F, sim.computeLengthNorm(4), 0.0F);
-    assertEquals("norm 5", 1.00F, sim.computeLengthNorm(5), 0.0F);
-    assertTrue("norm 6 too high: " + sim.computeLengthNorm(6),
-               sim.computeLengthNorm(6) < 1.0F);
+    assertEquals("norm 1", 1.00F, computeNorm(sim, 1), 0.0F);
+    assertEquals("norm 2", 1.00F, computeNorm(sim, 2), 0.0F);
+    assertEquals("norm 3", 1.00F, computeNorm(sim, 3), 0.0F);
+    assertEquals("norm 4", 1.00F, computeNorm(sim, 4), 0.0F);
+    assertEquals("norm 5", 1.00F, computeNorm(sim, 5), 0.0F);
+    assertTrue("norm 6 too high: " + computeNorm(sim, 6),
+               computeNorm(sim, 6) < 1.0F);
     assertTrue("norm 7 higher than norm 6",
-               sim.computeLengthNorm(7) < sim.computeLengthNorm(6));
-    assertTrue("norm 20 not high enough: " + sim.computeLengthNorm(20),
-               0.25F < sim.computeLengthNorm(20));
+               computeNorm(sim, 7) < computeNorm(sim, 6));
+    assertTrue("norm 20 not high enough: " + computeNorm(sim, 20),
+               0.25F < computeNorm(sim, 20));
   }
 }
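Note: for reference, the plateau these assertions describe comes from SweetSpotSimilarity's configurable length norm, which is flat at 1.0 between a minimum and maximum field length and decays outside that band. A hedged sketch of the equivalent programmatic setup (the tests actually configure this via schema-sweetspot.xml, which is not part of this diff; the setter signature is from Lucene's misc module and should be treated as illustrative):

    // Configuration sketch: a "plateau from 3-5" length norm set up in code.
    SweetSpotSimilarity sim = new SweetSpotSimilarity();
    // Fields of 3 to 5 terms get a norm of 1.0; steepness 0.5 controls how
    // quickly the norm decays outside that band.
    sim.setLengthNormFactors(3, 5, 0.5f, true); // last arg: discountOverlaps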