Mirror of https://github.com/apache/lucene.git, synced 2025-02-23 18:55:50 +00:00

Commit 6a8768e395: Merge branch 'master' into feature/autoscaling

.gitignore (vendored), 41 changed lines
@ -1,20 +1,19 @@
# .
/eclipse-build
/classes
**/build
build
/idea-build
**/dist
**/lib
**/test-lib
dist
lib
test-lib
/*~
/velocity.log
/build.properties
/.idea
lucene/**/*.iml
solr/**/*.iml
parent.iml
**/*.ipr
**/*.iws
*.ipr
*.iws
/.project
/.classpath
/.settings
@ -22,33 +21,7 @@ parent.iml
/prj.el
/bin
/bin.*
**/pom.xml
pom.xml
/nbproject
/nb-build
.pydevproject

/solr/package

# can this be minimized?
/solr/example/start.jar
/solr/example/webapps/*
/solr/example/logs/*.log
/solr/example/**/data
/solr/example/solr/lib
/solr/example/solr/logs
/solr/example/solr/zoo_data
/solr/example/work/*
/solr/example/exampledocs/post.jar

/solr/example/example-DIH/**/data
/solr/example/example-DIH/**/dataimport.properties
/solr/example/example-DIH/solr/mail/lib/*.jar

solr/contrib/dataimporthandler/test-lib/

solr/core/test-lib/

solr/server/logs/
solr/server/solr/zoo_data/
solr/server/solr-webapp
solr/server/start.jar
@ -66,6 +66,13 @@
</foaf:Person>
</maintainer>

<release>
<Version>
<name>lucene-6.5.1</name>
<created>2017-04-27</created>
<revision>6.5.1</revision>
</Version>
</release>
<release>
<Version>
<name>lucene-6.5.0</name>

@ -66,6 +66,13 @@
</foaf:Person>
</maintainer>

<release>
<Version>
<name>solr-6.5.1</name>
<created>2017-04-27</created>
<revision>6.5.1</revision>
</Version>
</release>
<release>
<Version>
<name>solr-6.5.0</name>
dev-tools/idea/.idea/libraries/HSQLDB.xml (generated), 2 changed lines
@ -1,7 +1,7 @@
<component name="libraryTable">
<library name="HSQLDB">
<CLASSES>
<root url="jar://$PROJECT_DIR$/solr/example/example-DIH/solr/db/lib/hsqldb-1.8.0.10.jar!/" />
<root url="jar://$PROJECT_DIR$/solr/example/example-DIH/solr/db/lib/hsqldb-2.4.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
@ -16,8 +16,9 @@
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
<orderEntry type="module" module-name="lucene-core" />
<orderEntry type="module" module-name="queries" />
<orderEntry type="module" scope="TEST" module-name="analysis-common" />
<orderEntry type="module" module-name="analysis-common" />
<orderEntry type="module" module-name="grouping" />
<orderEntry type="module" module-name="misc" />
<orderEntry type="module" module-name="sandbox" />
</component>
</module>

@ -32,5 +32,6 @@
<orderEntry type="module" module-name="join" />
<orderEntry type="module" module-name="sandbox" />
<orderEntry type="module" module-name="backward-codecs" />
<orderEntry type="module" module-name="codecs" />
</component>
</module>
@ -97,8 +97,8 @@ def prepare(root, version, gpgKeyID, gpgPassword):
print(' Check DOAP files')
checkDOAPfiles(version)

print(' ant clean test')
run('ant clean test')
print(' ant clean test validate documentation-lint')
run('ant clean test validate documentation-lint')

open('rev.txt', mode='wb').write(rev.encode('UTF-8'))

@ -296,7 +296,7 @@ def checkSummary(fullPath):
print()
print(fullPath)
printed = True
print(' missing: %s' % unescapeHTML(lastHREF))
print(' missing description: %s' % unescapeHTML(lastHREF))
anyMissing = True
elif lineLower.find('licensed to the apache software foundation') != -1 or lineLower.find('copyright 2004 the apache software foundation') != -1:
if not printed:

@ -266,7 +266,10 @@ def checkAll(dirName):
if __name__ == '__main__':
if checkAll(sys.argv[1]):
print()
print('Broken javadocs links were found!')
print('Broken javadocs links were found! Common root causes:')
# please feel free to add to this list
print('* A typo of some sort for manually created links.')
print('* Public methods referencing non-public classes in their signature.')
sys.exit(1)
sys.exit(0)

@ -707,8 +707,10 @@ def verifyUnpacked(java, project, artifact, unpackPath, gitRevision, version, te
print(' %s' % line.strip())
raise RuntimeError('source release has WARs...')

print(' run "ant validate"')
java.run_java8('ant validate', '%s/validate.log' % unpackPath)
# Can't run documentation-lint in lucene src, because dev-tools is missing
validateCmd = 'ant validate' if project == 'lucene' else 'ant validate documentation-lint';
print(' run "%s"' % validateCmd)
java.run_java8(validateCmd, '%s/validate.log' % unpackPath)

if project == 'lucene':
print(" run tests w/ Java 8 and testArgs='%s'..." % testArgs)
@ -50,16 +50,31 @@ API Changes
* LUCENE-7701: Grouping collectors have been refactored, such that groups are
now defined by a GroupSelector implementation. (Alan Woodward)

* LUCENE-7741: DoubleValuesSource now has an explain() method (Alan Woodward,
Adrien Grand)

* LUCENE-7815: Removed the PostingsHighlighter; you should use the UnifiedHighlighter
instead, which derived from the UH. WholeBreakIterator and
CustomSeparatorBreakIterator were moved to UH's package. (David Smiley)

* LUCENE-7850: Removed support for legacy numerics. (Adrien Grand)

Bug Fixes

* LUCENE-7626: IndexWriter will no longer accept broken token offsets
(Mike McCandless)

* LUCENE-7859: Spatial-extras PackedQuadPrefixTree bug that only revealed itself
with the new pointsOnly optimizations in LUCENE-7845. (David Smiley)

Improvements

* LUCENE-7489: Better storage of sparse doc-values fields with the default
codec. (Adrien Grand)

* LUCENE-7730: More accurate encoding of the length normalization factor
thanks to the removal of index-time boosts. (Adrien Grand)

Optimizations

* LUCENE-7416: BooleanQuery optimizes queries that have queries that occur both
@ -78,6 +93,10 @@ Optimizations
values using different numbers of bits per value if this proves to save
storage. (Adrien Grand)

* LUCENE-7845: Enhance spatial-extras RecursivePrefixTreeStrategy queries when the
query is a point (for 2D) or a is a simple date interval (e.g. 1 month). When
the strategy is marked as pointsOnly, the results is a TermQuery. (David Smiley)

Other

* LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)
@ -89,14 +108,76 @@ Other
* LUCENE-7753: Make fields static when possible.
(Daniel Jelinski via Adrien Grand)

* LUCENE-7540: Upgrade ICU to 59.1 (Mike McCandless, Jim Ferenczi)

* LUCENE-7852: Correct copyright year(s) in lucene/LICENSE.txt file.
(Christine Poerschke, Steve Rowe)

======================= Lucene 6.7.0 =======================

Other

* LUCENE-7800: Remove code that potentially rethrows checked exceptions
from methods that don't declare them ("sneaky throw" hack). (Robert Muir,
Uwe Schindler, Dawid Weiss)

Improvements

* LUCENE-7841: Normalize ґ to г in Ukrainian analyzer. (Andriy Rysin via Dawid Weiss)

======================= Lucene 6.6.0 =======================

New Features

* LUCENE-7811: Add a concurrent SortedSet facets implementation.
(Mike McCandless)

Bug Fixes

* LUCENE-7777: ByteBlockPool.readBytes sometimes throws
ArrayIndexOutOfBoundsException when byte blocks larger than 32 KB
were added (Mike McCandless)

* LUCENE-7797: The static FSDirectory.listAll(Path) method was always
returning an empty array. (Atkins Chang via Mike McCandless)

* LUCENE-7481: Fixed missing rewrite methods for SpanPayloadCheckQuery
and PayloadScoreQuery. (Erik Hatcher)

* LUCENE-7808: Fixed PayloadScoreQuery and SpanPayloadCheckQuery
.equals and .hashCode methods. (Erik Hatcher)

* LUCENE-7798: Add .equals and .hashCode to ToParentBlockJoinSortField
(Mikhail Khludnev)

* LUCENE-7814: DateRangePrefixTree (in spatial-extras) had edge-case bugs for
years >= 292,000,000. (David Smiley)

* LUCENE-5365, LUCENE-7818: Fix incorrect condition in queryparser's
QueryNodeOperation#logicalAnd(). (Olivier Binda, Amrit Sarkar,
AppChecker via Uwe Schindler)

* LUCENE-7821: The classic and flexible query parsers, as well as Solr's
"lucene"/standard query parser, should require " TO " in range queries,
and accept "TO" as endpoints in range queries. (hossman, Steve Rowe)

* LUCENE-7824: Fix graph query analysis for multi-word synonym rules with common terms (eg. new york, new york city).
(Jim Ferenczi)

* LUCENE-7817: Pass cached query to onQueryCache instead of null.
(Christoph Kaser via Adrien Grand)

* LUCENE-7831: CodecUtil should not seek to negative offsets. (Adrien Grand)

* LUCENE-7833: ToParentBlockJoinQuery computed the min score instead of the max
score with ScoreMode.MAX. (Adrien Grand)

* LUCENE-7847: Fixed all-docs-match optimization of range queries on range
fields. (Adrien Grand)

* LUCENE-7810: Fix equals() and hashCode() methods of several join queries.
(Hossman, Adrien Grand, Martijn van Groningen)

Improvements

* LUCENE-7782: OfflineSorter now passes the total number of items it
@ -105,6 +186,16 @@ Improvements
* LUCENE-7785: Move dictionary for Ukrainian analyzer to external dependency.
(Andriy Rysin via Steve Rowe, Dawid Weiss)

* LUCENE-7801: SortedSetDocValuesReaderState now implements
Accountable so you can see how much RAM it's using (Robert Muir,
Mike McCandless)

* LUCENE-7792: OfflineSorter can now run concurrently if you pass it
an optional ExecutorService (Dawid Weiss, Mike McCandless)

* LUCENE-7811: Sorted set facets now use sparse storage when
collecting hits, when appropriate. (Mike McCandless)

Optimizations

* LUCENE-7787: spatial-extras HeatmapFacetCounter will now short-circuit it's
@ -112,6 +203,12 @@ Optimizations

Other

* LUCENE-7796: Make IOUtils.reThrow idiom declare Error return type so
callers may use it in a way that compiler knows subsequent code is
unreachable. reThrow is now deprecated in favor of IOUtils.rethrowAlways
with a slightly different semantics (see javadoc). (Hossman, Robert Muir,
Dawid Weiss)

* LUCENE-7754: Inner classes should be static whenever possible.
(Daniel Jelinski via Adrien Grand)

@ -74,3 +74,9 @@ collecting TopDocs for each group, but instead takes a GroupReducer that will
perform any type of reduction on the top groups collected on a first-pass. To
reproduce the old behaviour of SecondPassGroupingCollector, you should instead
use TopGroupsCollector.

## Removed legacy numerics (LUCENE-7850)

Support for legacy numerics has been removed since legacy numerics had been
deprecated since Lucene 6.0. Points should be used instead, see
org.apache.lucene.index.PointValues for an introduction.
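As a companion to the MIGRATE note above, a minimal sketch of indexing and querying with the points API; the field name "price" and the values are illustrative:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.search.Query;

public class LegacyNumericsToPoints {
  public static void main(String[] args) {
    // Index an int as a point (replaces the removed legacy numeric fields).
    Document doc = new Document();
    doc.add(new IntPoint("price", 42));      // indexed for exact/range queries, not stored
    doc.add(new StoredField("price", 42));   // store separately if the value must be retrievable

    // Query the point field (replaces legacy numeric range queries).
    Query exact = IntPoint.newExactQuery("price", 42);
    Query range = IntPoint.newRangeQuery("price", 10, 100);
    System.out.println(exact + " / " + range);
  }
}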
@ -1,5 +1,5 @@
Apache Lucene
Copyright 2014 The Apache Software Foundation
Copyright 2001-2017 The Apache Software Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
@ -18,13 +18,13 @@ Some data files (under analysis/icu/src/data) are derived from Unicode data such
as the Unicode Character Database. See http://unicode.org/copyright.html for more
details.

Brics Automaton (under core/src/java/org/apache/lucene/util/automaton) is
Brics Automaton (under core/src/java/org/apache/lucene/util/automaton) is
BSD-licensed, created by Anders Møller. See http://www.brics.dk/automaton/

The levenshtein automata tables (under core/src/java/org/apache/lucene/util/automaton) were
automatically generated with the moman/finenight FSA library, created by
Jean-Philippe Barrette-LaPierre. This library is available under an MIT license,
see http://sites.google.com/site/rrettesite/moman and
see http://sites.google.com/site/rrettesite/moman and
http://bitbucket.org/jpbarrette/moman/overview/

The class org.apache.lucene.util.WeakIdentityMap was derived from
@ -78,7 +78,7 @@ analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.ja
analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java

The Stempel analyzer (stempel) includes BSD-licensed software developed
The Stempel analyzer (stempel) includes BSD-licensed software developed
by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil,
and Edmond Nolan.

@ -90,8 +90,8 @@ See http://project.carrot2.org/license.html.
The SmartChineseAnalyzer source code (smartcn) was
provided by Xiaoping Gao and copyright 2009 by www.imdict.net.

WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/)
is derived from Unicode data such as the Unicode Character Database.
WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/)
is derived from Unicode data such as the Unicode Character Database.
See http://unicode.org/copyright.html for more details.

The Morfologik analyzer (morfologik) includes BSD-licensed software
@ -24,6 +24,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;

import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;

/**
* Emits the entire input as a single token.
*/
@ -41,16 +43,16 @@ public final class KeywordTokenizer extends Tokenizer {
}

public KeywordTokenizer(int bufferSize) {
if (bufferSize <= 0) {
throw new IllegalArgumentException("bufferSize must be > 0");
if (bufferSize > MAX_TOKEN_LENGTH_LIMIT || bufferSize <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + bufferSize);
}
termAtt.resizeBuffer(bufferSize);
}

public KeywordTokenizer(AttributeFactory factory, int bufferSize) {
super(factory);
if (bufferSize <= 0) {
throw new IllegalArgumentException("bufferSize must be > 0");
if (bufferSize > MAX_TOKEN_LENGTH_LIMIT || bufferSize <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + bufferSize);
}
termAtt.resizeBuffer(bufferSize);
}
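A small usage sketch of the constructor shown above; the buffer size and input are arbitrary. With this change, a bufferSize outside the range 1..MAX_TOKEN_LENGTH_LIMIT fails fast with IllegalArgumentException rather than only rejecting non-positive values:

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class KeywordTokenizerDemo {
  public static void main(String[] args) throws IOException {
    KeywordTokenizer tokenizer = new KeywordTokenizer(256); // initial term buffer of 256 chars
    tokenizer.setReader(new StringReader("the whole input becomes one token"));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
      System.out.println(term.toString()); // prints the entire input once
    }
    tokenizer.end();
    tokenizer.close();
  }
}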
@ -16,26 +16,39 @@
*/
package org.apache.lucene.analysis.core;

import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;

import java.util.Map;

import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;

/**
* Factory for {@link KeywordTokenizer}.
* <pre class="prettyprint">
* <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.KeywordTokenizerFactory"/>
* <tokenizer class="solr.KeywordTokenizerFactory" maxTokenLen="256"/>
* </analyzer>
* </fieldType></pre>
*
* Options:
* <ul>
* <li>maxTokenLen: max token length, should be greater than 0 and less than
* MAX_TOKEN_LENGTH_LIMIT (1024*1024). It is rare to need to change this
* else {@link KeywordTokenizer}::DEFAULT_BUFFER_SIZE</li>
* </ul>
*/
public class KeywordTokenizerFactory extends TokenizerFactory {
private final int maxTokenLen;

/** Creates a new KeywordTokenizerFactory */
public KeywordTokenizerFactory(Map<String,String> args) {
super(args);
maxTokenLen = getInt(args, "maxTokenLen", KeywordTokenizer.DEFAULT_BUFFER_SIZE);
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@ -43,6 +56,6 @@ public class KeywordTokenizerFactory extends TokenizerFactory {

@Override
public KeywordTokenizer create(AttributeFactory factory) {
return new KeywordTokenizer(factory, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
return new KeywordTokenizer(factory, maxTokenLen);
}
}
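If the factory is assembled programmatically rather than through a Solr schema, the new option can be passed as a factory parameter; a sketch using CustomAnalyzer, where the filter chain and the value 256 are illustrative:

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.custom.CustomAnalyzer;

public class KeywordMaxTokenLenDemo {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = CustomAnalyzer.builder()
        .withTokenizer(KeywordTokenizerFactory.class, "maxTokenLen", "256")
        .addTokenFilter(LowerCaseFilterFactory.class)
        .build();
    // analyzer.tokenStream(...) would now emit single keyword tokens buffered at 256 chars
    analyzer.close();
  }
}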
@ -50,6 +50,20 @@ public class LetterTokenizer extends CharTokenizer {
super(factory);
}

/**
* Construct a new LetterTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.

*/
public LetterTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory, maxTokenLen);
}

/** Collects only characters which satisfy
* {@link Character#isLetter(int)}.*/
@Override
@ -17,25 +17,40 @@
|
||||
package org.apache.lucene.analysis.core;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.util.CharTokenizer;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
|
||||
|
||||
/**
|
||||
* Factory for {@link LetterTokenizer}.
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_letter" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.LetterTokenizerFactory"/>
|
||||
* <tokenizer class="solr.LetterTokenizerFactory" maxTokenLen="256"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* Options:
|
||||
* <ul>
|
||||
* <li>maxTokenLen: max token length, must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
|
||||
* It is rare to need to change this
|
||||
* else {@link CharTokenizer}::DEFAULT_MAX_TOKEN_LEN</li>
|
||||
* </ul>
|
||||
*/
|
||||
public class LetterTokenizerFactory extends TokenizerFactory {
|
||||
private final int maxTokenLen;
|
||||
|
||||
/** Creates a new LetterTokenizerFactory */
|
||||
public LetterTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
|
||||
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
|
||||
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
|
||||
}
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
@ -43,6 +58,6 @@ public class LetterTokenizerFactory extends TokenizerFactory {
|
||||
|
||||
@Override
|
||||
public LetterTokenizer create(AttributeFactory factory) {
|
||||
return new LetterTokenizer(factory);
|
||||
return new LetterTokenizer(factory, maxTokenLen);
|
||||
}
|
||||
}
|
||||
|
@ -50,6 +50,19 @@ public final class LowerCaseTokenizer extends LetterTokenizer {
super(factory);
}

/**
* Construct a new LowerCaseTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.
*/
public LowerCaseTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory, maxTokenLen);
}

/** Converts char to lower case
* {@link Character#toLowerCase(int)}.*/
@Override
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.core;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
import org.apache.lucene.analysis.util.CharTokenizer;
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
@ -25,20 +26,36 @@ import org.apache.lucene.util.AttributeFactory;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
|
||||
|
||||
/**
|
||||
* Factory for {@link LowerCaseTokenizer}.
|
||||
* Factory for {@link LowerCaseTokenizer}.
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.LowerCaseTokenizerFactory" maxTokenLen="256"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* <p>
|
||||
* Options:
|
||||
* <ul>
|
||||
* <li>maxTokenLen: max token length, should be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
|
||||
* It is rare to need to change this
|
||||
* else {@link CharTokenizer}::DEFAULT_MAX_WORD_LEN</li>
|
||||
* </ul>
|
||||
*/
|
||||
public class LowerCaseTokenizerFactory extends TokenizerFactory implements MultiTermAwareComponent {
|
||||
|
||||
/** Creates a new LowerCaseTokenizerFactory */
|
||||
public LowerCaseTokenizerFactory(Map<String,String> args) {
|
||||
private final int maxTokenLen;
|
||||
|
||||
/**
|
||||
* Creates a new LowerCaseTokenizerFactory
|
||||
*/
|
||||
public LowerCaseTokenizerFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
|
||||
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
|
||||
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
|
||||
}
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
@ -46,11 +63,13 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi
|
||||
|
||||
@Override
|
||||
public LowerCaseTokenizer create(AttributeFactory factory) {
|
||||
return new LowerCaseTokenizer(factory);
|
||||
return new LowerCaseTokenizer(factory, maxTokenLen);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AbstractAnalysisFactory getMultiTermComponent() {
|
||||
return new LowerCaseFilterFactory(new HashMap<>(getOriginalArgs()));
|
||||
Map map = new HashMap<>(getOriginalArgs());
|
||||
map.remove("maxTokenLen"); //removing "maxTokenLen" argument for LowerCaseFilterFactory init
|
||||
return new LowerCaseFilterFactory(map);
|
||||
}
|
||||
}
|
||||
|
@ -58,7 +58,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* <ul>
* <li><code>wordset</code> - This is the default format, which supports one word per
* line (including any intra-word whitespace) and allows whole line comments
* begining with the "#" character. Blank lines are ignored. See
* beginning with the "#" character. Blank lines are ignored. See
* {@link WordlistLoader#getLines WordlistLoader.getLines} for details.
* </li>
* <li><code>snowball</code> - This format allows for multiple words specified on each
@ -47,6 +47,19 @@ public final class UnicodeWhitespaceTokenizer extends CharTokenizer {
public UnicodeWhitespaceTokenizer(AttributeFactory factory) {
super(factory);
}

/**
* Construct a new UnicodeWhitespaceTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.
*/
public UnicodeWhitespaceTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory, maxTokenLen);
}

/** Collects only characters which do not satisfy Unicode's WHITESPACE property. */
@Override
@ -46,6 +46,19 @@ public final class WhitespaceTokenizer extends CharTokenizer {
public WhitespaceTokenizer(AttributeFactory factory) {
super(factory);
}

/**
* Construct a new WhitespaceTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.
*/
public WhitespaceTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory, maxTokenLen);
}

/** Collects only characters which do not satisfy
* {@link Character#isWhitespace(int)}.*/
@ -22,15 +22,18 @@ import java.util.Collection;
import java.util.Map;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;

import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;

/**
* Factory for {@link WhitespaceTokenizer}.
* <pre class="prettyprint">
* <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.WhitespaceTokenizerFactory" rule="unicode"/>
* <tokenizer class="solr.WhitespaceTokenizerFactory" rule="unicode" maxTokenLen="256"/>
* </analyzer>
* </fieldType></pre>
*
@ -38,6 +41,9 @@ import org.apache.lucene.util.AttributeFactory;
* <ul>
* <li>rule: either "java" for {@link WhitespaceTokenizer}
* or "unicode" for {@link UnicodeWhitespaceTokenizer}</li>
* <li>maxTokenLen: max token length, should be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
* It is rare to need to change this
* else {@link CharTokenizer}::DEFAULT_MAX_TOKEN_LEN</li>
* </ul>
*/
public class WhitespaceTokenizerFactory extends TokenizerFactory {
@ -46,13 +52,17 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory {
private static final Collection<String> RULE_NAMES = Arrays.asList(RULE_JAVA, RULE_UNICODE);

private final String rule;
private final int maxTokenLen;

/** Creates a new WhitespaceTokenizerFactory */
public WhitespaceTokenizerFactory(Map<String,String> args) {
super(args);

rule = get(args, "rule", RULE_NAMES, RULE_JAVA);

maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@ -62,9 +72,9 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory {
public Tokenizer create(AttributeFactory factory) {
switch (rule) {
case RULE_JAVA:
return new WhitespaceTokenizer(factory);
return new WhitespaceTokenizer(factory, maxTokenLen);
case RULE_UNICODE:
return new UnicodeWhitespaceTokenizer(factory);
return new UnicodeWhitespaceTokenizer(factory, maxTokenLen);
default:
throw new AssertionError();
}
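A sketch of driving the factory above straight from an args map, mirroring the constructor logic in the diff; the parameter values are arbitrary:

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;

public class WhitespaceFactoryDemo {
  public static void main(String[] args) throws IOException {
    Map<String, String> params = new HashMap<>();
    params.put("rule", "unicode");     // "java" (default) or "unicode"
    params.put("maxTokenLen", "10");   // rejected unless 0 < maxTokenLen <= MAX_TOKEN_LENGTH_LIMIT (1024*1024)
    // The factory consumes the known keys; anything left over triggers "Unknown parameters: ...".
    WhitespaceTokenizerFactory factory = new WhitespaceTokenizerFactory(params);
    Tokenizer tokenizer = factory.create(); // UnicodeWhitespaceTokenizer capped at 10 chars per token
    tokenizer.setReader(new StringReader("tokens longer than ten characters get split"));
    tokenizer.close();
  }
}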
@ -33,6 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;

import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;

/**
* An abstract base class for simple, character-oriented tokenizers.
* <p>
@ -50,6 +52,7 @@ public abstract class CharTokenizer extends Tokenizer {
* Creates a new {@link CharTokenizer} instance
*/
public CharTokenizer() {
this.maxTokenLen = DEFAULT_MAX_WORD_LEN;
}

/**
@ -60,6 +63,23 @@ public abstract class CharTokenizer extends Tokenizer {
*/
public CharTokenizer(AttributeFactory factory) {
super(factory);
this.maxTokenLen = DEFAULT_MAX_WORD_LEN;
}

/**
* Creates a new {@link CharTokenizer} instance
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.
*/
public CharTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory);
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
}
this.maxTokenLen = maxTokenLen;
}

/**
@ -193,9 +213,10 @@ public abstract class CharTokenizer extends Tokenizer {
}

private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
private static final int MAX_WORD_LEN = 255;
public static final int DEFAULT_MAX_WORD_LEN = 255;
private static final int IO_BUFFER_SIZE = 4096;

private final int maxTokenLen;

private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

@ -256,7 +277,7 @@ public abstract class CharTokenizer extends Tokenizer {
}
end += charCount;
length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized
if (length >= MAX_WORD_LEN) { // buffer overflow! make sure to check for >= surrogate pair could break == test
if (length >= maxTokenLen) { // buffer overflow! make sure to check for >= surrogate pair could break == test
break;
}
} else if (length > 0) { // at non-Letter w/ chars
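Because the new two-argument constructor is the hook subclasses use, here is a sketch of a custom CharTokenizer built on it; DigitTokenizer is a hypothetical class, not part of this change:

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeFactory;

/** Hypothetical tokenizer: emits runs of ASCII digits, capped at maxTokenLen chars per token. */
public final class DigitTokenizer extends CharTokenizer {
  public DigitTokenizer(AttributeFactory factory, int maxTokenLen) {
    super(factory, maxTokenLen); // throws IllegalArgumentException unless 0 < maxTokenLen <= 1024*1024
  }

  @Override
  protected boolean isTokenChar(int c) {
    return c >= '0' && c <= '9';
  }

  public static void main(String[] args) throws IOException {
    Tokenizer tok = new DigitTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, 4);
    tok.setReader(new StringReader("order 1234567 on 2017-04-27"));
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    tok.reset();
    while (tok.incrementToken()) {
      System.out.println(term.toString()); // "1234", "567", "2017", "04", "27" with maxTokenLen=4
    }
    tok.end();
    tok.close();
  }
}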
@ -24,15 +24,15 @@ import org.apache.lucene.util.SparseFixedBitSet;

/**
* This file contains unicode properties used by various {@link CharTokenizer}s.
* The data was created using ICU4J v56.1.0.0
* The data was created using ICU4J v59.1.0.0
* <p>
* Unicode version: 8.0.0.0
* Unicode version: 9.0.0.0
*/
public final class UnicodeProps {
private UnicodeProps() {}

/** Unicode version that was used to generate this file: {@value} */
public static final String UNICODE_VERSION = "8.0.0.0";
public static final String UNICODE_VERSION = "9.0.0.0";

/** Bitset with Unicode WHITESPACE code points. */
public static final Bits WHITESPACE = createBits(
@ -31,6 +31,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package org.tartarus.snowball;

import java.lang.reflect.UndeclaredThrowableException;

import org.apache.lucene.util.ArrayUtil;

/**
@ -313,8 +315,10 @@ public abstract class SnowballProgram {
boolean res = false;
try {
res = (boolean) w.method.invokeExact(this);
} catch (Error | RuntimeException e) {
throw e;
} catch (Throwable e) {
rethrow(e);
throw new UndeclaredThrowableException(e);
}
cursor = c + w.s_size;
if (res) return w.result;
@ -376,8 +380,10 @@ public abstract class SnowballProgram {
boolean res = false;
try {
res = (boolean) w.method.invokeExact(this);
} catch (Error | RuntimeException e) {
throw e;
} catch (Throwable e) {
rethrow(e);
throw new UndeclaredThrowableException(e);
}
cursor = c - w.s_size;
if (res) return w.result;
@ -485,15 +491,5 @@ extern void debug(struct SN_env * z, int number, int line_count)
printf("'\n");
}
*/

// Hack to rethrow unknown Exceptions from {@link MethodHandle#invoke}:
private static void rethrow(Throwable t) {
SnowballProgram.<Error>rethrow0(t);
}

@SuppressWarnings("unchecked")
private static <T extends Throwable> void rethrow0(Throwable t) throws T {
throw (T) t;
}
};
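For readers unfamiliar with the idiom that LUCENE-7800 removes, a self-contained sketch of what the "sneaky throw" hack does, next to the kind of wrapper the patch switches to; the class and method names are illustrative:

import java.lang.reflect.UndeclaredThrowableException;

public class SneakyThrowDemo {
  // The removed idiom: the unchecked cast plus generic type inference lets a checked
  // Throwable propagate from a method that declares no checked exceptions.
  @SuppressWarnings("unchecked")
  static <T extends Throwable> void sneakyThrow(Throwable t) throws T {
    throw (T) t; // T is inferred as RuntimeException at the call site
  }

  // The style of replacement used in the diff above: let Error and RuntimeException
  // propagate unchanged and wrap anything else in UndeclaredThrowableException.
  static RuntimeException wrapUnknown(Throwable t) {
    if (t instanceof Error) {
      throw (Error) t;
    }
    if (t instanceof RuntimeException) {
      return (RuntimeException) t;
    }
    return new UndeclaredThrowableException(t);
  }

  public static void main(String[] args) {
    try {
      sneakyThrow(new Exception("checked, but nobody had to declare it"));
    } catch (Exception e) {
      System.out.println("caught: " + e.getMessage());
    }
  }
}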
@ -53,7 +53,7 @@
<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent'
characters as described before, between any two word characters a digit
in the range 0 to 9 may be specified. The absence of a digit is equivalent
to zero. The '.' character is reserved to indicate begining or ending
to zero. The '.' character is reserved to indicate beginning or ending
of words. -->
<!ELEMENT patterns (#PCDATA)>

@ -54,7 +54,7 @@
<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent'
characters as described before, between any two word characters a digit
in the range 0 to 9 may be specified. The absence of a digit is equivalent
to zero. The '.' character is reserved to indicate begining or ending
to zero. The '.' character is reserved to indicate beginning or ending
of words. -->
<!ELEMENT patterns (#PCDATA)>
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.core;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
|
||||
public class TestKeywordTokenizer extends BaseTokenStreamTestCase {
|
||||
|
||||
public void testSimple() throws IOException {
|
||||
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
|
||||
KeywordTokenizer tokenizer = new KeywordTokenizer();
|
||||
tokenizer.setReader(reader);
|
||||
assertTokenStreamContents(tokenizer, new String[]{"Tokenizer \ud801\udc1ctest"});
|
||||
}
|
||||
|
||||
public void testFactory() {
|
||||
Map<String, String> args = new HashMap<>();
|
||||
KeywordTokenizerFactory factory = new KeywordTokenizerFactory(args);
|
||||
AttributeFactory attributeFactory = newAttributeFactory();
|
||||
Tokenizer tokenizer = factory.create(attributeFactory);
|
||||
assertEquals(KeywordTokenizer.class, tokenizer.getClass());
|
||||
}
|
||||
|
||||
private Map<String, String> makeArgs(String... args) {
|
||||
Map<String, String> ret = new HashMap<>();
|
||||
for (int idx = 0; idx < args.length; idx += 2) {
|
||||
ret.put(args[idx], args[idx + 1]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
public void testParamsFactory() throws IOException {
|
||||
// negative maxTokenLen
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () ->
|
||||
new KeywordTokenizerFactory(makeArgs("maxTokenLen", "-1")));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", iae.getMessage());
|
||||
|
||||
// zero maxTokenLen
|
||||
iae = expectThrows(IllegalArgumentException.class, () ->
|
||||
new KeywordTokenizerFactory(makeArgs("maxTokenLen", "0")));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", iae.getMessage());
|
||||
|
||||
// Added random param, should throw illegal error
|
||||
iae = expectThrows(IllegalArgumentException.class, () ->
|
||||
new KeywordTokenizerFactory(makeArgs("maxTokenLen", "255", "randomParam", "rValue")));
|
||||
assertEquals("Unknown parameters: {randomParam=rValue}", iae.getMessage());
|
||||
|
||||
// tokeniser will never split, no matter what is passed,
|
||||
// but the buffer will not be more than length of the token
|
||||
|
||||
KeywordTokenizerFactory factory = new KeywordTokenizerFactory(makeArgs("maxTokenLen", "5"));
|
||||
AttributeFactory attributeFactory = newAttributeFactory();
|
||||
Tokenizer tokenizer = factory.create(attributeFactory);
|
||||
StringReader reader = new StringReader("Tokenizertest");
|
||||
tokenizer.setReader(reader);
|
||||
assertTokenStreamContents(tokenizer, new String[]{"Tokenizertest"});
|
||||
|
||||
// tokeniser will never split, no matter what is passed,
|
||||
// but the buffer will not be more than length of the token
|
||||
factory = new KeywordTokenizerFactory(makeArgs("maxTokenLen", "2"));
|
||||
attributeFactory = newAttributeFactory();
|
||||
tokenizer = factory.create(attributeFactory);
|
||||
reader = new StringReader("Tokenizer\u00A0test");
|
||||
tokenizer.setReader(reader);
|
||||
assertTokenStreamContents(tokenizer, new String[]{"Tokenizer\u00A0test"});
|
||||
}
|
||||
}
|
@ -54,4 +54,55 @@ public class TestUnicodeWhitespaceTokenizer extends BaseTokenStreamTestCase {
|
||||
assertEquals(UnicodeWhitespaceTokenizer.class, tokenizer.getClass());
|
||||
}
|
||||
|
||||
private Map<String, String> makeArgs(String... args) {
|
||||
Map<String, String> ret = new HashMap<>();
|
||||
for (int idx = 0; idx < args.length; idx += 2) {
|
||||
ret.put(args[idx], args[idx + 1]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
public void testParamsFactory() throws IOException {
|
||||
|
||||
|
||||
// negative maxTokenLen
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () ->
|
||||
new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "-1")));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", iae.getMessage());
|
||||
|
||||
// zero maxTokenLen
|
||||
iae = expectThrows(IllegalArgumentException.class, () ->
|
||||
new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "0")));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", iae.getMessage());
|
||||
|
||||
// Added random param, should throw illegal error
|
||||
iae = expectThrows(IllegalArgumentException.class, () ->
|
||||
new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "255", "randomParam", "rValue")));
|
||||
assertEquals("Unknown parameters: {randomParam=rValue}", iae.getMessage());
|
||||
|
||||
// tokeniser will split at 5, Token | izer, no matter what happens
|
||||
WhitespaceTokenizerFactory factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "5"));
|
||||
AttributeFactory attributeFactory = newAttributeFactory();
|
||||
Tokenizer tokenizer = factory.create(attributeFactory);
|
||||
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
|
||||
tokenizer.setReader(reader);
|
||||
assertTokenStreamContents(tokenizer, new String[]{"Token", "izer", "\ud801\udc1ctes", "t"});
|
||||
|
||||
// tokeniser will split at 2, To | ke | ni | ze | r, no matter what happens
|
||||
factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "2"));
|
||||
attributeFactory = newAttributeFactory();
|
||||
tokenizer = factory.create(attributeFactory);
|
||||
reader = new StringReader("Tokenizer\u00A0test");
|
||||
tokenizer.setReader(reader);
|
||||
assertTokenStreamContents(tokenizer, new String[]{"To", "ke", "ni", "ze", "r", "te", "st"});
|
||||
|
||||
// tokeniser will split at 10, no matter what happens,
|
||||
// but tokens' length are less than that
|
||||
factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "10"));
|
||||
attributeFactory = newAttributeFactory();
|
||||
tokenizer = factory.create(attributeFactory);
|
||||
reader = new StringReader("Tokenizer\u00A0test");
|
||||
tokenizer.setReader(reader);
|
||||
assertTokenStreamContents(tokenizer, new String[]{"Tokenizer", "test"});
|
||||
}
|
||||
}
|
||||
|
@ -25,8 +25,10 @@ import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.core.LetterTokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
@ -89,6 +91,99 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
|
||||
tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
|
||||
}
|
||||
|
||||
/*
|
||||
* tests the max word length passed as parameter - tokenizer will split at the passed position char no matter what happens
|
||||
*/
|
||||
public void testCustomMaxTokenLength() throws IOException {
|
||||
|
||||
StringBuilder builder = new StringBuilder();
|
||||
for (int i = 0; i < 100; i++) {
|
||||
builder.append("A");
|
||||
}
|
||||
Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 100);
|
||||
// Tricky, passing two copies of the string to the reader....
|
||||
tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT),
|
||||
builder.toString().toLowerCase(Locale.ROOT) });
|
||||
|
||||
Exception e = expectThrows(IllegalArgumentException.class, () ->
|
||||
new LowerCaseTokenizer(newAttributeFactory(), -1));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", e.getMessage());
|
||||
|
||||
tokenizer = new LetterTokenizer(newAttributeFactory(), 100);
|
||||
tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, new String[]{builder.toString(), builder.toString()});
|
||||
|
||||
|
||||
// Let's test that we can get a token longer than 255 through.
|
||||
builder.setLength(0);
|
||||
for (int i = 0; i < 500; i++) {
|
||||
builder.append("Z");
|
||||
}
|
||||
tokenizer = new LetterTokenizer(newAttributeFactory(), 500);
|
||||
tokenizer.setReader(new StringReader(builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, new String[]{builder.toString()});
|
||||
|
||||
|
||||
// Just to be sure what is happening here, token lengths of zero make no sense,
|
||||
// Let's try the edge cases, token > I/O buffer (4096)
|
||||
builder.setLength(0);
|
||||
for (int i = 0; i < 600; i++) {
|
||||
builder.append("aUrOkIjq"); // 600 * 8 = 4800 chars.
|
||||
}
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class, () ->
|
||||
new LowerCaseTokenizer(newAttributeFactory(), 0));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class, () ->
|
||||
new LowerCaseTokenizer(newAttributeFactory(), 10_000_000));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 10000000", e.getMessage());
|
||||
|
||||
tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 4800);
|
||||
tokenizer.setReader(new StringReader(builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT)});
|
||||
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class, () ->
|
||||
new KeywordTokenizer(newAttributeFactory(), 0));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class, () ->
|
||||
new KeywordTokenizer(newAttributeFactory(), 10_000_000));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 10000000", e.getMessage());
|
||||
|
||||
|
||||
tokenizer = new KeywordTokenizer(newAttributeFactory(), 4800);
|
||||
tokenizer.setReader(new StringReader(builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, new String[]{builder.toString()});
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class, () ->
|
||||
new LetterTokenizer(newAttributeFactory(), 0));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class, () ->
|
||||
new LetterTokenizer(newAttributeFactory(), 2_000_000));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 2000000", e.getMessage());
|
||||
|
||||
tokenizer = new LetterTokenizer(newAttributeFactory(), 4800);
|
||||
tokenizer.setReader(new StringReader(builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, new String[]{builder.toString()});
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class, () ->
|
||||
new WhitespaceTokenizer(newAttributeFactory(), 0));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class, () ->
|
||||
new WhitespaceTokenizer(newAttributeFactory(), 3_000_000));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 3000000", e.getMessage());
|
||||
|
||||
tokenizer = new WhitespaceTokenizer(newAttributeFactory(), 4800);
|
||||
tokenizer.setReader(new StringReader(builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, new String[]{builder.toString()});
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* tests the max word length of 255 with a surrogate pair at position 255
|
||||
|
@ -168,11 +168,14 @@ FFE3>
|
||||
1134D>
|
||||
11366..1136C>
|
||||
11370..11374>
|
||||
11442>
|
||||
11446>
|
||||
114C2..114C3>
|
||||
115BF..115C0>
|
||||
1163F>
|
||||
116B6..116B7>
|
||||
1172B>
|
||||
11C3F>
|
||||
16AF0..16AF4>
|
||||
16F8F..16F9F>
|
||||
1D167..1D169>
|
||||
@ -181,6 +184,8 @@ FFE3>
|
||||
1D185..1D18B>
|
||||
1D1AA..1D1AD>
|
||||
1E8D0..1E8D6>
|
||||
1E944..1E946>
|
||||
1E948..1E94A>
|
||||
|
||||
# Latin script "composed" that do not further decompose, so decompose here
|
||||
# These are from AsciiFoldingFilter
|
||||
|
@ -510,6 +510,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
|
||||
112F7>0037 # KHUDAWADI DIGIT SEVEN
|
||||
112F8>0038 # KHUDAWADI DIGIT EIGHT
|
||||
112F9>0039 # KHUDAWADI DIGIT NINE
|
||||
11450>0030 # NEWA DIGIT ZERO
|
||||
11451>0031 # NEWA DIGIT ONE
|
||||
11452>0032 # NEWA DIGIT TWO
|
||||
11453>0033 # NEWA DIGIT THREE
|
||||
11454>0034 # NEWA DIGIT FOUR
|
||||
11455>0035 # NEWA DIGIT FIVE
|
||||
11456>0036 # NEWA DIGIT SIX
|
||||
11457>0037 # NEWA DIGIT SEVEN
|
||||
11458>0038 # NEWA DIGIT EIGHT
|
||||
11459>0039 # NEWA DIGIT NINE
|
||||
114D0>0030 # TIRHUTA DIGIT ZERO
|
||||
114D1>0031 # TIRHUTA DIGIT ONE
|
||||
114D2>0032 # TIRHUTA DIGIT TWO
|
||||
@ -560,6 +570,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
|
||||
118E7>0037 # WARANG CITI DIGIT SEVEN
|
||||
118E8>0038 # WARANG CITI DIGIT EIGHT
|
||||
118E9>0039 # WARANG CITI DIGIT NINE
|
||||
11C50>0030 # BHAIKSUKI DIGIT ZERO
|
||||
11C51>0031 # BHAIKSUKI DIGIT ONE
|
||||
11C52>0032 # BHAIKSUKI DIGIT TWO
|
||||
11C53>0033 # BHAIKSUKI DIGIT THREE
|
||||
11C54>0034 # BHAIKSUKI DIGIT FOUR
|
||||
11C55>0035 # BHAIKSUKI DIGIT FIVE
|
||||
11C56>0036 # BHAIKSUKI DIGIT SIX
|
||||
11C57>0037 # BHAIKSUKI DIGIT SEVEN
|
||||
11C58>0038 # BHAIKSUKI DIGIT EIGHT
|
||||
11C59>0039 # BHAIKSUKI DIGIT NINE
|
||||
16A60>0030 # MRO DIGIT ZERO
|
||||
16A61>0031 # MRO DIGIT ONE
|
||||
16A62>0032 # MRO DIGIT TWO
|
||||
@ -580,4 +600,14 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
|
||||
16B57>0037 # PAHAWH HMONG DIGIT SEVEN
|
||||
16B58>0038 # PAHAWH HMONG DIGIT EIGHT
|
||||
16B59>0039 # PAHAWH HMONG DIGIT NINE
|
||||
1E950>0030 # ADLAM DIGIT ZERO
|
||||
1E951>0031 # ADLAM DIGIT ONE
|
||||
1E952>0032 # ADLAM DIGIT TWO
|
||||
1E953>0033 # ADLAM DIGIT THREE
|
||||
1E954>0034 # ADLAM DIGIT FOUR
|
||||
1E955>0035 # ADLAM DIGIT FIVE
|
||||
1E956>0036 # ADLAM DIGIT SIX
|
||||
1E957>0037 # ADLAM DIGIT SEVEN
|
||||
1E958>0038 # ADLAM DIGIT EIGHT
|
||||
1E959>0039 # ADLAM DIGIT NINE
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Copyright (C) 1999-2014, International Business Machines
|
||||
# Copyright (C) 1999-2016, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# file name: nfc.txt
|
||||
@ -7,7 +7,7 @@
|
||||
#
|
||||
# Complete data for Unicode NFC normalization.
|
||||
|
||||
* Unicode 7.0.0
|
||||
* Unicode 9.0.0
|
||||
|
||||
# Canonical_Combining_Class (ccc) values
|
||||
0300..0314:230
|
||||
@ -129,6 +129,8 @@
|
||||
0825..0827:230
|
||||
0829..082D:230
|
||||
0859..085B:220
|
||||
08D4..08E1:230
|
||||
08E3:220
|
||||
08E4..08E5:230
|
||||
08E6:220
|
||||
08E7..08E8:230
|
||||
@ -232,6 +234,7 @@
|
||||
1DCF:220
|
||||
1DD0:202
|
||||
1DD1..1DF5:230
|
||||
1DFB:230
|
||||
1DFC:233
|
||||
1DFD:220
|
||||
1DFE:230
|
||||
@ -260,7 +263,7 @@
|
||||
3099..309A:8
|
||||
A66F:230
|
||||
A674..A67D:230
|
||||
A69F:230
|
||||
A69E..A69F:230
|
||||
A6F0..A6F1:230
|
||||
A806:9
|
||||
A8C4:9
|
||||
@ -280,6 +283,7 @@ ABED:9
|
||||
FB1E:26
|
||||
FE20..FE26:230
|
||||
FE27..FE2D:220
|
||||
FE2E..FE2F:230
|
||||
101FD:220
|
||||
102E0:220
|
||||
10376..1037A:230
|
||||
@ -299,6 +303,7 @@ FE27..FE2D:220
|
||||
11133..11134:9
|
||||
11173:7
|
||||
111C0:9
|
||||
111CA:7
|
||||
11235:9
|
||||
11236:7
|
||||
112E9:7
|
||||
@ -307,6 +312,8 @@ FE27..FE2D:220
|
||||
1134D:9
|
||||
11366..1136C:230
|
||||
11370..11374:230
|
||||
11442:9
|
||||
11446:7
|
||||
114C2:9
|
||||
114C3:7
|
||||
115BF:9
|
||||
@ -314,6 +321,8 @@ FE27..FE2D:220
|
||||
1163F:9
|
||||
116B6:9
|
||||
116B7:7
|
||||
1172B:9
|
||||
11C3F:9
|
||||
16AF0..16AF4:1
|
||||
16B30..16B36:230
|
||||
1BC9E:1
|
||||
@ -326,7 +335,14 @@ FE27..FE2D:220
|
||||
1D18A..1D18B:220
|
||||
1D1AA..1D1AD:230
|
||||
1D242..1D244:230
|
||||
1E000..1E006:230
|
||||
1E008..1E018:230
|
||||
1E01B..1E021:230
|
||||
1E023..1E024:230
|
||||
1E026..1E02A:230
|
||||
1E8D0..1E8D6:220
|
||||
1E944..1E949:230
|
||||
1E94A:7
|
||||
|
||||
# Canonical decomposition mappings
|
||||
00C0>0041 0300 # one-way: diacritic 0300
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Copyright (C) 1999-2014, International Business Machines
|
||||
# Copyright (C) 1999-2016, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# file name: nfkc.txt
|
||||
@ -11,7 +11,7 @@
|
||||
# to NFKC one-way mappings.
|
||||
# Use this file as the second gennorm2 input file after nfc.txt.
|
||||
|
||||
* Unicode 7.0.0
|
||||
* Unicode 9.0.0
|
||||
|
||||
00A0>0020
|
||||
00A8>0020 0308
|
||||
@ -3675,6 +3675,7 @@ FFEE>25CB
|
||||
1F238>7533
|
||||
1F239>5272
|
||||
1F23A>55B6
|
||||
1F23B>914D
|
||||
1F240>3014 672C 3015
|
||||
1F241>3014 4E09 3015
|
||||
1F242>3014 4E8C 3015
|
||||
|
@ -1,5 +1,5 @@
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2014 Unicode, Inc.
|
||||
# Copyright (c) 1991-2016 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
@ -12,7 +12,7 @@
|
||||
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
|
||||
# Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.
|
||||
|
||||
* Unicode 7.0.0
|
||||
* Unicode 9.0.0
|
||||
|
||||
0041>0061
|
||||
0042>0062
|
||||
@ -632,8 +632,22 @@
|
||||
10CD>2D2D
|
||||
10FC>10DC
|
||||
115F..1160>
|
||||
13F8>13F0
|
||||
13F9>13F1
|
||||
13FA>13F2
|
||||
13FB>13F3
|
||||
13FC>13F4
|
||||
13FD>13F5
|
||||
17B4..17B5>
|
||||
180B..180E>
|
||||
1C80>0432
|
||||
1C81>0434
|
||||
1C82>043E
|
||||
1C83>0441
|
||||
1C84..1C85>0442
|
||||
1C86>044A
|
||||
1C87>0463
|
||||
1C88>A64B
|
||||
1D2C>0061
|
||||
1D2D>00E6
|
||||
1D2E>0062
|
||||
@ -2382,14 +2396,99 @@ A7AA>0266
|
||||
A7AB>025C
|
||||
A7AC>0261
|
||||
A7AD>026C
|
||||
A7AE>026A
|
||||
A7B0>029E
|
||||
A7B1>0287
|
||||
A7B2>029D
|
||||
A7B3>AB53
|
||||
A7B4>A7B5
|
||||
A7B6>A7B7
|
||||
A7F8>0127
|
||||
A7F9>0153
|
||||
AB5C>A727
|
||||
AB5D>AB37
|
||||
AB5E>026B
|
||||
AB5F>AB52
|
||||
AB70>13A0
|
||||
AB71>13A1
|
||||
AB72>13A2
|
||||
AB73>13A3
|
||||
AB74>13A4
|
||||
AB75>13A5
|
||||
AB76>13A6
|
||||
AB77>13A7
|
||||
AB78>13A8
|
||||
AB79>13A9
|
||||
AB7A>13AA
|
||||
AB7B>13AB
|
||||
AB7C>13AC
|
||||
AB7D>13AD
|
||||
AB7E>13AE
|
||||
AB7F>13AF
|
||||
AB80>13B0
|
||||
AB81>13B1
|
||||
AB82>13B2
|
||||
AB83>13B3
|
||||
AB84>13B4
|
||||
AB85>13B5
|
||||
AB86>13B6
|
||||
AB87>13B7
|
||||
AB88>13B8
|
||||
AB89>13B9
|
||||
AB8A>13BA
|
||||
AB8B>13BB
|
||||
AB8C>13BC
|
||||
AB8D>13BD
|
||||
AB8E>13BE
|
||||
AB8F>13BF
|
||||
AB90>13C0
|
||||
AB91>13C1
|
||||
AB92>13C2
|
||||
AB93>13C3
|
||||
AB94>13C4
|
||||
AB95>13C5
|
||||
AB96>13C6
|
||||
AB97>13C7
|
||||
AB98>13C8
|
||||
AB99>13C9
|
||||
AB9A>13CA
|
||||
AB9B>13CB
|
||||
AB9C>13CC
|
||||
AB9D>13CD
|
||||
AB9E>13CE
|
||||
AB9F>13CF
|
||||
ABA0>13D0
|
||||
ABA1>13D1
|
||||
ABA2>13D2
|
||||
ABA3>13D3
|
||||
ABA4>13D4
|
||||
ABA5>13D5
|
||||
ABA6>13D6
|
||||
ABA7>13D7
|
||||
ABA8>13D8
|
||||
ABA9>13D9
|
||||
ABAA>13DA
|
||||
ABAB>13DB
|
||||
ABAC>13DC
|
||||
ABAD>13DD
|
||||
ABAE>13DE
|
||||
ABAF>13DF
|
||||
ABB0>13E0
|
||||
ABB1>13E1
|
||||
ABB2>13E2
|
||||
ABB3>13E3
|
||||
ABB4>13E4
|
||||
ABB5>13E5
|
||||
ABB6>13E6
|
||||
ABB7>13E7
|
||||
ABB8>13E8
|
||||
ABB9>13E9
|
||||
ABBA>13EA
|
||||
ABBB>13EB
|
||||
ABBC>13EC
|
||||
ABBD>13ED
|
||||
ABBE>13EE
|
||||
ABBF>13EF
|
||||
F900>8C48
|
||||
F901>66F4
|
||||
F902>8ECA
|
||||
@ -3766,6 +3865,93 @@ FFF0..FFF8>
|
||||
10425>1044D
|
||||
10426>1044E
|
||||
10427>1044F
|
||||
104B0>104D8
|
||||
104B1>104D9
|
||||
104B2>104DA
|
||||
104B3>104DB
|
||||
104B4>104DC
|
||||
104B5>104DD
|
||||
104B6>104DE
|
||||
104B7>104DF
|
||||
104B8>104E0
|
||||
104B9>104E1
|
||||
104BA>104E2
|
||||
104BB>104E3
|
||||
104BC>104E4
|
||||
104BD>104E5
|
||||
104BE>104E6
|
||||
104BF>104E7
|
||||
104C0>104E8
|
||||
104C1>104E9
|
||||
104C2>104EA
|
||||
104C3>104EB
|
||||
104C4>104EC
|
||||
104C5>104ED
|
||||
104C6>104EE
|
||||
104C7>104EF
|
||||
104C8>104F0
|
||||
104C9>104F1
|
||||
104CA>104F2
|
||||
104CB>104F3
|
||||
104CC>104F4
|
||||
104CD>104F5
|
||||
104CE>104F6
|
||||
104CF>104F7
|
||||
104D0>104F8
|
||||
104D1>104F9
|
||||
104D2>104FA
|
||||
104D3>104FB
|
||||
10C80>10CC0
|
||||
10C81>10CC1
|
||||
10C82>10CC2
|
||||
10C83>10CC3
|
||||
10C84>10CC4
|
||||
10C85>10CC5
|
||||
10C86>10CC6
|
||||
10C87>10CC7
|
||||
10C88>10CC8
|
||||
10C89>10CC9
|
||||
10C8A>10CCA
|
||||
10C8B>10CCB
|
||||
10C8C>10CCC
|
||||
10C8D>10CCD
|
||||
10C8E>10CCE
|
||||
10C8F>10CCF
|
||||
10C90>10CD0
|
||||
10C91>10CD1
|
||||
10C92>10CD2
|
||||
10C93>10CD3
|
||||
10C94>10CD4
|
||||
10C95>10CD5
|
||||
10C96>10CD6
|
||||
10C97>10CD7
|
||||
10C98>10CD8
|
||||
10C99>10CD9
|
||||
10C9A>10CDA
|
||||
10C9B>10CDB
|
||||
10C9C>10CDC
|
||||
10C9D>10CDD
|
||||
10C9E>10CDE
|
||||
10C9F>10CDF
|
||||
10CA0>10CE0
|
||||
10CA1>10CE1
|
||||
10CA2>10CE2
|
||||
10CA3>10CE3
|
||||
10CA4>10CE4
|
||||
10CA5>10CE5
|
||||
10CA6>10CE6
|
||||
10CA7>10CE7
|
||||
10CA8>10CE8
|
||||
10CA9>10CE9
|
||||
10CAA>10CEA
|
||||
10CAB>10CEB
|
||||
10CAC>10CEC
|
||||
10CAD>10CED
|
||||
10CAE>10CEE
|
||||
10CAF>10CEF
|
||||
10CB0>10CF0
|
||||
10CB1>10CF1
|
||||
10CB2>10CF2
|
||||
118A0>118C0
|
||||
118A1>118C1
|
||||
118A2>118C2
|
||||
@ -4803,6 +4989,40 @@ FFF0..FFF8>
|
||||
1D7FD>0037
|
||||
1D7FE>0038
|
||||
1D7FF>0039
|
||||
1E900>1E922
|
||||
1E901>1E923
|
||||
1E902>1E924
|
||||
1E903>1E925
|
||||
1E904>1E926
|
||||
1E905>1E927
|
||||
1E906>1E928
|
||||
1E907>1E929
|
||||
1E908>1E92A
|
||||
1E909>1E92B
|
||||
1E90A>1E92C
|
||||
1E90B>1E92D
|
||||
1E90C>1E92E
|
||||
1E90D>1E92F
|
||||
1E90E>1E930
|
||||
1E90F>1E931
|
||||
1E910>1E932
|
||||
1E911>1E933
|
||||
1E912>1E934
|
||||
1E913>1E935
|
||||
1E914>1E936
|
||||
1E915>1E937
|
||||
1E916>1E938
|
||||
1E917>1E939
|
||||
1E918>1E93A
|
||||
1E919>1E93B
|
||||
1E91A>1E93C
|
||||
1E91B>1E93D
|
||||
1E91C>1E93E
|
||||
1E91D>1E93F
|
||||
1E91E>1E940
|
||||
1E91F>1E941
|
||||
1E920>1E942
|
||||
1E921>1E943
|
||||
1EE00>0627
|
||||
1EE01>0628
|
||||
1EE02>062C
|
||||
@ -5067,6 +5287,7 @@ FFF0..FFF8>
|
||||
1F238>7533
|
||||
1F239>5272
|
||||
1F23A>55B6
|
||||
1F23B>914D
|
||||
1F240>3014 672C 3015
|
||||
1F241>3014 4E09 3015
|
||||
1F242>3014 4E8C 3015
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -53,7 +53,14 @@ public class TestICUTokenizerCJK extends BaseTokenStreamTestCase {
        new String[] { "我", "购买", "了", "道具", "和", "服装" }
    );
  }

  public void testTraditionalChinese() throws Exception {
    assertAnalyzesTo(a, "我購買了道具和服裝。",
        new String[] { "我", "購買", "了", "道具", "和", "服裝"});
    assertAnalyzesTo(a, "定義切分字串的基本單位是訂定分詞標準的首要工作", // From http://godel.iis.sinica.edu.tw/CKIP/paper/wordsegment_standard.pdf
        new String[] { "定義", "切", "分", "字串", "的", "基本", "單位", "是", "訂定", "分詞", "標準", "的", "首要", "工作" });
  }

  public void testChineseNumerics() throws Exception {
    assertAnalyzesTo(a, "9483", new String[] { "9483" });
    assertAnalyzesTo(a, "院內分機9483。",
@ -63,7 +63,7 @@ import java.util.regex.Pattern;
public class GenerateUTR30DataFiles {
  private static final String ICU_SVN_TAG_URL
      = "http://source.icu-project.org/repos/icu/icu/tags";
  private static final String ICU_RELEASE_TAG = "release-54-1";
  private static final String ICU_RELEASE_TAG = "release-58-1";
  private static final String ICU_DATA_NORM2_PATH = "source/data/unidata/norm2";
  private static final String NFC_TXT = "nfc.txt";
  private static final String NFKC_TXT = "nfkc.txt";
@ -116,6 +116,8 @@ public final class UkrainianMorfologikAnalyzer extends StopwordAnalyzerBase {
    // ignored characters
    builder.add("\u0301", "");
    builder.add("\u00AD", "");
    builder.add("ґ", "г");
    builder.add("Ґ", "Г");

    NormalizeCharMap normMap = builder.build();
    reader = new MappingCharFilter(normMap, reader);
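(Editor's note: the lines added above act at the char-filter level, before tokenization. Below is a minimal, hypothetical sketch of the same NormalizeCharMap applied to a plain Reader; the class name and the input string are illustrative and not part of this patch.)

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;

public class UkrainianCharMapSketch {
  public static void main(String[] args) throws IOException {
    // Same mappings as the analyzer above: drop the combining acute accent and the
    // soft hyphen, and fold ґ/Ґ to г/Г before the tokenizer ever sees the text.
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("\u0301", "");
    builder.add("\u00AD", "");
    builder.add("ґ", "г");
    builder.add("Ґ", "Г");
    Reader filtered = new MappingCharFilter(builder.build(), new StringReader("Ґюмрі"));
    char[] buf = new char[16];
    int len = filtered.read(buf);      // short input, read in one call for this sketch
    System.out.println(new String(buf, 0, len)); // prints "Гюмрі"
  }
}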
@ -52,10 +52,17 @@ public class TestUkrainianAnalyzer extends BaseTokenStreamTestCase {
  public void testCapsTokenStream() throws Exception {
    Analyzer a = new UkrainianMorfologikAnalyzer();
    assertAnalyzesTo(a, "Цих Чайковського і Ґете.",
        new String[] { "Чайковське", "Чайковський", "Ґете" });
        new String[] { "Чайковське", "Чайковський", "Гете" });
    a.close();
  }

  public void testCharNormalization() throws Exception {
    Analyzer a = new UkrainianMorfologikAnalyzer();
    assertAnalyzesTo(a, "Ґюмрі та Гюмрі.",
        new String[] { "Гюмрі", "Гюмрі" });
    a.close();
  }

  public void testSampleSentence() throws Exception {
    Analyzer a = new UkrainianMorfologikAnalyzer();
    assertAnalyzesTo(a, "Це — проект генерування словника з тегами частин мови для української мови.",
@ -60,10 +60,6 @@ import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.legacy.LegacyIntField;
|
||||
import org.apache.lucene.legacy.LegacyLongField;
|
||||
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
|
||||
import org.apache.lucene.legacy.LegacyNumericUtils;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
@ -299,7 +295,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
      "6.4.2-cfs",
      "6.4.2-nocfs",
      "6.5.0-cfs",
      "6.5.0-nocfs"
      "6.5.0-nocfs",
      "6.5.1-cfs",
      "6.5.1-nocfs"
  };

  final String[] unsupportedNames = {
@ -1112,9 +1110,6 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||
doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
|
||||
doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
|
||||
doc.add(new Field("fie\u2C77ld", "field with non-ascii name", customType2));
|
||||
// add numeric fields, to test if flex preserves encoding
|
||||
doc.add(new LegacyIntField("trieInt", id, Field.Store.NO));
|
||||
doc.add(new LegacyLongField("trieLong", (long) id, Field.Store.NO));
|
||||
|
||||
// add docvalues fields
|
||||
doc.add(new NumericDocValuesField("dvByte", (byte) id));
|
||||
@ -1292,51 +1287,6 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
public void testNumericFields() throws Exception {
|
||||
for (String name : oldNames) {
|
||||
|
||||
Directory dir = oldIndexDirs.get(name);
|
||||
IndexReader reader = DirectoryReader.open(dir);
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
for (int id=10; id<15; id++) {
|
||||
ScoreDoc[] hits = searcher.search(LegacyNumericRangeQuery.newIntRange("trieInt", LegacyNumericUtils.PRECISION_STEP_DEFAULT_32, Integer.valueOf(id), Integer.valueOf(id), true, true), 100).scoreDocs;
|
||||
assertEquals("wrong number of hits", 1, hits.length);
|
||||
Document d = searcher.doc(hits[0].doc);
|
||||
assertEquals(String.valueOf(id), d.get("id"));
|
||||
|
||||
hits = searcher.search(LegacyNumericRangeQuery.newLongRange("trieLong", LegacyNumericUtils.PRECISION_STEP_DEFAULT, Long.valueOf(id), Long.valueOf(id), true, true), 100).scoreDocs;
|
||||
assertEquals("wrong number of hits", 1, hits.length);
|
||||
d = searcher.doc(hits[0].doc);
|
||||
assertEquals(String.valueOf(id), d.get("id"));
|
||||
}
|
||||
|
||||
// check that also lower-precision fields are ok
|
||||
ScoreDoc[] hits = searcher.search(LegacyNumericRangeQuery.newIntRange("trieInt", LegacyNumericUtils.PRECISION_STEP_DEFAULT_32, Integer.MIN_VALUE, Integer.MAX_VALUE, false, false), 100).scoreDocs;
|
||||
assertEquals("wrong number of hits", 34, hits.length);
|
||||
|
||||
hits = searcher.search(LegacyNumericRangeQuery.newLongRange("trieLong", LegacyNumericUtils.PRECISION_STEP_DEFAULT, Long.MIN_VALUE, Long.MAX_VALUE, false, false), 100).scoreDocs;
|
||||
assertEquals("wrong number of hits", 34, hits.length);
|
||||
|
||||
// check decoding of terms
|
||||
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "trieInt");
|
||||
TermsEnum termsEnum = LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
|
||||
while (termsEnum.next() != null) {
|
||||
int val = LegacyNumericUtils.prefixCodedToInt(termsEnum.term());
|
||||
assertTrue("value in id bounds", val >= 0 && val < 35);
|
||||
}
|
||||
|
||||
terms = MultiFields.getTerms(searcher.getIndexReader(), "trieLong");
|
||||
termsEnum = LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
|
||||
while (termsEnum.next() != null) {
|
||||
long val = LegacyNumericUtils.prefixCodedToLong(termsEnum.term());
|
||||
assertTrue("value in id bounds", val >= 0L && val < 35L);
|
||||
}
|
||||
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
||||
private int checkAllSegmentsUpgraded(Directory dir, int indexCreatedVersion) throws IOException {
|
||||
final SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
|
||||
if (VERBOSE) {
|
||||
|
Binary file not shown.
Binary file not shown.
@ -38,7 +38,7 @@ file.query.maker.file=conf/query-terms.txt
log.queries=false
log.step.SearchTravRetHighlight=-1

highlighter=HlImpl:NONE:SH_A:UH_A:PH_P:UH_P:UH_PV
highlighter=HlImpl:NONE:SH_A:UH_A:UH_P:UH_PV

{ "Populate"
  CreateIndex
@ -60,6 +60,6 @@ highlighter=HlImpl:NONE:SH_A:UH_A:PH_P:UH_P:UH_PV
  CloseReader

  NewRound
} : 6
} : 5

RepSumByPrefRound HL
@ -42,7 +42,6 @@ import org.apache.lucene.search.highlight.Highlighter;
|
||||
import org.apache.lucene.search.highlight.QueryScorer;
|
||||
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
|
||||
import org.apache.lucene.search.highlight.TokenSources;
|
||||
import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
|
||||
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
|
||||
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
|
||||
import org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner;
|
||||
@ -133,8 +132,6 @@ public class SearchTravRetHighlightTask extends SearchTravTask {
|
||||
case "UH_P": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.POSTINGS); break;
|
||||
case "UH_PV": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.POSTINGS_WITH_TERM_VECTORS); break;
|
||||
|
||||
case "PH_P": hlImpl = new PostingsHLImpl(); break;
|
||||
|
||||
default: throw new Exception("unrecognized highlighter type: " + type + " (try 'UH')");
|
||||
}
|
||||
}
|
||||
@ -224,33 +221,6 @@ public class SearchTravRetHighlightTask extends SearchTravTask {
|
||||
return clone;
|
||||
}
|
||||
|
||||
private class PostingsHLImpl implements HLImpl {
|
||||
PostingsHighlighter highlighter;
|
||||
String[] fields = hlFields.toArray(new String[hlFields.size()]);
|
||||
int[] maxPassages;
|
||||
PostingsHLImpl() {
|
||||
highlighter = new PostingsHighlighter(maxDocCharsToAnalyze) {
|
||||
@Override
|
||||
protected Analyzer getIndexAnalyzer(String field) { // thus support wildcards
|
||||
return analyzer;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BreakIterator getBreakIterator(String field) {
|
||||
return BreakIterator.getSentenceInstance(Locale.ENGLISH);
|
||||
}
|
||||
};
|
||||
maxPassages = new int[hlFields.size()];
|
||||
Arrays.fill(maxPassages, maxFrags);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
|
||||
Map<String, String[]> result = highlighter.highlightFields(fields, q, searcher, hits, maxPassages);
|
||||
preventOptimizeAway = result.size();
|
||||
}
|
||||
}
|
||||
|
||||
private class UnifiedHLImpl implements HLImpl {
|
||||
UnifiedHighlighter highlighter;
|
||||
IndexSearcher lastSearcher;
|
||||
|
File diff suppressed because it is too large
@ -28,6 +28,8 @@
    <path refid="base.classpath"/>
    <pathelement path="${queries.jar}"/>
    <pathelement path="${grouping.jar}"/>
    <pathelement path="${sandbox.jar}"/>
    <pathelement path="${analyzers-common.jar}"/>
  </path>

  <path id="test.classpath">
@ -36,16 +38,18 @@
    <path refid="test.base.classpath"/>
  </path>

  <target name="compile-core" depends="jar-grouping,jar-queries,jar-analyzers-common,common.compile-core" />
  <target name="compile-core" depends="jar-sandbox,jar-grouping,jar-queries,jar-analyzers-common,common.compile-core" />

  <target name="jar-core" depends="common.jar-core" />

  <target name="javadocs" depends="javadocs-grouping,compile-core,check-javadocs-uptodate"
  <target name="javadocs" depends="javadocs-sandbox,javadocs-grouping,compile-core,check-javadocs-uptodate"
          unless="javadocs-uptodate-${name}">
    <invoke-module-javadoc>
      <links>
        <link href="../queries"/>
        <link href="../analyzers-common"/>
        <link href="../grouping"/>
        <link href="../sandbox"/>
      </links>
    </invoke-module-javadoc>
  </target>
@ -0,0 +1,243 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.classification;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* A classifier that approximates a naive Bayes classifier by using pure queries scored with BM25.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class BM25NBClassifier implements Classifier<BytesRef> {
|
||||
|
||||
/**
|
||||
* {@link IndexReader} used to access the {@link Classifier}'s
|
||||
* index
|
||||
*/
|
||||
private final IndexReader indexReader;
|
||||
|
||||
/**
|
||||
* names of the fields to be used as input text
|
||||
*/
|
||||
private final String[] textFieldNames;
|
||||
|
||||
/**
|
||||
* name of the field to be used as a class / category output
|
||||
*/
|
||||
private final String classFieldName;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} to be used for tokenizing unseen input text
|
||||
*/
|
||||
private final Analyzer analyzer;
|
||||
|
||||
/**
|
||||
* {@link IndexSearcher} to run searches on the index for retrieving frequencies
|
||||
*/
|
||||
private final IndexSearcher indexSearcher;
|
||||
|
||||
/**
|
||||
* {@link Query} used to eventually filter the document set to be used to classify
|
||||
*/
|
||||
private final Query query;
|
||||
|
||||
/**
|
||||
* Creates a new NaiveBayes classifier.
|
||||
*
|
||||
* @param indexReader the reader on the index to be used for classification
|
||||
* @param analyzer an {@link Analyzer} used to analyze unseen text
|
||||
* @param query a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
|
||||
* if all the indexed docs should be used
|
||||
* @param classFieldName the name of the field used as the output for the classifier. NOTE: it must not be heavily analyzed,
|
||||
* as the returned class will be a token indexed for this field
|
||||
* @param textFieldNames the names of the fields used as the inputs for the classifier; NO per-field boosting is supported
|
||||
*/
|
||||
public BM25NBClassifier(IndexReader indexReader, Analyzer analyzer, Query query, String classFieldName, String... textFieldNames) {
|
||||
this.indexReader = indexReader;
|
||||
this.indexSearcher = new IndexSearcher(this.indexReader);
|
||||
this.indexSearcher.setSimilarity(new BM25Similarity());
|
||||
this.textFieldNames = textFieldNames;
|
||||
this.classFieldName = classFieldName;
|
||||
this.analyzer = analyzer;
|
||||
this.query = query;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public ClassificationResult<BytesRef> assignClass(String inputDocument) throws IOException {
|
||||
return assignClassNormalizedList(inputDocument).get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public List<ClassificationResult<BytesRef>> getClasses(String text) throws IOException {
|
||||
List<ClassificationResult<BytesRef>> assignedClasses = assignClassNormalizedList(text);
|
||||
Collections.sort(assignedClasses);
|
||||
return assignedClasses;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public List<ClassificationResult<BytesRef>> getClasses(String text, int max) throws IOException {
|
||||
List<ClassificationResult<BytesRef>> assignedClasses = assignClassNormalizedList(text);
|
||||
Collections.sort(assignedClasses);
|
||||
return assignedClasses.subList(0, max);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate probabilities for all classes for a given input text
|
||||
*
|
||||
* @param inputDocument the input text as a {@code String}
|
||||
* @return a {@code List} of {@code ClassificationResult}, one for each existing class
|
||||
* @throws IOException if assigning probabilities fails
|
||||
*/
|
||||
private List<ClassificationResult<BytesRef>> assignClassNormalizedList(String inputDocument) throws IOException {
|
||||
List<ClassificationResult<BytesRef>> assignedClasses = new ArrayList<>();
|
||||
|
||||
Terms classes = MultiFields.getTerms(indexReader, classFieldName);
|
||||
TermsEnum classesEnum = classes.iterator();
|
||||
BytesRef next;
|
||||
String[] tokenizedText = tokenize(inputDocument);
|
||||
while ((next = classesEnum.next()) != null) {
|
||||
if (next.length > 0) {
|
||||
Term term = new Term(this.classFieldName, next);
|
||||
assignedClasses.add(new ClassificationResult<>(term.bytes(), calculateLogPrior(term) + calculateLogLikelihood(tokenizedText, term)));
|
||||
}
|
||||
}
|
||||
|
||||
return normClassificationResults(assignedClasses);
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize the classification results based on the max score available
|
||||
*
|
||||
* @param assignedClasses the list of assigned classes
|
||||
* @return the normalized results
|
||||
*/
|
||||
private ArrayList<ClassificationResult<BytesRef>> normClassificationResults(List<ClassificationResult<BytesRef>> assignedClasses) {
|
||||
// normalization; the values are transformed into a 0-1 range
|
||||
ArrayList<ClassificationResult<BytesRef>> returnList = new ArrayList<>();
|
||||
if (!assignedClasses.isEmpty()) {
|
||||
Collections.sort(assignedClasses);
|
||||
// the max score is a negative number closest to 0; it is used as 'a' below
|
||||
double smax = assignedClasses.get(0).getScore();
|
||||
|
||||
double sumLog = 0;
|
||||
// log(sum(exp(x_n-a)))
|
||||
for (ClassificationResult<BytesRef> cr : assignedClasses) {
|
||||
// getScore - smax <= 0 (both are negative; smax has the smallest absolute value)
|
||||
sumLog += Math.exp(cr.getScore() - smax);
|
||||
}
|
||||
// loga=a+log(sum(exp(x_n-a))) = log(sum(exp(x_n)))
|
||||
double loga = smax;
|
||||
loga += Math.log(sumLog);
|
||||
|
||||
// 1/sum*x = exp(log(x))*1/sum = exp(log(x)-log(sum))
|
||||
for (ClassificationResult<BytesRef> cr : assignedClasses) {
|
||||
double scoreDiff = cr.getScore() - loga;
|
||||
returnList.add(new ClassificationResult<>(cr.getAssignedClass(), Math.exp(scoreDiff)));
|
||||
}
|
||||
}
|
||||
return returnList;
|
||||
}
|
||||
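(Editor's note: the normalization above is the standard log-sum-exp trick. The following is a tiny standalone sketch with made-up log scores, added only to illustrate the arithmetic; it is not part of this patch.)

public class LogSumExpSketch {
  public static void main(String[] args) {
    double[] logScores = {-3.2, -4.1, -7.8}; // assumed example values, sorted descending
    double a = logScores[0];                 // the log score closest to 0
    double sum = 0;
    for (double s : logScores) {
      sum += Math.exp(s - a);                // each term is <= 1, so exp() cannot underflow to 0
    }
    double logTotal = a + Math.log(sum);     // log(sum(exp(x_n)))
    double check = 0;
    for (double s : logScores) {
      double p = Math.exp(s - logTotal);     // normalized into the 0-1 range
      check += p;
      System.out.println(p);
    }
    System.out.println("sum = " + check);    // approximately 1.0
  }
}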
|
||||
/**
|
||||
* tokenize a <code>String</code> on this classifier's text fields and analyzer
|
||||
*
|
||||
* @param text the <code>String</code> representing an input text (to be classified)
|
||||
* @return a <code>String</code> array of the resulting tokens
|
||||
* @throws IOException if tokenization fails
|
||||
*/
|
||||
private String[] tokenize(String text) throws IOException {
|
||||
Collection<String> result = new LinkedList<>();
|
||||
for (String textFieldName : textFieldNames) {
|
||||
try (TokenStream tokenStream = analyzer.tokenStream(textFieldName, text)) {
|
||||
CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
|
||||
tokenStream.reset();
|
||||
while (tokenStream.incrementToken()) {
|
||||
result.add(charTermAttribute.toString());
|
||||
}
|
||||
tokenStream.end();
|
||||
}
|
||||
}
|
||||
return result.toArray(new String[result.size()]);
|
||||
}
|
||||
|
||||
private double calculateLogLikelihood(String[] tokens, Term term) throws IOException {
|
||||
double result = 0d;
|
||||
for (String word : tokens) {
|
||||
result += Math.log(getTermProbForClass(term, word));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private double getTermProbForClass(Term classTerm, String... words) throws IOException {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new BooleanClause(new TermQuery(classTerm), BooleanClause.Occur.MUST));
|
||||
for (String textFieldName : textFieldNames) {
|
||||
for (String word : words) {
|
||||
builder.add(new BooleanClause(new TermQuery(new Term(textFieldName, word)), BooleanClause.Occur.SHOULD));
|
||||
}
|
||||
}
|
||||
if (query != null) {
|
||||
builder.add(query, BooleanClause.Occur.MUST);
|
||||
}
|
||||
TopDocs search = indexSearcher.search(builder.build(), 1);
|
||||
return search.totalHits > 0 ? search.getMaxScore() : 1;
|
||||
}
|
||||
|
||||
private double calculateLogPrior(Term term) throws IOException {
|
||||
TermQuery termQuery = new TermQuery(term);
|
||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||
bq.add(termQuery, BooleanClause.Occur.MUST);
|
||||
if (query != null) {
|
||||
bq.add(query, BooleanClause.Occur.MUST);
|
||||
}
|
||||
TopDocs topDocs = indexSearcher.search(bq.build(), 1);
|
||||
return topDocs.totalHits > 0 ? Math.log(topDocs.getMaxScore()) : 0;
|
||||
}
|
||||
|
||||
}
|
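(Editor's note: a hedged usage sketch for the new BM25NBClassifier. It assumes an existing index under ./index whose documents carry a "category" and a "text" field; the field names, index path, and analyzer choice are illustrative, not taken from this patch.)

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.classification.BM25NBClassifier;
import org.apache.lucene.classification.ClassificationResult;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class Bm25NbUsageSketch {
  public static void main(String[] args) throws Exception {
    // Assumes an already-built index with "category" (class) and "text" (input) fields.
    try (Directory dir = FSDirectory.open(Paths.get("index"));
         IndexReader reader = DirectoryReader.open(dir)) {
      BM25NBClassifier classifier =
          new BM25NBClassifier(reader, new StandardAnalyzer(), null, "category", "text");
      ClassificationResult<BytesRef> result = classifier.assignClass("some unseen text");
      System.out.println(result.getAssignedClass().utf8ToString() + " -> " + result.getScore());
    }
  }
}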
@ -0,0 +1,224 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.classification;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* A k-Nearest Neighbor classifier based on {@link FuzzyLikeThisQuery}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class KNearestFuzzyClassifier implements Classifier<BytesRef> {
|
||||
|
||||
/**
|
||||
* the names of the fields used as the input text
|
||||
*/
|
||||
protected final String[] textFieldNames;
|
||||
|
||||
/**
|
||||
* the name of the field used as the output text
|
||||
*/
|
||||
protected final String classFieldName;
|
||||
|
||||
/**
|
||||
* an {@link IndexSearcher} used to perform queries
|
||||
*/
|
||||
protected final IndexSearcher indexSearcher;
|
||||
|
||||
/**
|
||||
* the no. of docs to compare in order to find the nearest neighbor to the input text
|
||||
*/
|
||||
protected final int k;
|
||||
|
||||
/**
|
||||
* a {@link Query} used to filter the documents that should be used from this classifier's underlying {@link LeafReader}
|
||||
*/
|
||||
protected final Query query;
|
||||
private final Analyzer analyzer;
|
||||
|
||||
/**
|
||||
* Creates a {@link KNearestFuzzyClassifier}.
|
||||
*
|
||||
* @param indexReader the reader on the index to be used for classification
|
||||
* @param analyzer an {@link Analyzer} used to analyze unseen text
|
||||
* @param similarity the {@link Similarity} to be used by the underlying {@link IndexSearcher} or {@code null}
|
||||
* (defaults to {@link BM25Similarity})
|
||||
* @param query a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
|
||||
* if all the indexed docs should be used
|
||||
* @param k the no. of docs to select from the fuzzy query results to find the nearest neighbor
|
||||
* @param classFieldName the name of the field used as the output for the classifier
|
||||
* @param textFieldNames the name of the fields used as the inputs for the classifier, they can contain boosting indication e.g. title^10
|
||||
*/
|
||||
public KNearestFuzzyClassifier(IndexReader indexReader, Similarity similarity, Analyzer analyzer, Query query, int k,
|
||||
String classFieldName, String... textFieldNames) {
|
||||
this.textFieldNames = textFieldNames;
|
||||
this.classFieldName = classFieldName;
|
||||
this.analyzer = analyzer;
|
||||
this.indexSearcher = new IndexSearcher(indexReader);
|
||||
if (similarity != null) {
|
||||
this.indexSearcher.setSimilarity(similarity);
|
||||
} else {
|
||||
this.indexSearcher.setSimilarity(new BM25Similarity());
|
||||
}
|
||||
this.query = query;
|
||||
this.k = k;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public ClassificationResult<BytesRef> assignClass(String text) throws IOException {
|
||||
TopDocs knnResults = knnSearch(text);
|
||||
List<ClassificationResult<BytesRef>> assignedClasses = buildListFromTopDocs(knnResults);
|
||||
ClassificationResult<BytesRef> assignedClass = null;
|
||||
double maxscore = -Double.MAX_VALUE;
|
||||
for (ClassificationResult<BytesRef> cl : assignedClasses) {
|
||||
if (cl.getScore() > maxscore) {
|
||||
assignedClass = cl;
|
||||
maxscore = cl.getScore();
|
||||
}
|
||||
}
|
||||
return assignedClass;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public List<ClassificationResult<BytesRef>> getClasses(String text) throws IOException {
|
||||
TopDocs knnResults = knnSearch(text);
|
||||
List<ClassificationResult<BytesRef>> assignedClasses = buildListFromTopDocs(knnResults);
|
||||
Collections.sort(assignedClasses);
|
||||
return assignedClasses;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public List<ClassificationResult<BytesRef>> getClasses(String text, int max) throws IOException {
|
||||
TopDocs knnResults = knnSearch(text);
|
||||
List<ClassificationResult<BytesRef>> assignedClasses = buildListFromTopDocs(knnResults);
|
||||
Collections.sort(assignedClasses);
|
||||
return assignedClasses.subList(0, max);
|
||||
}
|
||||
|
||||
private TopDocs knnSearch(String text) throws IOException {
|
||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||
FuzzyLikeThisQuery fuzzyLikeThisQuery = new FuzzyLikeThisQuery(300, analyzer);
|
||||
for (String fieldName : textFieldNames) {
|
||||
fuzzyLikeThisQuery.addTerms(text, fieldName, 1f, 2); // TODO: make this parameters configurable
|
||||
}
|
||||
bq.add(fuzzyLikeThisQuery, BooleanClause.Occur.MUST);
|
||||
Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
|
||||
bq.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));
|
||||
if (query != null) {
|
||||
bq.add(query, BooleanClause.Occur.MUST);
|
||||
}
|
||||
return indexSearcher.search(bq.build(), k);
|
||||
}
|
||||
|
||||
/**
|
||||
* build a list of classification results from search results
|
||||
*
|
||||
* @param topDocs the search results as a {@link TopDocs} object
|
||||
* @return a {@link List} of {@link ClassificationResult}, one for each existing class
|
||||
* @throws IOException if it's not possible to get the stored value of the class field
|
||||
*/
|
||||
protected List<ClassificationResult<BytesRef>> buildListFromTopDocs(TopDocs topDocs) throws IOException {
|
||||
Map<BytesRef, Integer> classCounts = new HashMap<>();
|
||||
Map<BytesRef, Double> classBoosts = new HashMap<>(); // this is a boost based on class ranking positions in topDocs
|
||||
float maxScore = topDocs.getMaxScore();
|
||||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||
IndexableField storableField = indexSearcher.doc(scoreDoc.doc).getField(classFieldName);
|
||||
if (storableField != null) {
|
||||
BytesRef cl = new BytesRef(storableField.stringValue());
|
||||
//update count
|
||||
Integer count = classCounts.get(cl);
|
||||
if (count != null) {
|
||||
classCounts.put(cl, count + 1);
|
||||
} else {
|
||||
classCounts.put(cl, 1);
|
||||
}
|
||||
//update boost, the boost is based on the best score
|
||||
Double totalBoost = classBoosts.get(cl);
|
||||
double singleBoost = scoreDoc.score / maxScore;
|
||||
if (totalBoost != null) {
|
||||
classBoosts.put(cl, totalBoost + singleBoost);
|
||||
} else {
|
||||
classBoosts.put(cl, singleBoost);
|
||||
}
|
||||
}
|
||||
}
|
||||
List<ClassificationResult<BytesRef>> returnList = new ArrayList<>();
|
||||
List<ClassificationResult<BytesRef>> temporaryList = new ArrayList<>();
|
||||
int sumdoc = 0;
|
||||
for (Map.Entry<BytesRef, Integer> entry : classCounts.entrySet()) {
|
||||
Integer count = entry.getValue();
|
||||
Double normBoost = classBoosts.get(entry.getKey()) / count; //the boost is normalized to be 0<b<1
|
||||
temporaryList.add(new ClassificationResult<>(entry.getKey().clone(), (count * normBoost) / (double) k));
|
||||
sumdoc += count;
|
||||
}
|
||||
|
||||
//correction
|
||||
if (sumdoc < k) {
|
||||
for (ClassificationResult<BytesRef> cr : temporaryList) {
|
||||
returnList.add(new ClassificationResult<>(cr.getAssignedClass(), cr.getScore() * k / (double) sumdoc));
|
||||
}
|
||||
} else {
|
||||
returnList = temporaryList;
|
||||
}
|
||||
return returnList;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "KNearestFuzzyClassifier{" +
|
||||
"textFieldNames=" + Arrays.toString(textFieldNames) +
|
||||
", classFieldName='" + classFieldName + '\'' +
|
||||
", k=" + k +
|
||||
", query=" + query +
|
||||
", similarity=" + indexSearcher.getSimilarity(true) +
|
||||
'}';
|
||||
}
|
||||
}
|
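(Editor's note: a comparable, hypothetical usage sketch for the new KNearestFuzzyClassifier, under the same assumptions as the previous sketch: an existing index with "category" and "text" fields, k set to 3, and the default BM25 similarity selected by passing null.)

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.classification.ClassificationResult;
import org.apache.lucene.classification.KNearestFuzzyClassifier;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class KnnFuzzyUsageSketch {
  public static void main(String[] args) throws Exception {
    // Assumes an already-built index with "category" (class) and "text" (input) fields.
    try (Directory dir = FSDirectory.open(Paths.get("index"));
         IndexReader reader = DirectoryReader.open(dir)) {
      KNearestFuzzyClassifier classifier = new KNearestFuzzyClassifier(
          reader, null, new StandardAnalyzer(), null, 3, "category", "text");
      for (ClassificationResult<BytesRef> r : classifier.getClasses("some unseen text")) {
        System.out.println(r.getAssignedClass().utf8ToString() + " -> " + r.getScore());
      }
    }
  }
}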
@ -121,7 +121,7 @@ public class DatasetSplitter {
    int b = 0;

    // iterate over existing documents
    for (GroupDocs group : topGroups.groups) {
    for (GroupDocs<Object> group : topGroups.groups) {
      int totalHits = group.totalHits;
      double testSize = totalHits * testRatio;
      int tc = 0;
@ -0,0 +1,154 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.classification;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
|
||||
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||
import org.apache.lucene.classification.utils.ConfusionMatrixGenerator;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Tests for {@link BM25NBClassifier}
|
||||
*/
|
||||
public class BM25NBClassifierTest extends ClassificationTestBase<BytesRef> {
|
||||
|
||||
@Test
|
||||
public void testBasicUsage() throws Exception {
|
||||
LeafReader leafReader = null;
|
||||
try {
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||
leafReader = getSampleIndex(analyzer);
|
||||
BM25NBClassifier classifier = new BM25NBClassifier(leafReader, analyzer, null, categoryFieldName, textFieldName);
|
||||
checkCorrectClassification(classifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
|
||||
} finally {
|
||||
if (leafReader != null) {
|
||||
leafReader.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicUsageWithQuery() throws Exception {
|
||||
LeafReader leafReader = null;
|
||||
try {
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||
leafReader = getSampleIndex(analyzer);
|
||||
TermQuery query = new TermQuery(new Term(textFieldName, "not"));
|
||||
BM25NBClassifier classifier = new BM25NBClassifier(leafReader, analyzer, query, categoryFieldName, textFieldName);
|
||||
checkCorrectClassification(classifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
|
||||
} finally {
|
||||
if (leafReader != null) {
|
||||
leafReader.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNGramUsage() throws Exception {
|
||||
LeafReader leafReader = null;
|
||||
try {
|
||||
Analyzer analyzer = new NGramAnalyzer();
|
||||
leafReader = getSampleIndex(analyzer);
|
||||
BM25NBClassifier classifier = new BM25NBClassifier(leafReader, analyzer, null, categoryFieldName, textFieldName);
|
||||
checkCorrectClassification(classifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
|
||||
} finally {
|
||||
if (leafReader != null) {
|
||||
leafReader.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class NGramAnalyzer extends Analyzer {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
final Tokenizer tokenizer = new KeywordTokenizer();
|
||||
return new TokenStreamComponents(tokenizer, new ReverseStringFilter(new EdgeNGramTokenFilter(new ReverseStringFilter(tokenizer), 10, 20)));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPerformance() throws Exception {
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||
LeafReader leafReader = getRandomIndex(analyzer, 100);
|
||||
try {
|
||||
long trainStart = System.currentTimeMillis();
|
||||
BM25NBClassifier classifier = new BM25NBClassifier(leafReader,
|
||||
analyzer, null, categoryFieldName, textFieldName);
|
||||
long trainEnd = System.currentTimeMillis();
|
||||
long trainTime = trainEnd - trainStart;
|
||||
assertTrue("training took more than 10s: " + trainTime / 1000 + "s", trainTime < 10000);
|
||||
|
||||
long evaluationStart = System.currentTimeMillis();
|
||||
ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(leafReader,
|
||||
classifier, categoryFieldName, textFieldName, -1);
|
||||
assertNotNull(confusionMatrix);
|
||||
long evaluationEnd = System.currentTimeMillis();
|
||||
long evaluationTime = evaluationEnd - evaluationStart;
|
||||
assertTrue("evaluation took more than 2m: " + evaluationTime / 1000 + "s", evaluationTime < 120000);
|
||||
double avgClassificationTime = confusionMatrix.getAvgClassificationTime();
|
||||
assertTrue("avg classification time: " + avgClassificationTime, 5000 > avgClassificationTime);
|
||||
|
||||
double f1 = confusionMatrix.getF1Measure();
|
||||
assertTrue(f1 >= 0d);
|
||||
assertTrue(f1 <= 1d);
|
||||
|
||||
double accuracy = confusionMatrix.getAccuracy();
|
||||
assertTrue(accuracy >= 0d);
|
||||
assertTrue(accuracy <= 1d);
|
||||
|
||||
double recall = confusionMatrix.getRecall();
|
||||
assertTrue(recall >= 0d);
|
||||
assertTrue(recall <= 1d);
|
||||
|
||||
double precision = confusionMatrix.getPrecision();
|
||||
assertTrue(precision >= 0d);
|
||||
assertTrue(precision <= 1d);
|
||||
|
||||
Terms terms = MultiFields.getTerms(leafReader, categoryFieldName);
|
||||
TermsEnum iterator = terms.iterator();
|
||||
BytesRef term;
|
||||
while ((term = iterator.next()) != null) {
|
||||
String s = term.utf8ToString();
|
||||
recall = confusionMatrix.getRecall(s);
|
||||
assertTrue(recall >= 0d);
|
||||
assertTrue(recall <= 1d);
|
||||
precision = confusionMatrix.getPrecision(s);
|
||||
assertTrue(precision >= 0d);
|
||||
assertTrue(precision <= 1d);
|
||||
double f1Measure = confusionMatrix.getF1Measure(s);
|
||||
assertTrue(f1Measure >= 0d);
|
||||
assertTrue(f1Measure <= 1d);
|
||||
}
|
||||
|
||||
} finally {
|
||||
leafReader.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.classification;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.classification.utils.ConfusionMatrixGenerator;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Testcase for {@link KNearestFuzzyClassifier}
|
||||
*/
|
||||
public class KNearestFuzzyClassifierTest extends ClassificationTestBase<BytesRef> {
|
||||
|
||||
@Test
|
||||
public void testBasicUsage() throws Exception {
|
||||
LeafReader leafReader = null;
|
||||
try {
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||
leafReader = getSampleIndex(analyzer);
|
||||
Classifier<BytesRef> classifier = new KNearestFuzzyClassifier(leafReader, null, analyzer, null, 3, categoryFieldName, textFieldName);
|
||||
checkCorrectClassification(classifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
|
||||
checkCorrectClassification(classifier, POLITICS_INPUT, POLITICS_RESULT);
|
||||
} finally {
|
||||
if (leafReader != null) {
|
||||
leafReader.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicUsageWithQuery() throws Exception {
|
||||
LeafReader leafReader = null;
|
||||
try {
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||
leafReader = getSampleIndex(analyzer);
|
||||
TermQuery query = new TermQuery(new Term(textFieldName, "not"));
|
||||
Classifier<BytesRef> classifier = new KNearestFuzzyClassifier(leafReader, null, analyzer, query, 3, categoryFieldName, textFieldName);
|
||||
checkCorrectClassification(classifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
|
||||
} finally {
|
||||
if (leafReader != null) {
|
||||
leafReader.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPerformance() throws Exception {
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||
LeafReader leafReader = getRandomIndex(analyzer, 100);
|
||||
try {
|
||||
long trainStart = System.currentTimeMillis();
|
||||
Classifier<BytesRef> classifier = new KNearestFuzzyClassifier(leafReader, null, analyzer, null, 3, categoryFieldName, textFieldName);
|
||||
long trainEnd = System.currentTimeMillis();
|
||||
long trainTime = trainEnd - trainStart;
|
||||
assertTrue("training took more than 10s: " + trainTime / 1000 + "s", trainTime < 10000);
|
||||
|
||||
long evaluationStart = System.currentTimeMillis();
|
||||
ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(leafReader,
|
||||
classifier, categoryFieldName, textFieldName, -1);
|
||||
assertNotNull(confusionMatrix);
|
||||
long evaluationEnd = System.currentTimeMillis();
|
||||
long evaluationTime = evaluationEnd - evaluationStart;
|
||||
assertTrue("evaluation took more than 2m: " + evaluationTime / 1000 + "s", evaluationTime < 120000);
|
||||
double avgClassificationTime = confusionMatrix.getAvgClassificationTime();
|
||||
assertTrue(5000 > avgClassificationTime);
|
||||
double accuracy = confusionMatrix.getAccuracy();
|
||||
assertTrue(accuracy >= 0d);
|
||||
assertTrue(accuracy <= 1d);
|
||||
|
||||
double recall = confusionMatrix.getRecall();
|
||||
assertTrue(recall >= 0d);
|
||||
assertTrue(recall <= 1d);
|
||||
|
||||
double precision = confusionMatrix.getPrecision();
|
||||
assertTrue(precision >= 0d);
|
||||
assertTrue(precision <= 1d);
|
||||
|
||||
Terms terms = MultiFields.getTerms(leafReader, categoryFieldName);
|
||||
TermsEnum iterator = terms.iterator();
|
||||
BytesRef term;
|
||||
while ((term = iterator.next()) != null) {
|
||||
String s = term.utf8ToString();
|
||||
recall = confusionMatrix.getRecall(s);
|
||||
assertTrue(recall >= 0d);
|
||||
assertTrue(recall <= 1d);
|
||||
precision = confusionMatrix.getPrecision(s);
|
||||
assertTrue(precision >= 0d);
|
||||
assertTrue(precision <= 1d);
|
||||
double f1Measure = confusionMatrix.getF1Measure(s);
|
||||
assertTrue(f1Measure >= 0d);
|
||||
assertTrue(f1Measure <= 1d);
|
||||
}
|
||||
} finally {
|
||||
leafReader.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -21,11 +21,13 @@ import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.classification.BM25NBClassifier;
|
||||
import org.apache.lucene.classification.BooleanPerceptronClassifier;
|
||||
import org.apache.lucene.classification.CachingNaiveBayesClassifier;
|
||||
import org.apache.lucene.classification.ClassificationResult;
|
||||
import org.apache.lucene.classification.ClassificationTestBase;
|
||||
import org.apache.lucene.classification.Classifier;
|
||||
import org.apache.lucene.classification.KNearestFuzzyClassifier;
|
||||
import org.apache.lucene.classification.KNearestNeighborClassifier;
|
||||
import org.apache.lucene.classification.SimpleNaiveBayesClassifier;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
@ -94,22 +96,43 @@ public class ConfusionMatrixGeneratorTest extends ClassificationTestBase<Object>
|
||||
Classifier<BytesRef> classifier = new SimpleNaiveBayesClassifier(reader, analyzer, null, categoryFieldName, textFieldName);
|
||||
ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(reader,
|
||||
classifier, categoryFieldName, textFieldName, -1);
|
||||
assertNotNull(confusionMatrix);
|
||||
assertNotNull(confusionMatrix.getLinearizedMatrix());
|
||||
assertEquals(7, confusionMatrix.getNumberOfEvaluatedDocs());
|
||||
assertTrue(confusionMatrix.getAvgClassificationTime() >= 0d);
|
||||
double accuracy = confusionMatrix.getAccuracy();
|
||||
assertTrue(accuracy >= 0d);
|
||||
assertTrue(accuracy <= 1d);
|
||||
double precision = confusionMatrix.getPrecision();
|
||||
assertTrue(precision >= 0d);
|
||||
assertTrue(precision <= 1d);
|
||||
double recall = confusionMatrix.getRecall();
|
||||
assertTrue(recall >= 0d);
|
||||
assertTrue(recall <= 1d);
|
||||
double f1Measure = confusionMatrix.getF1Measure();
|
||||
assertTrue(f1Measure >= 0d);
|
||||
assertTrue(f1Measure <= 1d);
|
||||
checkCM(confusionMatrix);
|
||||
} finally {
|
||||
if (reader != null) {
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void checkCM(ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix) {
|
||||
assertNotNull(confusionMatrix);
|
||||
assertNotNull(confusionMatrix.getLinearizedMatrix());
|
||||
assertEquals(7, confusionMatrix.getNumberOfEvaluatedDocs());
|
||||
assertTrue(confusionMatrix.getAvgClassificationTime() >= 0d);
|
||||
double accuracy = confusionMatrix.getAccuracy();
|
||||
assertTrue(accuracy >= 0d);
|
||||
assertTrue(accuracy <= 1d);
|
||||
double precision = confusionMatrix.getPrecision();
|
||||
assertTrue(precision >= 0d);
|
||||
assertTrue(precision <= 1d);
|
||||
double recall = confusionMatrix.getRecall();
|
||||
assertTrue(recall >= 0d);
|
||||
assertTrue(recall <= 1d);
|
||||
double f1Measure = confusionMatrix.getF1Measure();
|
||||
assertTrue(f1Measure >= 0d);
|
||||
assertTrue(f1Measure <= 1d);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetConfusionMatrixWithBM25NB() throws Exception {
|
||||
LeafReader reader = null;
|
||||
try {
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||
reader = getSampleIndex(analyzer);
|
||||
Classifier<BytesRef> classifier = new BM25NBClassifier(reader, analyzer, null, categoryFieldName, textFieldName);
|
||||
ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(reader,
|
||||
classifier, categoryFieldName, textFieldName, -1);
|
||||
checkCM(confusionMatrix);
|
||||
} finally {
|
||||
if (reader != null) {
|
||||
reader.close();
|
||||
@ -126,22 +149,7 @@ public class ConfusionMatrixGeneratorTest extends ClassificationTestBase<Object>
|
||||
Classifier<BytesRef> classifier = new CachingNaiveBayesClassifier(reader, analyzer, null, categoryFieldName, textFieldName);
|
||||
ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(reader,
|
||||
classifier, categoryFieldName, textFieldName, -1);
|
||||
assertNotNull(confusionMatrix);
|
||||
assertNotNull(confusionMatrix.getLinearizedMatrix());
|
||||
assertEquals(7, confusionMatrix.getNumberOfEvaluatedDocs());
|
||||
assertTrue(confusionMatrix.getAvgClassificationTime() >= 0d);
|
||||
double accuracy = confusionMatrix.getAccuracy();
|
||||
assertTrue(accuracy >= 0d);
|
||||
assertTrue(accuracy <= 1d);
|
||||
double precision = confusionMatrix.getPrecision();
|
||||
assertTrue(precision >= 0d);
|
||||
assertTrue(precision <= 1d);
|
||||
double recall = confusionMatrix.getRecall();
|
||||
assertTrue(recall >= 0d);
|
||||
assertTrue(recall <= 1d);
|
||||
double f1Measure = confusionMatrix.getF1Measure();
|
||||
assertTrue(f1Measure >= 0d);
|
||||
assertTrue(f1Measure <= 1d);
|
||||
checkCM(confusionMatrix);
|
||||
} finally {
|
||||
if (reader != null) {
|
||||
reader.close();
|
||||
@ -158,22 +166,24 @@ public class ConfusionMatrixGeneratorTest extends ClassificationTestBase<Object>
|
||||
Classifier<BytesRef> classifier = new KNearestNeighborClassifier(reader, null, analyzer, null, 1, 0, 0, categoryFieldName, textFieldName);
|
||||
ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(reader,
|
||||
classifier, categoryFieldName, textFieldName, -1);
|
||||
assertNotNull(confusionMatrix);
|
||||
assertNotNull(confusionMatrix.getLinearizedMatrix());
|
||||
assertEquals(7, confusionMatrix.getNumberOfEvaluatedDocs());
|
||||
assertTrue(confusionMatrix.getAvgClassificationTime() >= 0d);
|
||||
double accuracy = confusionMatrix.getAccuracy();
|
||||
assertTrue(accuracy >= 0d);
|
||||
assertTrue(accuracy <= 1d);
|
||||
double precision = confusionMatrix.getPrecision();
|
||||
assertTrue(precision >= 0d);
|
||||
assertTrue(precision <= 1d);
|
||||
double recall = confusionMatrix.getRecall();
|
||||
assertTrue(recall >= 0d);
|
||||
assertTrue(recall <= 1d);
|
||||
double f1Measure = confusionMatrix.getF1Measure();
|
||||
assertTrue(f1Measure >= 0d);
|
||||
assertTrue(f1Measure <= 1d);
|
||||
checkCM(confusionMatrix);
|
||||
} finally {
|
||||
if (reader != null) {
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetConfusionMatrixWithFLTKNN() throws Exception {
|
||||
LeafReader reader = null;
|
||||
try {
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||
reader = getSampleIndex(analyzer);
|
||||
Classifier<BytesRef> classifier = new KNearestFuzzyClassifier(reader, null, analyzer, null, 1, categoryFieldName, textFieldName);
|
||||
ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(reader,
|
||||
classifier, categoryFieldName, textFieldName, -1);
|
||||
checkCM(confusionMatrix);
|
||||
} finally {
|
||||
if (reader != null) {
|
||||
reader.close();
|
||||
@ -190,22 +200,7 @@ public class ConfusionMatrixGeneratorTest extends ClassificationTestBase<Object>
|
||||
Classifier<Boolean> classifier = new BooleanPerceptronClassifier(reader, analyzer, null, 1, null, booleanFieldName, textFieldName);
|
||||
ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(reader,
|
||||
classifier, booleanFieldName, textFieldName, -1);
|
||||
assertNotNull(confusionMatrix);
|
||||
assertNotNull(confusionMatrix.getLinearizedMatrix());
|
||||
assertEquals(7, confusionMatrix.getNumberOfEvaluatedDocs());
|
||||
assertTrue(confusionMatrix.getAvgClassificationTime() >= 0d);
|
||||
double accuracy = confusionMatrix.getAccuracy();
|
||||
assertTrue(accuracy >= 0d);
|
||||
assertTrue(accuracy <= 1d);
|
||||
double precision = confusionMatrix.getPrecision();
|
||||
assertTrue(precision >= 0d);
|
||||
assertTrue(precision <= 1d);
|
||||
double recall = confusionMatrix.getRecall();
|
||||
assertTrue(recall >= 0d);
|
||||
assertTrue(recall <= 1d);
|
||||
double f1Measure = confusionMatrix.getF1Measure();
|
||||
assertTrue(f1Measure >= 0d);
|
||||
assertTrue(f1Measure <= 1d);
|
||||
checkCM(confusionMatrix);
|
||||
assertTrue(confusionMatrix.getPrecision("true") >= 0d);
|
||||
assertTrue(confusionMatrix.getPrecision("true") <= 1d);
|
||||
assertTrue(confusionMatrix.getPrecision("false") >= 0d);
|
||||
|
@ -877,7 +877,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||
};
|
||||
}
|
||||
|
||||
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp, offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc) {
|
||||
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp, offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc, null, 0) {
|
||||
|
||||
/** We write/read fixed-byte-width file that {@link OfflinePointReader} can read. */
|
||||
@Override
|
||||
@ -1170,7 +1170,8 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||
|
||||
/** Called on exception, to check whether the checksum is also corrupt in this source, and add that
|
||||
* information (checksum matched or didn't) as a suppressed exception. */
|
||||
private void verifyChecksum(Throwable priorException, PointWriter writer) throws IOException {
|
||||
private Error verifyChecksum(Throwable priorException, PointWriter writer) throws IOException {
|
||||
assert priorException != null;
|
||||
// TODO: we could improve this, to always validate checksum as we recurse, if we shared left and
|
||||
// right reader after recursing to children, and possibly within recursed children,
|
||||
// since all together they make a single pass through the file. But this is a sizable re-org,
|
||||
@ -1181,10 +1182,10 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||
try (ChecksumIndexInput in = tempDir.openChecksumInput(tempFileName, IOContext.READONCE)) {
|
||||
CodecUtil.checkFooter(in, priorException);
|
||||
}
|
||||
} else {
|
||||
// We are reading from heap; nothing to add:
|
||||
IOUtils.reThrow(priorException);
|
||||
}
|
||||
|
||||
// We are reading from heap; nothing to add:
|
||||
throw IOUtils.rethrowAlways(priorException);
|
||||
}
|
||||
|
||||
/** Marks bits for the ords (points) that belong in the right sub tree (those docs that have values >= the splitValue). */
|
||||
@ -1206,7 +1207,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||
reader.markOrds(rightCount-1, ordBitSet);
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, source.writer);
|
||||
throw verifyChecksum(t, source.writer);
|
||||
}
|
||||
|
||||
return scratch1;
|
||||
@ -1255,10 +1256,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||
}
|
||||
return new PathSlice(writer, 0, count);
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, source.writer);
|
||||
|
||||
// Dead code but javac disagrees:
|
||||
return null;
|
||||
throw verifyChecksum(t, source.writer);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1564,7 +1562,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||
leftSlices[dim] = new PathSlice(leftPointWriter, 0, leftCount);
|
||||
rightSlices[dim] = new PathSlice(rightPointWriter, 0, rightCount);
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, slices[dim].writer);
|
||||
throw verifyChecksum(t, slices[dim].writer);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -331,6 +331,9 @@ public final class CodecUtil {
|
||||
/** Retrieves the full footer from the provided {@link IndexInput}. This throws
|
||||
* {@link CorruptIndexException} if this file does not have a valid footer. */
|
||||
public static byte[] readFooter(IndexInput in) throws IOException {
|
||||
if (in.length() < footerLength()) {
|
||||
throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + in.length() + " but footerLength==" + footerLength(), in);
|
||||
}
|
||||
in.seek(in.length() - footerLength());
|
||||
validateFooter(in);
|
||||
in.seek(in.length() - footerLength());
|
||||
@ -467,7 +470,7 @@ public final class CodecUtil {
|
||||
// catch-all for things that shouldn't go wrong (e.g. OOM during readInt) but could...
|
||||
priorException.addSuppressed(new CorruptIndexException("checksum status indeterminate: unexpected exception", in, t));
|
||||
}
|
||||
IOUtils.reThrow(priorException);
|
||||
throw IOUtils.rethrowAlways(priorException);
|
||||
}
|
||||
}
|
||||
|
||||
@ -516,6 +519,9 @@ public final class CodecUtil {
|
||||
clone.seek(0);
|
||||
ChecksumIndexInput in = new BufferedChecksumIndexInput(clone);
|
||||
assert in.getFilePointer() == 0;
|
||||
if (in.length() < footerLength()) {
|
||||
throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + in.length() + " but footerLength==" + footerLength(), input);
|
||||
}
|
||||
in.seek(in.length() - footerLength());
|
||||
return checkFooter(in);
|
||||
}
|
||||
|
@ -112,6 +112,7 @@ abstract class RangeFieldQuery extends Query {
|
||||
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||
return new ConstantScoreWeight(this, boost) {
|
||||
final RangeFieldComparator target = new RangeFieldComparator();
|
||||
|
||||
private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException {
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
|
||||
values.intersect(
|
||||
@ -133,25 +134,29 @@ abstract class RangeFieldQuery extends Query {
|
||||
}
|
||||
@Override
|
||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
byte[] node = getInternalRange(minPackedValue, maxPackedValue);
|
||||
// compute range relation for BKD traversal
|
||||
if (target.intersects(node) == false) {
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
} else if (target.within(node)) {
|
||||
// target within cell; continue traversing:
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
} else if (target.contains(node)) {
|
||||
// target contains cell; add iff queryType is not a CONTAINS or CROSSES query:
|
||||
return (queryType == QueryType.CONTAINS || queryType == QueryType.CROSSES) ?
|
||||
Relation.CELL_OUTSIDE_QUERY : Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
// target intersects cell; continue traversing:
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
return compareRange(minPackedValue, maxPackedValue);
|
||||
}
|
||||
});
|
||||
return result.build();
|
||||
}
|
||||
|
||||
private Relation compareRange(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
byte[] node = getInternalRange(minPackedValue, maxPackedValue);
|
||||
// compute range relation for BKD traversal
|
||||
if (target.intersects(node) == false) {
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
} else if (target.within(node)) {
|
||||
// target within cell; continue traversing:
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
} else if (target.contains(node)) {
|
||||
// target contains cell; add iff queryType is not a CONTAINS or CROSSES query:
|
||||
return (queryType == QueryType.CONTAINS || queryType == QueryType.CROSSES) ?
|
||||
Relation.CELL_OUTSIDE_QUERY : Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
// target intersects cell; continue traversing:
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
LeafReader reader = context.reader();
|
||||
@ -166,17 +171,10 @@ abstract class RangeFieldQuery extends Query {
|
||||
return null;
|
||||
}
|
||||
checkFieldInfo(fieldInfo);
|
||||
boolean allDocsMatch = true;
|
||||
if (values.getDocCount() == reader.maxDoc()) {
|
||||
// if query crosses, docs need to be further scrutinized
|
||||
byte[] range = getInternalRange(values.getMinPackedValue(), values.getMaxPackedValue());
|
||||
// if the internal node is not equal and not contained by the query, all docs do not match
|
||||
if (queryType == QueryType.CROSSES || (!Arrays.equals(ranges, range)
|
||||
&& (target.contains(range) == false || queryType != QueryType.WITHIN))) {
|
||||
allDocsMatch = false;
|
||||
}
|
||||
} else {
|
||||
allDocsMatch = false;
|
||||
boolean allDocsMatch = false;
|
||||
if (values.getDocCount() == reader.maxDoc()
|
||||
&& compareRange(values.getMinPackedValue(), values.getMaxPackedValue()) == Relation.CELL_INSIDE_QUERY) {
|
||||
allDocsMatch = true;
|
||||
}
|
||||
|
||||
DocIdSetIterator iterator = allDocsMatch == true ?
|
||||
|
@ -463,8 +463,9 @@ class BufferedUpdatesStream implements Accountable {
|
||||
}
|
||||
|
||||
if (success) {
|
||||
// Does nothing if firstExc is null:
|
||||
IOUtils.reThrow(firstExc);
|
||||
if (firstExc != null) {
|
||||
throw IOUtils.rethrowAlways(firstExc);
|
||||
}
|
||||
}
|
||||
|
||||
if (infoStream.isEnabled("BD")) {
|
||||
|
@ -529,7 +529,7 @@ public final class CheckIndex implements Closeable {
|
||||
sis = SegmentInfos.readCommit(dir, lastSegmentsFile);
|
||||
} catch (Throwable t) {
|
||||
if (failFast) {
|
||||
IOUtils.reThrow(t);
|
||||
throw IOUtils.rethrowAlways(t);
|
||||
}
|
||||
msg(infoStream, "ERROR: could not read any segments file in directory");
|
||||
result.missingSegments = true;
|
||||
@ -565,11 +565,12 @@ public final class CheckIndex implements Closeable {
|
||||
input = dir.openInput(segmentsFileName, IOContext.READONCE);
|
||||
} catch (Throwable t) {
|
||||
if (failFast) {
|
||||
IOUtils.reThrow(t);
|
||||
throw IOUtils.rethrowAlways(t);
|
||||
}
|
||||
msg(infoStream, "ERROR: could not open segments file in directory");
|
||||
if (infoStream != null)
|
||||
if (infoStream != null) {
|
||||
t.printStackTrace(infoStream);
|
||||
}
|
||||
result.cantOpenSegments = true;
|
||||
return result;
|
||||
}
|
||||
@ -577,11 +578,12 @@ public final class CheckIndex implements Closeable {
|
||||
/*int format =*/ input.readInt();
|
||||
} catch (Throwable t) {
|
||||
if (failFast) {
|
||||
IOUtils.reThrow(t);
|
||||
throw IOUtils.rethrowAlways(t);
|
||||
}
|
||||
msg(infoStream, "ERROR: could not read segment file version in directory");
|
||||
if (infoStream != null)
|
||||
if (infoStream != null) {
|
||||
t.printStackTrace(infoStream);
|
||||
}
|
||||
result.missingSegmentVersion = true;
|
||||
return result;
|
||||
} finally {
|
||||
@ -789,7 +791,7 @@ public final class CheckIndex implements Closeable {
|
||||
|
||||
} catch (Throwable t) {
|
||||
if (failFast) {
|
||||
IOUtils.reThrow(t);
|
||||
throw IOUtils.rethrowAlways(t);
|
||||
}
|
||||
msg(infoStream, "FAILED");
|
||||
String comment;
|
||||
@ -883,7 +885,7 @@ public final class CheckIndex implements Closeable {
|
||||
msg(infoStream, String.format(Locale.ROOT, "OK [took %.3f sec]", nsToSec(System.nanoTime()-startNS)));
|
||||
} catch (Throwable e) {
|
||||
if (failFast) {
|
||||
IOUtils.reThrow(e);
|
||||
throw IOUtils.rethrowAlways(e);
|
||||
}
|
||||
msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
|
||||
status.error = e;
|
||||
@ -941,7 +943,7 @@ public final class CheckIndex implements Closeable {
|
||||
|
||||
} catch (Throwable e) {
|
||||
if (failFast) {
|
||||
IOUtils.reThrow(e);
|
||||
throw IOUtils.rethrowAlways(e);
|
||||
}
|
||||
msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
|
||||
status.error = e;
|
||||
@ -974,7 +976,7 @@ public final class CheckIndex implements Closeable {
|
||||
status.totFields = fieldInfos.size();
|
||||
} catch (Throwable e) {
|
||||
if (failFast) {
|
||||
IOUtils.reThrow(e);
|
||||
throw IOUtils.rethrowAlways(e);
|
||||
}
|
||||
msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
|
||||
status.error = e;
|
||||
@ -1013,7 +1015,7 @@ public final class CheckIndex implements Closeable {
|
||||
msg(infoStream, String.format(Locale.ROOT, "OK [%d fields] [took %.3f sec]", status.totFields, nsToSec(System.nanoTime()-startNS)));
|
||||
} catch (Throwable e) {
|
||||
if (failFast) {
|
||||
IOUtils.reThrow(e);
|
||||
throw IOUtils.rethrowAlways(e);
|
||||
}
|
||||
msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
|
||||
status.error = e;
|
||||
@ -1769,7 +1771,7 @@ public final class CheckIndex implements Closeable {
|
||||
status = checkFields(fields, reader.getLiveDocs(), maxDoc, fieldInfos, true, false, infoStream, verbose, version);
|
||||
} catch (Throwable e) {
|
||||
if (failFast) {
|
||||
IOUtils.reThrow(e);
|
||||
throw IOUtils.rethrowAlways(e);
|
||||
}
|
||||
msg(infoStream, "ERROR: " + e);
|
||||
status = new Status.TermIndexStatus();
|
||||
@ -1845,7 +1847,7 @@ public final class CheckIndex implements Closeable {
|
||||
|
||||
} catch (Throwable e) {
|
||||
if (failFast) {
|
||||
IOUtils.reThrow(e);
|
||||
throw IOUtils.rethrowAlways(e);
|
||||
}
|
||||
msg(infoStream, "ERROR: " + e);
|
||||
status.error = e;
|
||||
@ -2079,7 +2081,7 @@ public final class CheckIndex implements Closeable {
|
||||
nsToSec(System.nanoTime() - startNS)));
|
||||
} catch (Throwable e) {
|
||||
if (failFast) {
|
||||
IOUtils.reThrow(e);
|
||||
throw IOUtils.rethrowAlways(e);
|
||||
}
|
||||
msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
|
||||
status.error = e;
|
||||
@ -2126,7 +2128,7 @@ public final class CheckIndex implements Closeable {
|
||||
nsToSec(System.nanoTime()-startNS)));
|
||||
} catch (Throwable e) {
|
||||
if (failFast) {
|
||||
IOUtils.reThrow(e);
|
||||
throw IOUtils.rethrowAlways(e);
|
||||
}
|
||||
msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
|
||||
status.error = e;
|
||||
@ -2567,7 +2569,7 @@ public final class CheckIndex implements Closeable {
|
||||
status.totVectors, vectorAvg, nsToSec(System.nanoTime() - startNS)));
|
||||
} catch (Throwable e) {
|
||||
if (failFast) {
|
||||
IOUtils.reThrow(e);
|
||||
throw IOUtils.rethrowAlways(e);
|
||||
}
|
||||
msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
|
||||
status.error = e;
|
||||
|
@ -603,7 +603,7 @@ final class DefaultIndexingChain extends DocConsumer {
|
||||
// PerField.invert to allow for later downgrading of the index options:
|
||||
fi.setIndexOptions(fieldType.indexOptions());
|
||||
|
||||
fp = new PerField(fi, invert);
|
||||
fp = new PerField(docWriter.getIndexCreatedVersionMajor(), fi, invert);
|
||||
fp.next = fieldHash[hashPos];
|
||||
fieldHash[hashPos] = fp;
|
||||
totalFieldCount++;
|
||||
@ -633,6 +633,7 @@ final class DefaultIndexingChain extends DocConsumer {
|
||||
/** NOTE: not static: accesses at least docState, termsHash. */
|
||||
private final class PerField implements Comparable<PerField> {
|
||||
|
||||
final int indexCreatedVersionMajor;
|
||||
final FieldInfo fieldInfo;
|
||||
final Similarity similarity;
|
||||
|
||||
@ -659,7 +660,8 @@ final class DefaultIndexingChain extends DocConsumer {
|
||||
// reused
|
||||
TokenStream tokenStream;
|
||||
|
||||
public PerField(FieldInfo fieldInfo, boolean invert) {
|
||||
public PerField(int indexCreatedVersionMajor, FieldInfo fieldInfo, boolean invert) {
|
||||
this.indexCreatedVersionMajor = indexCreatedVersionMajor;
|
||||
this.fieldInfo = fieldInfo;
|
||||
similarity = docState.similarity;
|
||||
if (invert) {
|
||||
@ -668,7 +670,7 @@ final class DefaultIndexingChain extends DocConsumer {
|
||||
}
|
||||
|
||||
void setInvertState() {
|
||||
invertState = new FieldInvertState(fieldInfo.name);
|
||||
invertState = new FieldInvertState(indexCreatedVersionMajor, fieldInfo.name);
|
||||
termsHashPerField = termsHash.addField(invertState, fieldInfo);
|
||||
if (fieldInfo.omitsNorms() == false) {
|
||||
assert norms == null;
|
||||
|
@ -193,6 +193,10 @@ class DocumentsWriterPerThread {
|
||||
return fieldInfos;
|
||||
}
|
||||
|
||||
public int getIndexCreatedVersionMajor() {
|
||||
return indexWriter.segmentInfos.getIndexCreatedVersionMajor();
|
||||
}
|
||||
|
||||
final void testPoint(String message) {
|
||||
if (enableTestPoints) {
|
||||
assert infoStream.isEnabled("TP"); // don't enable unless you need them.
|
||||
|
@ -31,7 +31,8 @@ import org.apache.lucene.util.AttributeSource;
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class FieldInvertState {
|
||||
String name;
|
||||
final int indexCreatedVersionMajor;
|
||||
final String name;
|
||||
int position;
|
||||
int length;
|
||||
int numOverlap;
|
||||
@ -50,14 +51,15 @@ public final class FieldInvertState {
|
||||
|
||||
/** Creates {@code FieldInvertState} for the specified
|
||||
* field name. */
|
||||
public FieldInvertState(String name) {
|
||||
public FieldInvertState(int indexCreatedVersionMajor, String name) {
|
||||
this.indexCreatedVersionMajor = indexCreatedVersionMajor;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
/** Creates {@code FieldInvertState} for the specified
|
||||
* field name and values for all fields. */
|
||||
public FieldInvertState(String name, int position, int length, int numOverlap, int offset) {
|
||||
this.name = name;
|
||||
public FieldInvertState(int indexCreatedVersionMajor, String name, int position, int length, int numOverlap, int offset) {
|
||||
this(indexCreatedVersionMajor, name);
|
||||
this.position = position;
|
||||
this.length = length;
|
||||
this.numOverlap = numOverlap;
|
||||
@ -164,4 +166,11 @@ public final class FieldInvertState {
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the version that was used to create the index, or 6 if it was created before 7.0.
|
||||
*/
|
||||
public int getIndexCreatedVersionMajor() {
|
||||
return indexCreatedVersionMajor;
|
||||
}
|
||||
}
|
||||
|
@ -364,7 +364,7 @@ final class IndexFileDeleter implements Closeable {
|
||||
* Remove the CommitPoints in the commitsToDelete List by
|
||||
* DecRef'ing all files from each SegmentInfos.
|
||||
*/
|
||||
private void deleteCommits() {
|
||||
private void deleteCommits() throws IOException {
|
||||
|
||||
int size = commitsToDelete.size();
|
||||
|
||||
@ -388,8 +388,9 @@ final class IndexFileDeleter implements Closeable {
|
||||
}
|
||||
commitsToDelete.clear();
|
||||
|
||||
// NOTE: does nothing if firstThrowable is null
|
||||
IOUtils.reThrowUnchecked(firstThrowable);
|
||||
if (firstThrowable != null) {
|
||||
throw IOUtils.rethrowAlways(firstThrowable);
|
||||
}
|
||||
|
||||
// Now compact commits to remove deleted ones (preserving the sort):
|
||||
size = commits.size();
|
||||
@ -599,8 +600,9 @@ final class IndexFileDeleter implements Closeable {
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: does nothing if firstThrowable is null
|
||||
IOUtils.reThrow(firstThrowable);
|
||||
if (firstThrowable != null) {
|
||||
throw IOUtils.rethrowAlways(firstThrowable);
|
||||
}
|
||||
}
|
||||
|
||||
/** Decrefs all provided files, ignoring any exceptions hit; call this if
|
||||
|
@ -144,7 +144,9 @@ public abstract class IndexReader implements Closeable {
|
||||
// overridden by StandardDirectoryReader and SegmentReader
|
||||
void notifyReaderClosedListeners(Throwable th) throws IOException {
|
||||
// nothing to notify in the base impl, just rethrow
|
||||
IOUtils.reThrow(th);
|
||||
if (th != null) {
|
||||
throw IOUtils.rethrowAlways(th);
|
||||
}
|
||||
}
|
||||
|
||||
private void reportCloseToParentReaders() {
|
||||
|
@ -611,7 +611,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
if (doSave) {
|
||||
IOUtils.reThrow(t);
|
||||
throw IOUtils.rethrowAlways(t);
|
||||
} else if (priorE == null) {
|
||||
priorE = t;
|
||||
}
|
||||
@ -631,14 +631,16 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||
rld.dropReaders();
|
||||
} catch (Throwable t) {
|
||||
if (doSave) {
|
||||
IOUtils.reThrow(t);
|
||||
throw IOUtils.rethrowAlways(t);
|
||||
} else if (priorE == null) {
|
||||
priorE = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
assert readerMap.size() == 0;
|
||||
IOUtils.reThrow(priorE);
|
||||
if (priorE != null) {
|
||||
throw IOUtils.rethrowAlways(priorE);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -3330,7 +3332,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||
if (commitCompleted) {
|
||||
tragicEvent(t, "finishCommit");
|
||||
} else {
|
||||
IOUtils.reThrow(t);
|
||||
throw IOUtils.rethrowAlways(t);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3898,7 +3900,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||
throw (MergePolicy.MergeAbortedException) t;
|
||||
}
|
||||
} else {
|
||||
IOUtils.reThrow(t);
|
||||
assert t != null;
|
||||
throw IOUtils.rethrowAlways(t);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4238,8 +4241,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||
}
|
||||
|
||||
// If any error occurred, throw it.
|
||||
if (!suppressExceptions) {
|
||||
IOUtils.reThrow(th);
|
||||
if (!suppressExceptions && th != null) {
|
||||
throw IOUtils.rethrowAlways(th);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4815,7 +4818,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||
// It's possible you could have a really bad day
|
||||
if (this.tragedy != null) {
|
||||
// Another thread is already dealing / has dealt with the tragedy:
|
||||
IOUtils.reThrow(tragedy);
|
||||
throw IOUtils.rethrowAlways(tragedy);
|
||||
}
|
||||
|
||||
this.tragedy = tragedy;
|
||||
@ -4826,7 +4829,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||
rollbackInternal();
|
||||
}
|
||||
|
||||
IOUtils.reThrow(tragedy);
|
||||
throw IOUtils.rethrowAlways(tragedy);
|
||||
}
|
||||
|
||||
/** If this {@code IndexWriter} was closed as a side-effect of a tragic exception,
|
||||
|
@ -210,7 +210,10 @@ final class SegmentCoreReaders {
|
||||
}
|
||||
}
|
||||
}
|
||||
IOUtils.reThrow(th);
|
||||
|
||||
if (th != null) {
|
||||
throw IOUtils.rethrowAlways(th);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -90,8 +90,9 @@ final class SegmentDocValues {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (t != null) {
|
||||
IOUtils.reThrow(t);
|
||||
throw IOUtils.rethrowAlways(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -303,7 +303,10 @@ public final class SegmentReader extends CodecReader {
|
||||
}
|
||||
}
|
||||
}
|
||||
IOUtils.reThrow(th);
|
||||
|
||||
if (th != null) {
|
||||
throw IOUtils.rethrowAlways(th);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -391,7 +391,9 @@ public final class StandardDirectoryReader extends DirectoryReader {
|
||||
}
|
||||
|
||||
// throw the first exception
|
||||
IOUtils.reThrow(firstExc);
|
||||
if (firstExc != null) {
|
||||
throw IOUtils.rethrowAlways(firstExc);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -504,7 +506,10 @@ public final class StandardDirectoryReader extends DirectoryReader {
|
||||
}
|
||||
}
|
||||
}
|
||||
IOUtils.reThrow(th);
|
||||
|
||||
if (th != null) {
|
||||
throw IOUtils.rethrowAlways(th);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -58,6 +58,16 @@ public abstract class DoubleValuesSource {
|
||||
*/
|
||||
public abstract boolean needsScores();
|
||||
|
||||
/**
|
||||
* An explanation of the value for the named document.
|
||||
*
|
||||
* @param ctx the reader's context to create the {@link Explanation} for.
|
||||
* @param docId the document's id relative to the given context's reader
|
||||
* @return an Explanation for the value
|
||||
* @throws IOException if an {@link IOException} occurs
|
||||
*/
|
||||
public abstract Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreExplanation) throws IOException;
|
||||
|
||||
/**
|
||||
* Create a sort field based on the value of this producer
|
||||
* @param reverse true if the sort should be decreasing
|
||||
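A hedged usage sketch of the new explain hook (not from the patch; it assumes an already-open LeafReaderContext ctx and a doc id, and leans on fromDoubleField and the description-carrying function(...) overload shown in the hunks below):

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation;

class ExplainSketch {
  // Illustrative only: a field-backed source wrapped by the new function(...)
  // overload, whose description now shows up in the returned Explanation.
  static Explanation describe(LeafReaderContext ctx, int doc) throws IOException {
    DoubleValuesSource price = DoubleValuesSource.fromDoubleField("price");
    DoubleValuesSource boosted = DoubleValuesSource.function(price, "2*price", v -> 2 * v);
    // the score explanation is only consulted by score-dependent sources; pass a stub here
    return boosted.explain(ctx, doc, Explanation.match(1f, "stub score"));
  }
}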
@ -149,6 +159,11 @@ public abstract class DoubleValuesSource {
|
||||
public boolean needsScores() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreExplanation) {
|
||||
return scoreExplanation;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
@ -176,6 +191,11 @@ public abstract class DoubleValuesSource {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreExplanation) {
|
||||
return Explanation.match((float) value, "constant(" + value + ")");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "constant(" + value + ")";
|
||||
@ -186,7 +206,7 @@ public abstract class DoubleValuesSource {
|
||||
/**
|
||||
* Creates a DoubleValuesSource that is a function of another DoubleValuesSource
|
||||
*/
|
||||
public static DoubleValuesSource function(DoubleValuesSource in, DoubleUnaryOperator function) {
|
||||
public static DoubleValuesSource function(DoubleValuesSource in, String description, DoubleUnaryOperator function) {
|
||||
return new DoubleValuesSource() {
|
||||
@Override
|
||||
public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
|
||||
@ -208,15 +228,22 @@ public abstract class DoubleValuesSource {
|
||||
public boolean needsScores() {
|
||||
return in.needsScores();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreExplanation) throws IOException {
|
||||
Explanation inner = in.explain(ctx, docId, scoreExplanation);
|
||||
return Explanation.match((float) function.applyAsDouble(inner.getValue()), description + ", computed from:", inner, scoreExplanation);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a DoubleValuesSource that is a function of another DoubleValuesSource and a score
|
||||
* @param in the DoubleValuesSource to use as an input
|
||||
* @param description a description of the function
|
||||
* @param function a function of the form (source, score) == result
|
||||
*/
|
||||
public static DoubleValuesSource scoringFunction(DoubleValuesSource in, ToDoubleBiFunction<Double, Double> function) {
|
||||
public static DoubleValuesSource scoringFunction(DoubleValuesSource in, String description, ToDoubleBiFunction<Double, Double> function) {
|
||||
return new DoubleValuesSource() {
|
||||
@Override
|
||||
public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
|
||||
@ -238,6 +265,13 @@ public abstract class DoubleValuesSource {
|
||||
public boolean needsScores() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreExplanation) throws IOException {
|
||||
Explanation inner = in.explain(ctx, docId, scoreExplanation);
|
||||
return Explanation.match((float) function.applyAsDouble((double)inner.getValue(), (double)scoreExplanation.getValue()),
|
||||
description + ", computed from:", inner, scoreExplanation);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@ -303,6 +337,15 @@ public abstract class DoubleValuesSource {
|
||||
public boolean needsScores() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreExplanation) throws IOException {
|
||||
DoubleValues values = getValues(ctx, null);
|
||||
if (values.advanceExact(docId))
|
||||
return Explanation.match((float)values.doubleValue(), "double(" + field + ")");
|
||||
else
|
||||
return Explanation.noMatch("double(" + field + ")");
|
||||
}
|
||||
}
|
||||
|
||||
private static class DoubleValuesSortField extends SortField {
|
||||
|
@ -298,7 +298,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
|
||||
try {
|
||||
Query singleton = uniqueQueries.putIfAbsent(query, query);
|
||||
if (singleton == null) {
|
||||
onQueryCache(singleton, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(query));
|
||||
onQueryCache(query, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(query));
|
||||
} else {
|
||||
query = singleton;
|
||||
}
|
||||
|
@ -96,20 +96,6 @@ public class BM25Similarity extends Similarity {
|
||||
}
|
||||
}
|
||||
|
||||
/** The default implementation encodes <code>1 / sqrt(length)</code>
|
||||
* with {@link SmallFloat#floatToByte315(float)}. This is compatible with
|
||||
* Lucene's historic implementation: {@link ClassicSimilarity}. If you
|
||||
* change this, then you should change {@link #decodeNormValue(byte)} to match. */
|
||||
protected byte encodeNormValue(int fieldLength) {
|
||||
return SmallFloat.floatToByte315((float) (1 / Math.sqrt(fieldLength)));
|
||||
}
|
||||
|
||||
/** The default implementation returns <code>1 / f<sup>2</sup></code>
|
||||
* where <code>f</code> is {@link SmallFloat#byte315ToFloat(byte)}. */
|
||||
protected float decodeNormValue(byte b) {
|
||||
return NORM_TABLE[b & 0xFF];
|
||||
}
|
||||
|
||||
/**
|
||||
* True if overlap tokens (tokens with a position increment of zero) are
|
||||
* discounted from the document's length.
|
||||
@ -132,21 +118,31 @@ public class BM25Similarity extends Similarity {
|
||||
}
|
||||
|
||||
/** Cache of decoded bytes. */
|
||||
private static final float[] NORM_TABLE = new float[256];
|
||||
private static final float[] OLD_LENGTH_TABLE = new float[256];
|
||||
private static final float[] LENGTH_TABLE = new float[256];
|
||||
|
||||
static {
|
||||
for (int i = 1; i < 256; i++) {
|
||||
float f = SmallFloat.byte315ToFloat((byte)i);
|
||||
NORM_TABLE[i] = 1.0f / (f*f);
|
||||
OLD_LENGTH_TABLE[i] = 1.0f / (f*f);
|
||||
}
|
||||
OLD_LENGTH_TABLE[0] = 1.0f / OLD_LENGTH_TABLE[255]; // otherwise inf
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
|
||||
}
|
||||
NORM_TABLE[0] = 1.0f / NORM_TABLE[255]; // otherwise inf
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public final long computeNorm(FieldInvertState state) {
|
||||
final int numTerms = discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength();
|
||||
return encodeNormValue(numTerms);
|
||||
int indexCreatedVersionMajor = state.getIndexCreatedVersionMajor();
|
||||
if (indexCreatedVersionMajor >= 7) {
|
||||
return SmallFloat.intToByte4(numTerms);
|
||||
} else {
|
||||
return SmallFloat.floatToByte315((float) (1 / Math.sqrt(numTerms)));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
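A small stand-alone sketch (not part of the patch) of what the two tables above decode: pre-7.0 norms store floatToByte315(1/sqrt(length)) and come back through 1/(f*f), while 7.0+ norms store the field length itself, lossily compressed with intToByte4/byte4ToInt:

import org.apache.lucene.util.SmallFloat;

class NormEncodingSketch {
  public static void main(String[] args) {
    int length = 7;                                        // number of indexed terms in the field

    // 7.0+ encoding: store the length itself, compressed to one byte
    byte newNorm = SmallFloat.intToByte4(length);
    int decodedLength = SmallFloat.byte4ToInt(newNorm);    // 7 again; small lengths round-trip exactly

    // pre-7.0 encoding: store 1/sqrt(length) as a 3-bit-mantissa float
    byte oldNorm = SmallFloat.floatToByte315((float) (1 / Math.sqrt(length)));
    float f = SmallFloat.byte315ToFloat(oldNorm);
    float approxLength = 1.0f / (f * f);                   // roughly 7, with precision loss

    System.out.println(decodedLength + " vs " + approxLength);
  }
}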
@ -207,34 +203,43 @@ public class BM25Similarity extends Similarity {
|
||||
@Override
|
||||
public final SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
|
||||
|
||||
float avgdl = avgFieldLength(collectionStats);
|
||||
|
||||
// compute freq-independent part of bm25 equation across all norm values
|
||||
float cache[] = new float[256];
|
||||
float[] oldCache = new float[256];
|
||||
float[] cache = new float[256];
|
||||
for (int i = 0; i < cache.length; i++) {
|
||||
cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
|
||||
oldCache[i] = k1 * ((1 - b) + b * OLD_LENGTH_TABLE[i] / avgdl);
|
||||
cache[i] = k1 * ((1 - b) + b * LENGTH_TABLE[i] / avgdl);
|
||||
}
|
||||
return new BM25Stats(collectionStats.field(), boost, idf, avgdl, cache);
|
||||
return new BM25Stats(collectionStats.field(), boost, idf, avgdl, oldCache, cache);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
|
||||
BM25Stats bm25stats = (BM25Stats) stats;
|
||||
return new BM25DocScorer(bm25stats, context.reader().getNormValues(bm25stats.field));
|
||||
return new BM25DocScorer(bm25stats, context.reader().getMetaData().getCreatedVersionMajor(), context.reader().getNormValues(bm25stats.field));
|
||||
}
|
||||
|
||||
private class BM25DocScorer extends SimScorer {
|
||||
private final BM25Stats stats;
|
||||
private final float weightValue; // boost * idf * (k1 + 1)
|
||||
private final NumericDocValues norms;
|
||||
/** precomputed cache for all length values */
|
||||
private final float[] lengthCache;
|
||||
/** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */
|
||||
private final float[] cache;
|
||||
|
||||
BM25DocScorer(BM25Stats stats, NumericDocValues norms) throws IOException {
|
||||
BM25DocScorer(BM25Stats stats, int indexCreatedVersionMajor, NumericDocValues norms) throws IOException {
|
||||
this.stats = stats;
|
||||
this.weightValue = stats.weight * (k1 + 1);
|
||||
this.cache = stats.cache;
|
||||
this.norms = norms;
|
||||
if (indexCreatedVersionMajor >= 7) {
|
||||
lengthCache = LENGTH_TABLE;
|
||||
cache = stats.cache;
|
||||
} else {
|
||||
lengthCache = OLD_LENGTH_TABLE;
|
||||
cache = stats.oldCache;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -245,7 +250,7 @@ public class BM25Similarity extends Similarity {
|
||||
norm = k1;
|
||||
} else {
|
||||
if (norms.advanceExact(doc)) {
|
||||
norm = cache[(byte)norms.longValue() & 0xFF];
|
||||
norm = cache[((byte) norms.longValue()) & 0xFF];
|
||||
} else {
|
||||
norm = cache[0];
|
||||
}
|
||||
@ -255,7 +260,7 @@ public class BM25Similarity extends Similarity {
|
||||
|
||||
@Override
|
||||
public Explanation explain(int doc, Explanation freq) throws IOException {
|
||||
return explainScore(doc, freq, stats, norms);
|
||||
return explainScore(doc, freq, stats, norms, lengthCache);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -281,21 +286,23 @@ public class BM25Similarity extends Similarity {
|
||||
private final float weight;
|
||||
/** field name, for pulling norms */
|
||||
private final String field;
|
||||
/** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */
|
||||
private final float cache[];
|
||||
/** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl)
|
||||
* for both OLD_LENGTH_TABLE and LENGTH_TABLE */
|
||||
private final float[] oldCache, cache;
|
||||
|
||||
BM25Stats(String field, float boost, Explanation idf, float avgdl, float cache[]) {
|
||||
BM25Stats(String field, float boost, Explanation idf, float avgdl, float[] oldCache, float[] cache) {
|
||||
this.field = field;
|
||||
this.boost = boost;
|
||||
this.idf = idf;
|
||||
this.avgdl = avgdl;
|
||||
this.cache = cache;
|
||||
this.weight = idf.getValue() * boost;
|
||||
this.oldCache = oldCache;
|
||||
this.cache = cache;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private Explanation explainTFNorm(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms) throws IOException {
|
||||
private Explanation explainTFNorm(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms, float[] lengthCache) throws IOException {
|
||||
List<Explanation> subs = new ArrayList<>();
|
||||
subs.add(freq);
|
||||
subs.add(Explanation.match(k1, "parameter k1"));
|
||||
@ -311,7 +318,7 @@ public class BM25Similarity extends Similarity {
|
||||
} else {
|
||||
norm = 0;
|
||||
}
|
||||
float doclen = decodeNormValue(norm);
|
||||
float doclen = lengthCache[norm & 0xff];
|
||||
subs.add(Explanation.match(b, "parameter b"));
|
||||
subs.add(Explanation.match(stats.avgdl, "avgFieldLength"));
|
||||
subs.add(Explanation.match(doclen, "fieldLength"));
|
||||
@ -321,13 +328,13 @@ public class BM25Similarity extends Similarity {
|
||||
}
|
||||
}
|
||||
|
||||
private Explanation explainScore(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms) throws IOException {
|
||||
private Explanation explainScore(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms, float[] lengthCache) throws IOException {
|
||||
Explanation boostExpl = Explanation.match(stats.boost, "boost");
|
||||
List<Explanation> subs = new ArrayList<>();
|
||||
if (boostExpl.getValue() != 1.0f)
|
||||
subs.add(boostExpl);
|
||||
subs.add(stats.idf);
|
||||
Explanation tfNormExpl = explainTFNorm(doc, freq, stats, norms);
|
||||
Explanation tfNormExpl = explainTFNorm(doc, freq, stats, norms, lengthCache);
|
||||
subs.add(tfNormExpl);
|
||||
return Explanation.match(
|
||||
boostExpl.getValue() * stats.idf.getValue() * tfNormExpl.getValue(),
|
||||
|
@ -17,91 +17,27 @@
|
||||
package org.apache.lucene.search.similarities;
|
||||
|
||||
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.SmallFloat;
|
||||
|
||||
/**
|
||||
* Expert: Default scoring implementation which {@link #encodeNormValue(float)
|
||||
* encodes} norm values as a single byte before being stored. At search time,
|
||||
* the norm byte value is read from the index
|
||||
* {@link org.apache.lucene.store.Directory directory} and
|
||||
* {@link #decodeNormValue(long) decoded} back to a float <i>norm</i> value.
|
||||
* This encoding/decoding, while reducing index size, comes with the price of
|
||||
* precision loss - it is not guaranteed that <i>decode(encode(x)) = x</i>. For
|
||||
* instance, <i>decode(encode(0.89)) = 0.875</i>.
|
||||
* <p>
|
||||
* Compression of norm values to a single byte saves memory at search time,
|
||||
* because once a field is referenced at search time, its norms - for all
|
||||
* documents - are maintained in memory.
|
||||
* <p>
|
||||
* The rationale supporting such lossy compression of norm values is that given
|
||||
* the difficulty (and inaccuracy) of users to express their true information
|
||||
* need by a query, only big differences matter. <br>
|
||||
* <br>
|
||||
* Last, note that search time is too late to modify this <i>norm</i> part of
|
||||
* scoring, e.g. by using a different {@link Similarity} for search.
|
||||
* Expert: Historical scoring implementation. You might want to consider using
|
||||
* {@link BM25Similarity} instead, which is generally considered superior to
|
||||
* TF-IDF.
|
||||
*/
|
||||
public class ClassicSimilarity extends TFIDFSimilarity {
|
||||
|
||||
/** Cache of decoded bytes. */
|
||||
private static final float[] NORM_TABLE = new float[256];
|
||||
|
||||
static {
|
||||
for (int i = 0; i < 256; i++) {
|
||||
NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
|
||||
}
|
||||
}
|
||||
|
||||
/** Sole constructor: parameter-free */
|
||||
public ClassicSimilarity() {}
|
||||
|
||||
/**
|
||||
* Encodes a normalization factor for storage in an index.
|
||||
* <p>
|
||||
* The encoding uses a three-bit mantissa, a five-bit exponent, and the
|
||||
* zero-exponent point at 15, thus representing values from around 7x10^9 to
|
||||
* 2x10^-9 with about one significant decimal digit of accuracy. Zero is also
|
||||
* represented. Negative numbers are rounded up to zero. Values too large to
|
||||
* represent are rounded down to the largest representable value. Positive
|
||||
* values too small to represent are rounded up to the smallest positive
|
||||
* representable value.
|
||||
*
|
||||
* @see org.apache.lucene.util.SmallFloat
|
||||
*/
|
||||
@Override
|
||||
public final long encodeNormValue(float f) {
|
||||
return SmallFloat.floatToByte315(f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes the norm value, assuming it is a single byte.
|
||||
*
|
||||
* @see #encodeNormValue(float)
|
||||
*/
|
||||
@Override
|
||||
public final float decodeNormValue(long norm) {
|
||||
return NORM_TABLE[(int) (norm & 0xFF)]; // & 0xFF maps negative bytes to positive above 127
|
||||
}
|
||||
|
||||
/** Implemented as
|
||||
* <code>state.getBoost()*lengthNorm(numTerms)</code>, where
|
||||
* <code>numTerms</code> is {@link FieldInvertState#getLength()} if {@link
|
||||
* #setDiscountOverlaps} is false, else it's {@link
|
||||
* FieldInvertState#getLength()} - {@link
|
||||
* FieldInvertState#getNumOverlap()}.
|
||||
* <code>1/sqrt(length)</code>.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
@Override
|
||||
public float lengthNorm(FieldInvertState state) {
|
||||
final int numTerms;
|
||||
if (discountOverlaps)
|
||||
numTerms = state.getLength() - state.getNumOverlap();
|
||||
else
|
||||
numTerms = state.getLength();
|
||||
public float lengthNorm(int numTerms) {
|
||||
return (float) (1.0 / Math.sqrt(numTerms));
|
||||
}
|
||||
|
||||
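For illustration (numbers are not from the patch): a field with five positions, one of them an overlap token, gives numTerms = 5 - 1 = 4 when discountOverlaps is on, so the simplified lengthNorm(4) above returns 1/sqrt(4) = 0.5; TFIDFSimilarity.computeNorm further below then stores floatToByte315(0.5) for pre-7.0 style norms, or the raw length 4 via intToByte4 for 7.0+ indexes.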
@ -138,33 +74,6 @@ public class ClassicSimilarity extends TFIDFSimilarity {
|
||||
public float idf(long docFreq, long docCount) {
|
||||
return (float)(Math.log((docCount+1)/(double)(docFreq+1)) + 1.0);
|
||||
}
|
||||
|
||||
/**
|
||||
* True if overlap tokens (tokens with a position increment of zero) are
|
||||
* discounted from the document's length.
|
||||
*/
|
||||
protected boolean discountOverlaps = true;
|
||||
|
||||
/** Determines whether overlap tokens (Tokens with
|
||||
* 0 position increment) are ignored when computing
|
||||
* norm. By default this is true, meaning overlap
|
||||
* tokens do not count when computing norms.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
* @see #computeNorm
|
||||
*/
|
||||
public void setDiscountOverlaps(boolean v) {
|
||||
discountOverlaps = v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if overlap tokens are discounted from the document's length.
|
||||
* @see #setDiscountOverlaps
|
||||
*/
|
||||
public boolean getDiscountOverlaps() {
|
||||
return discountOverlaps;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
@ -190,7 +190,8 @@ public abstract class SimilarityBase extends Similarity {
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
|
||||
public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
|
||||
int indexCreatedVersionMajor = context.reader().getMetaData().getCreatedVersionMajor();
|
||||
if (stats instanceof MultiSimilarity.MultiStats) {
|
||||
// a multi term query (e.g. phrase). return the summation,
|
||||
// scoring almost as if it were boolean query
|
||||
@ -198,12 +199,12 @@ public abstract class SimilarityBase extends Similarity {
|
||||
SimScorer subScorers[] = new SimScorer[subStats.length];
|
||||
for (int i = 0; i < subScorers.length; i++) {
|
||||
BasicStats basicstats = (BasicStats) subStats[i];
|
||||
subScorers[i] = new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
|
||||
subScorers[i] = new BasicSimScorer(basicstats, indexCreatedVersionMajor, context.reader().getNormValues(basicstats.field));
|
||||
}
|
||||
return new MultiSimilarity.MultiSimScorer(subScorers);
|
||||
} else {
|
||||
BasicStats basicstats = (BasicStats) stats;
|
||||
return new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
|
||||
return new BasicSimScorer(basicstats, indexCreatedVersionMajor, context.reader().getNormValues(basicstats.field));
|
||||
}
|
||||
}
|
||||
|
||||
@ -216,40 +217,38 @@ public abstract class SimilarityBase extends Similarity {
|
||||
|
||||
// ------------------------------ Norm handling ------------------------------
|
||||
|
||||
/** Norm to document length map. */
|
||||
private static final float[] NORM_TABLE = new float[256];
|
||||
/** Cache of decoded bytes. */
|
||||
private static final float[] OLD_LENGTH_TABLE = new float[256];
|
||||
private static final float[] LENGTH_TABLE = new float[256];
|
||||
|
||||
static {
|
||||
for (int i = 1; i < 256; i++) {
|
||||
float floatNorm = SmallFloat.byte315ToFloat((byte)i);
|
||||
NORM_TABLE[i] = 1.0f / (floatNorm * floatNorm);
|
||||
float f = SmallFloat.byte315ToFloat((byte)i);
|
||||
OLD_LENGTH_TABLE[i] = 1.0f / (f*f);
|
||||
}
|
||||
OLD_LENGTH_TABLE[0] = 1.0f / OLD_LENGTH_TABLE[255]; // otherwise inf
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
|
||||
}
|
||||
NORM_TABLE[0] = 1.0f / NORM_TABLE[255]; // otherwise inf
|
||||
}
|
||||
|
||||
/** Encodes the document length in the same way as {@link TFIDFSimilarity}. */
|
||||
/** Encodes the document length in the same way as {@link BM25Similarity}. */
|
||||
@Override
|
||||
public long computeNorm(FieldInvertState state) {
|
||||
final float numTerms;
|
||||
public final long computeNorm(FieldInvertState state) {
|
||||
final int numTerms;
|
||||
if (discountOverlaps)
|
||||
numTerms = state.getLength() - state.getNumOverlap();
|
||||
else
|
||||
numTerms = state.getLength();
|
||||
return encodeNormValue(numTerms);
|
||||
int indexCreatedVersionMajor = state.getIndexCreatedVersionMajor();
|
||||
if (indexCreatedVersionMajor >= 7) {
|
||||
return SmallFloat.intToByte4(numTerms);
|
||||
} else {
|
||||
return SmallFloat.floatToByte315((float) (1 / Math.sqrt(numTerms)));
|
||||
}
|
||||
}
|
||||
|
||||
/** Decodes a normalization factor (document length) stored in an index.
|
||||
* @see #encodeNormValue(float)
|
||||
*/
|
||||
protected float decodeNormValue(byte norm) {
|
||||
return NORM_TABLE[norm & 0xFF]; // & 0xFF maps negative bytes to positive above 127
|
||||
}
|
||||
|
||||
/** Encodes the length to a byte via SmallFloat. */
|
||||
protected byte encodeNormValue(float length) {
|
||||
return SmallFloat.floatToByte315((float) (1 / Math.sqrt(length)));
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------- Static methods ------------------------------
|
||||
|
||||
/** Returns the base two logarithm of {@code x}. */
|
||||
@ -266,35 +265,37 @@ public abstract class SimilarityBase extends Similarity {
|
||||
* {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
|
||||
* respectively.
|
||||
*/
|
||||
private class BasicSimScorer extends SimScorer {
|
||||
final class BasicSimScorer extends SimScorer {
|
||||
private final BasicStats stats;
|
||||
private final NumericDocValues norms;
|
||||
private final float[] normCache;
|
||||
|
||||
BasicSimScorer(BasicStats stats, NumericDocValues norms) throws IOException {
|
||||
BasicSimScorer(BasicStats stats, int indexCreatedVersionMajor, NumericDocValues norms) throws IOException {
|
||||
this.stats = stats;
|
||||
this.norms = norms;
|
||||
this.normCache = indexCreatedVersionMajor >= 7 ? LENGTH_TABLE : OLD_LENGTH_TABLE;
|
||||
}
|
||||
|
||||
private float getNormValue(int doc) throws IOException {
|
||||
float getLengthValue(int doc) throws IOException {
|
||||
if (norms == null) {
|
||||
return 1F;
|
||||
}
|
||||
if (norms.advanceExact(doc)) {
|
||||
return decodeNormValue((byte) norms.longValue());
|
||||
return normCache[Byte.toUnsignedInt((byte) norms.longValue())];
|
||||
} else {
|
||||
return decodeNormValue((byte) 0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score(int doc, float freq) throws IOException {
|
||||
// We have to supply something in case norms are omitted
|
||||
return SimilarityBase.this.score(stats, freq, getNormValue(doc));
|
||||
return SimilarityBase.this.score(stats, freq, getLengthValue(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(int doc, Explanation freq) throws IOException {
|
||||
return SimilarityBase.this.explain(stats, doc, freq, getNormValue(doc));
|
||||
return SimilarityBase.this.explain(stats, doc, freq, getLengthValue(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -30,6 +30,7 @@ import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.SmallFloat;
|
||||
|
||||
|
||||
/**
|
||||
@ -233,11 +234,6 @@ import org.apache.lucene.util.BytesRef;
|
||||
* And this is exactly what normalizing the query vector <i>V(q)</i>
|
||||
* provides: comparability (to a certain extent) of two or more queries.
|
||||
* </li>
|
||||
*
|
||||
* <li>Applying query normalization on the scores helps to keep the
|
||||
* scores around the unit vector, hence preventing loss of score data
|
||||
* because of floating point precision limitations.
|
||||
* </li>
|
||||
* </ul>
|
||||
* </li>
|
||||
*
|
||||
@ -379,13 +375,49 @@ import org.apache.lucene.util.BytesRef;
|
||||
* @see IndexSearcher#setSimilarity(Similarity)
|
||||
*/
|
||||
public abstract class TFIDFSimilarity extends Similarity {
|
||||
|
||||
|
||||
/** Cache of decoded bytes. */
|
||||
static final float[] OLD_NORM_TABLE = new float[256];
|
||||
|
||||
static {
|
||||
for (int i = 0; i < 256; i++) {
|
||||
OLD_NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sole constructor. (For invocation by subclass
|
||||
* constructors, typically implicit.)
|
||||
*/
|
||||
public TFIDFSimilarity() {}
|
||||
|
||||
|
||||
/**
|
||||
* True if overlap tokens (tokens with a position increment of zero) are
|
||||
* discounted from the document's length.
|
||||
*/
|
||||
protected boolean discountOverlaps = true;
|
||||
|
||||
/** Determines whether overlap tokens (Tokens with
|
||||
* 0 position increment) are ignored when computing
|
||||
* norm. By default this is true, meaning overlap
|
||||
* tokens do not count when computing norms.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
* @see #computeNorm
|
||||
*/
|
||||
public void setDiscountOverlaps(boolean v) {
|
||||
discountOverlaps = v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if overlap tokens are discounted from the document's length.
|
||||
* @see #setDiscountOverlaps
|
||||
*/
|
||||
public boolean getDiscountOverlaps() {
|
||||
return discountOverlaps;
|
||||
}
|
||||
|
||||
/** Computes a score factor based on a term or phrase's frequency in a
|
||||
* document. This value is multiplied by the {@link #idf(long, long)}
|
||||
* factor for each term in the query and these products are then summed to
|
||||
@ -471,30 +503,25 @@ public abstract class TFIDFSimilarity extends Similarity {
|
||||
|
||||
/**
|
||||
* Compute an index-time normalization value for this field instance.
|
||||
* <p>
|
||||
* This value will be stored in a single byte lossy representation by
|
||||
* {@link #encodeNormValue(float)}.
|
||||
*
|
||||
* @param state statistics of the current field (such as length, boost, etc)
|
||||
* @return an index-time normalization value
|
||||
* @param length the number of terms in the field, optionally {@link #setDiscountOverlaps(boolean) discounting overlaps}
|
||||
* @return a length normalization value
|
||||
*/
|
||||
public abstract float lengthNorm(FieldInvertState state);
|
||||
public abstract float lengthNorm(int length);
|
||||
|
||||
@Override
|
||||
public final long computeNorm(FieldInvertState state) {
|
||||
float normValue = lengthNorm(state);
|
||||
return encodeNormValue(normValue);
|
||||
final int numTerms;
|
||||
if (discountOverlaps)
|
||||
numTerms = state.getLength() - state.getNumOverlap();
|
||||
else
|
||||
numTerms = state.getLength();
|
||||
if (state.getIndexCreatedVersionMajor() >= 7) {
|
||||
return SmallFloat.intToByte4(numTerms);
|
||||
} else {
|
||||
return SmallFloat.floatToByte315(lengthNorm(numTerms));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a normalization factor stored in an index.
|
||||
*
|
||||
* @see #encodeNormValue(float)
|
||||
*/
|
||||
public abstract float decodeNormValue(long norm);
|
||||
|
||||
/** Encodes a normalization factor for storage in an index. */
|
||||
public abstract long encodeNormValue(float f);
|
||||
|
||||
/** Computes the amount of a sloppy phrase match, based on an edit distance.
|
||||
* This value is summed for each sloppy phrase match in a document to form
|
||||
@ -529,24 +556,41 @@ public abstract class TFIDFSimilarity extends Similarity {
|
||||
final Explanation idf = termStats.length == 1
|
||||
? idfExplain(collectionStats, termStats[0])
|
||||
: idfExplain(collectionStats, termStats);
|
||||
return new IDFStats(collectionStats.field(), boost, idf);
|
||||
float[] normTable = new float[256];
|
||||
for (int i = 1; i < 256; ++i) {
|
||||
int length = SmallFloat.byte4ToInt((byte) i);
|
||||
float norm = lengthNorm(length);
|
||||
normTable[i] = norm;
|
||||
}
|
||||
normTable[0] = 1f / normTable[255];
|
||||
return new IDFStats(collectionStats.field(), boost, idf, normTable);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
|
||||
IDFStats idfstats = (IDFStats) stats;
|
||||
return new TFIDFSimScorer(idfstats, context.reader().getNormValues(idfstats.field));
|
||||
final float[] normTable;
|
||||
if (context.reader().getMetaData().getCreatedVersionMajor() >= 7) {
|
||||
// the norms only encode the length, we need a translation table that depends on how lengthNorm is implemented
|
||||
normTable = idfstats.normTable;
|
||||
} else {
|
||||
// the norm is directly encoded in the index
|
||||
normTable = OLD_NORM_TABLE;
|
||||
}
|
||||
return new TFIDFSimScorer(idfstats, context.reader().getNormValues(idfstats.field), normTable);
|
||||
}
|
||||
|
||||
private final class TFIDFSimScorer extends SimScorer {
|
||||
private final IDFStats stats;
|
||||
private final float weightValue;
|
||||
private final NumericDocValues norms;
|
||||
private final float[] normTable;
|
||||
|
||||
TFIDFSimScorer(IDFStats stats, NumericDocValues norms) throws IOException {
|
||||
TFIDFSimScorer(IDFStats stats, NumericDocValues norms, float[] normTable) throws IOException {
|
||||
this.stats = stats;
|
||||
this.weightValue = stats.queryWeight;
|
||||
this.norms = norms;
|
||||
this.normTable = normTable;
|
||||
}
|
||||
|
||||
@Override
|
||||
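A hedged stand-alone rendering of the table the computeWeight hunk above builds (identifiers are illustrative): with 7.0+ norms the stored byte is a compressed length, so each TFIDFSimilarity subclass needs its own byte-to-norm table derived from its lengthNorm, while for older indexes the byte already is the encoded norm and OLD_NORM_TABLE applies.

import org.apache.lucene.util.SmallFloat;

class NormTableSketch {
  // mirrors ClassicSimilarity.lengthNorm(int)
  static float lengthNorm(int numTerms) {
    return (float) (1.0 / Math.sqrt(numTerms));
  }

  static float[] buildNormTable() {
    float[] normTable = new float[256];
    for (int i = 1; i < 256; ++i) {
      normTable[i] = lengthNorm(SmallFloat.byte4ToInt((byte) i));
    }
    normTable[0] = 1f / normTable[255];   // same guard against a zero-length entry as the patch
    return normTable;
  }

  public static void main(String[] args) {
    float[] table = buildNormTable();
    byte storedNorm = SmallFloat.intToByte4(4);     // a 7.0+ norm byte for a 4-term field
    System.out.println(table[storedNorm & 0xFF]);   // 0.5 = 1/sqrt(4)
  }
}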
@ -556,13 +600,13 @@ public abstract class TFIDFSimilarity extends Similarity {
|
||||
if (norms == null) {
|
||||
return raw;
|
||||
} else {
|
||||
long normValue;
|
||||
float normValue;
|
||||
if (norms.advanceExact(doc)) {
|
||||
normValue = norms.longValue();
|
||||
normValue = normTable[(int) (norms.longValue() & 0xFF)];
|
||||
} else {
|
||||
normValue = 0;
|
||||
}
|
||||
return raw * decodeNormValue(normValue); // normalize for field
|
||||
return raw * normValue; // normalize for field
|
||||
}
|
||||
}
|
||||
|
||||
@ -578,35 +622,39 @@ public abstract class TFIDFSimilarity extends Similarity {
|
||||
|
||||
@Override
|
||||
public Explanation explain(int doc, Explanation freq) throws IOException {
|
||||
return explainScore(doc, freq, stats, norms);
|
||||
return explainScore(doc, freq, stats, norms, normTable);
|
||||
}
|
||||
}
|
||||
|
||||
/** Collection statistics for the TF-IDF model. The only statistic of interest
|
||||
* to this model is idf. */
|
||||
private static class IDFStats extends SimWeight {
|
||||
static class IDFStats extends SimWeight {
|
||||
private final String field;
|
||||
/** The idf and its explanation */
|
||||
private final Explanation idf;
|
||||
private final float boost;
|
||||
private final float queryWeight;
|
||||
final float[] normTable;
|
||||
|
||||
public IDFStats(String field, float boost, Explanation idf) {
|
||||
public IDFStats(String field, float boost, Explanation idf, float[] normTable) {
|
||||
// TODO: Validate?
|
||||
this.field = field;
|
||||
this.idf = idf;
|
||||
this.boost = boost;
|
||||
this.queryWeight = boost * idf.getValue();
|
||||
this.normTable = normTable;
|
||||
}
|
||||
}
|
||||
|
||||
private Explanation explainField(int doc, Explanation freq, IDFStats stats, NumericDocValues norms) throws IOException {
|
||||
private Explanation explainField(int doc, Explanation freq, IDFStats stats, NumericDocValues norms, float[] normTable) throws IOException {
|
||||
Explanation tfExplanation = Explanation.match(tf(freq.getValue()), "tf(freq="+freq.getValue()+"), with freq of:", freq);
|
||||
float norm;
|
||||
if (norms != null && norms.advanceExact(doc)) {
|
||||
norm = decodeNormValue(norms.longValue());
|
||||
} else {
|
||||
if (norms == null) {
|
||||
norm = 1f;
|
||||
} else if (norms.advanceExact(doc) == false) {
|
||||
norm = 0f;
|
||||
} else {
|
||||
norm = normTable[(int) (norms.longValue() & 0xFF)];
|
||||
}
|
||||
|
||||
Explanation fieldNormExpl = Explanation.match(
|
||||
@ -619,9 +667,9 @@ public abstract class TFIDFSimilarity extends Similarity {
|
||||
tfExplanation, stats.idf, fieldNormExpl);
|
||||
}
|
||||
|
||||
private Explanation explainScore(int doc, Explanation freq, IDFStats stats, NumericDocValues norms) throws IOException {
|
||||
private Explanation explainScore(int doc, Explanation freq, IDFStats stats, NumericDocValues norms, float[] normTable) throws IOException {
|
||||
Explanation queryExpl = Explanation.match(stats.boost, "boost");
|
||||
Explanation fieldExpl = explainField(doc, freq, stats, norms);
|
||||
Explanation fieldExpl = explainField(doc, freq, stats, norms, normTable);
|
||||
if (stats.boost == 1f) {
|
||||
return fieldExpl;
|
||||
}
|
||||
|
@ -215,7 +215,7 @@ public abstract class FSDirectory extends BaseDirectory {
|
||||
try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
|
||||
for (Path path : stream) {
|
||||
String name = path.getFileName().toString();
|
||||
if (skipNames != null && skipNames.contains(name) == false) {
|
||||
if (skipNames == null || skipNames.contains(name) == false) {
|
||||
entries.add(name);
|
||||
}
|
||||
}
|
||||
|
@ -20,6 +20,7 @@ package org.apache.lucene.util;
|
||||
import java.lang.invoke.MethodHandle;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.lang.invoke.MethodType;
|
||||
import java.lang.reflect.UndeclaredThrowableException;
|
||||
|
||||
/**
|
||||
* An AttributeFactory creates instances of {@link AttributeImpl}s.
|
||||
@ -28,8 +29,14 @@ public abstract class AttributeFactory {
|
||||
|
||||
/**
|
||||
* Returns an {@link AttributeImpl} for the supplied {@link Attribute} interface class.
|
||||
*
|
||||
* @throws UndeclaredThrowableException A wrapper runtime exception thrown if the
|
||||
* constructor of the attribute class throws a checked exception.
|
||||
* Note that attributes should not throw or declare
|
||||
* checked exceptions; this may be verified and fail early in the future.
|
||||
*/
|
||||
public abstract AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass);
|
||||
public abstract AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass)
|
||||
throws UndeclaredThrowableException;
|
||||
|
||||
/**
|
||||
* Returns a correctly typed {@link MethodHandle} for the no-arg ctor of the given class.
|
||||
@ -61,17 +68,18 @@ public abstract class AttributeFactory {
|
||||
};
|
||||
|
||||
DefaultAttributeFactory() {}
|
||||
|
||||
|
||||
@Override
|
||||
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
|
||||
try {
|
||||
return (AttributeImpl) constructors.get(attClass).invokeExact();
|
||||
} catch (Throwable t) {
|
||||
rethrow(t);
|
||||
throw new AssertionError();
|
||||
} catch (Error | RuntimeException e) {
|
||||
throw e;
|
||||
} catch (Throwable e) {
|
||||
throw new UndeclaredThrowableException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private Class<? extends AttributeImpl> findImplClass(Class<? extends Attribute> attClass) {
|
||||
try {
|
||||
return Class.forName(attClass.getName() + "Impl", true, attClass.getClassLoader()).asSubclass(AttributeImpl.class);
|
||||
@ -138,23 +146,12 @@ public abstract class AttributeFactory {
|
||||
protected A createInstance() {
|
||||
try {
|
||||
return (A) constr.invokeExact();
|
||||
} catch (Throwable t) {
|
||||
rethrow(t);
|
||||
throw new AssertionError();
|
||||
} catch (Error | RuntimeException e) {
|
||||
throw e;
|
||||
} catch (Throwable e) {
|
||||
throw new UndeclaredThrowableException(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Hack to rethrow unknown Exceptions from {@link MethodHandle#invoke}:
|
||||
// TODO: remove the impl in test-framework, this one is more elegant :-)
|
||||
static void rethrow(Throwable t) {
|
||||
AttributeFactory.<Error>rethrow0(t);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private static <T extends Throwable> void rethrow0(Throwable t) throws T {
|
||||
throw (T) t;
|
||||
}
|
||||
|
||||
}
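
With this change, createAttributeInstance wraps a checked exception thrown by an attribute's no-arg constructor in UndeclaredThrowableException instead of sneaky-rethrowing it. A small sketch of how calling code might surface the original cause; CharTermAttribute is used only as an example interface, and the catch branch is reached only for a misbehaving attribute implementation:

    import java.lang.reflect.UndeclaredThrowableException;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.AttributeFactory;

    class AttributeFactorySketch {
      public static void main(String[] args) {
        try {
          // DEFAULT_ATTRIBUTE_FACTORY resolves CharTermAttributeImpl reflectively.
          AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY.createAttributeInstance(CharTermAttribute.class);
        } catch (UndeclaredThrowableException e) {
          // Only thrown when the attribute's constructor threw a checked exception.
          System.err.println("attribute ctor failed: " + e.getCause());
        }
      }
    }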
@ -96,7 +96,9 @@ public final class IOUtils {
|
||||
}
|
||||
}
|
||||
|
||||
reThrow(th);
|
||||
if (th != null) {
|
||||
throw rethrowAlways(th);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -229,7 +231,9 @@ public final class IOUtils {
|
||||
}
|
||||
}
|
||||
|
||||
reThrow(th);
|
||||
if (th != null) {
|
||||
throw rethrowAlways(th);
|
||||
}
|
||||
}
|
||||
|
||||
public static void deleteFiles(Directory dir, String... files) throws IOException {
|
||||
@ -300,7 +304,9 @@ public final class IOUtils {
|
||||
}
|
||||
}
|
||||
|
||||
reThrow(th);
|
||||
if (th != null) {
|
||||
throw rethrowAlways(th);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -376,37 +382,83 @@ public final class IOUtils {
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple utility method that takes a previously caught
|
||||
* {@code Throwable} and rethrows either {@code
|
||||
* IOException} or an unchecked exception. If the
|
||||
* argument is null then this method does nothing.
|
||||
* This utility method takes a previously caught (non-null)
|
||||
* {@code Throwable} and rethrows either the original argument
|
||||
   * if it was a subclass of {@code IOException}, or a
   * {@code RuntimeException} with the cause set to the argument.
|
||||
*
|
||||
* <p>This method <strong>never returns any value</strong>, even though it declares
|
||||
* a return value of type {@link Error}. The return value declaration
|
||||
* is very useful to let the compiler know that the code path following
|
||||
* the invocation of this method is unreachable. So in most cases the
|
||||
* invocation of this method will be guarded by an {@code if} and
|
||||
* used together with a {@code throw} statement, as in:
|
||||
* </p>
|
||||
* <pre>{@code
|
||||
   * if (t != null) throw IOUtils.rethrowAlways(t);
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* @param th The throwable to rethrow, <strong>must not be null</strong>.
|
||||
   * @return This method always results in an exception; it never returns any value.
   *         See the method documentation for details and a usage example.
|
||||
* @throws IOException if the argument was an instance of IOException
|
||||
* @throws RuntimeException with the {@link RuntimeException#getCause()} set
|
||||
* to the argument, if it was not an instance of IOException.
|
||||
*/
|
||||
public static void reThrow(Throwable th) throws IOException {
|
||||
if (th != null) {
|
||||
if (th instanceof IOException) {
|
||||
throw (IOException) th;
|
||||
}
|
||||
reThrowUnchecked(th);
|
||||
public static Error rethrowAlways(Throwable th) throws IOException, RuntimeException {
|
||||
if (th == null) {
|
||||
throw new AssertionError("rethrow argument must not be null.");
|
||||
}
|
||||
|
||||
if (th instanceof IOException) {
|
||||
throw (IOException) th;
|
||||
}
|
||||
|
||||
if (th instanceof RuntimeException) {
|
||||
throw (RuntimeException) th;
|
||||
}
|
||||
|
||||
if (th instanceof Error) {
|
||||
throw (Error) th;
|
||||
}
|
||||
|
||||
throw new RuntimeException(th);
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple utility method that takes a previously caught
|
||||
* {@code Throwable} and rethrows it as an unchecked exception.
|
||||
* If the argument is null then this method does nothing.
|
||||
* Rethrows the argument as {@code IOException} or {@code RuntimeException}
|
||||
* if it's not null.
|
||||
*
|
||||
* @deprecated This method is deprecated in favor of {@link #rethrowAlways}. Code should
|
||||
* be updated to {@link #rethrowAlways} and guarded with an additional null-argument check
|
||||
* (because {@link #rethrowAlways} is not accepting null arguments).
|
||||
*/
|
||||
@Deprecated
|
||||
public static void reThrow(Throwable th) throws IOException {
|
||||
if (th != null) {
|
||||
throw rethrowAlways(th);
|
||||
}
|
||||
}
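
As the deprecation notes say, call sites of the old reThrow(th) are expected to add their own null check around rethrowAlways. A hedged sketch of the migrated call-site pattern, using a simplified close-and-rethrow helper (the helper itself is invented; only the IOUtils call is from the patch):

    import java.io.IOException;
    import org.apache.lucene.util.IOUtils;

    class RethrowMigrationSketch {
      static void closeAll(AutoCloseable... objects) throws IOException {
        Throwable th = null;
        for (AutoCloseable o : objects) {
          try {
            if (o != null) o.close();
          } catch (Throwable t) {
            if (th == null) th = t;   // remember only the first failure
          }
        }
        // Old pattern (deprecated): IOUtils.reThrow(th) silently ignored a null argument.
        // New pattern: the null check moves to the caller, and the declared Error return
        // type lets the compiler see that nothing runs after the throw.
        if (th != null) {
          throw IOUtils.rethrowAlways(th);
        }
      }
    }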
|
||||
|
||||
/**
|
||||
* @deprecated This method is deprecated in favor of {@link #rethrowAlways}. Code should
|
||||
* be updated to {@link #rethrowAlways} and guarded with an additional null-argument check
|
||||
* (because {@link #rethrowAlways} is not accepting null arguments).
|
||||
*/
|
||||
@Deprecated
|
||||
public static void reThrowUnchecked(Throwable th) {
|
||||
if (th != null) {
|
||||
if (th instanceof RuntimeException) {
|
||||
throw (RuntimeException) th;
|
||||
}
|
||||
if (th instanceof Error) {
|
||||
throw (Error) th;
|
||||
}
|
||||
if (th instanceof RuntimeException) {
|
||||
throw (RuntimeException) th;
|
||||
}
|
||||
throw new RuntimeException(th);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Ensure that any writes to the given file is written to the storage device that contains it.
|
||||
* @param fileToSync the file to fsync
|
||||
|
@ -24,7 +24,12 @@ import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.Semaphore;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
@ -73,6 +78,9 @@ public class OfflineSorter {
|
||||
private final int valueLength;
|
||||
private final String tempFileNamePrefix;
|
||||
|
||||
private final ExecutorService exec;
|
||||
private final Semaphore partitionsInRAM;
|
||||
|
||||
/**
|
||||
* A bit more descriptive unit for constructors.
|
||||
*
|
||||
@ -145,13 +153,13 @@ public class OfflineSorter {
|
||||
/** number of lines of data read */
|
||||
public int lineCount;
|
||||
/** time spent merging sorted partitions (in milliseconds) */
|
||||
public long mergeTime;
|
||||
public final AtomicLong mergeTimeMS = new AtomicLong();
|
||||
/** time spent sorting data (in milliseconds) */
|
||||
public long sortTime;
|
||||
public final AtomicLong sortTimeMS = new AtomicLong();
|
||||
/** total time spent (in milliseconds) */
|
||||
public long totalTime;
|
||||
public long totalTimeMS;
|
||||
/** time spent in i/o read (in milliseconds) */
|
||||
public long readTime;
|
||||
public long readTimeMS;
|
||||
/** read buffer size (in bytes) */
|
||||
public final long bufferSize = ramBufferSize.bytes;
|
||||
|
||||
@ -161,17 +169,15 @@ public class OfflineSorter {
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT,
|
||||
"time=%.2f sec. total (%.2f reading, %.2f sorting, %.2f merging), lines=%d, temp files=%d, merges=%d, soft ram limit=%.2f MB",
|
||||
totalTime / 1000.0d, readTime / 1000.0d, sortTime / 1000.0d, mergeTime / 1000.0d,
|
||||
lineCount, tempMergeFiles, mergeRounds,
|
||||
(double) bufferSize / MB);
|
||||
"time=%.2f sec. total (%.2f reading, %.2f sorting, %.2f merging), lines=%d, temp files=%d, merges=%d, soft ram limit=%.2f MB",
|
||||
totalTimeMS / 1000.0d, readTimeMS / 1000.0d, sortTimeMS.get() / 1000.0d, mergeTimeMS.get() / 1000.0d,
|
||||
lineCount, tempMergeFiles, mergeRounds,
|
||||
(double) bufferSize / MB);
|
||||
}
|
||||
}
|
||||
|
||||
private final BufferSize ramBufferSize;
|
||||
|
||||
private final Counter bufferBytesUsed = Counter.newCounter();
|
||||
private final SortableBytesRefArray buffer;
|
||||
SortInfo sortInfo;
|
||||
private int maxTempFiles;
|
||||
private final Comparator<BytesRef> comparator;
|
||||
@ -185,7 +191,7 @@ public class OfflineSorter {
|
||||
* @see BufferSize#automatic()
|
||||
*/
|
||||
public OfflineSorter(Directory dir, String tempFileNamePrefix) throws IOException {
|
||||
this(dir, tempFileNamePrefix, DEFAULT_COMPARATOR, BufferSize.automatic(), MAX_TEMPFILES, -1);
|
||||
this(dir, tempFileNamePrefix, DEFAULT_COMPARATOR, BufferSize.automatic(), MAX_TEMPFILES, -1, null, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -194,14 +200,30 @@ public class OfflineSorter {
|
||||
* @see BufferSize#automatic()
|
||||
*/
|
||||
public OfflineSorter(Directory dir, String tempFileNamePrefix, Comparator<BytesRef> comparator) throws IOException {
|
||||
this(dir, tempFileNamePrefix, comparator, BufferSize.automatic(), MAX_TEMPFILES, -1);
|
||||
this(dir, tempFileNamePrefix, comparator, BufferSize.automatic(), MAX_TEMPFILES, -1, null, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* All-details constructor. If {@code valueLength} is -1 (the default), the length of each value differs; otherwise,
|
||||
* all values have the specified length.
|
||||
* all values have the specified length. If you pass a non-null {@code ExecutorService} then it will be
|
||||
* used to run sorting operations that can be run concurrently, and maxPartitionsInRAM is the maximum
|
||||
* concurrent in-memory partitions. Thus the maximum possible RAM used by this class while sorting is
|
||||
* {@code maxPartitionsInRAM * ramBufferSize}.
|
||||
*/
|
||||
public OfflineSorter(Directory dir, String tempFileNamePrefix, Comparator<BytesRef> comparator, BufferSize ramBufferSize, int maxTempfiles, int valueLength) {
|
||||
public OfflineSorter(Directory dir, String tempFileNamePrefix, Comparator<BytesRef> comparator,
|
||||
BufferSize ramBufferSize, int maxTempfiles, int valueLength, ExecutorService exec,
|
||||
int maxPartitionsInRAM) {
|
||||
if (exec != null) {
|
||||
this.exec = exec;
|
||||
if (maxPartitionsInRAM <= 0) {
|
||||
throw new IllegalArgumentException("maxPartitionsInRAM must be > 0; got " + maxPartitionsInRAM);
|
||||
}
|
||||
} else {
|
||||
this.exec = new SameThreadExecutorService();
|
||||
maxPartitionsInRAM = 1;
|
||||
}
|
||||
this.partitionsInRAM = new Semaphore(maxPartitionsInRAM);
|
||||
|
||||
if (ramBufferSize.bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE) {
|
||||
throw new IllegalArgumentException(MIN_BUFFER_SIZE_MSG + ": " + ramBufferSize.bytes);
|
||||
}
|
||||
@ -209,14 +231,11 @@ public class OfflineSorter {
|
||||
if (maxTempfiles < 2) {
|
||||
throw new IllegalArgumentException("maxTempFiles must be >= 2");
|
||||
}
|
||||
if (valueLength == -1) {
|
||||
buffer = new BytesRefArray(bufferBytesUsed);
|
||||
} else {
|
||||
if (valueLength == 0 || valueLength > Short.MAX_VALUE) {
|
||||
throw new IllegalArgumentException("valueLength must be 1 .. " + Short.MAX_VALUE + "; got: " + valueLength);
|
||||
}
|
||||
buffer = new FixedLengthBytesRefArray(valueLength);
|
||||
|
||||
if (valueLength != -1 && (valueLength == 0 || valueLength > Short.MAX_VALUE)) {
|
||||
throw new IllegalArgumentException("valueLength must be 1 .. " + Short.MAX_VALUE + "; got: " + valueLength);
|
||||
}
|
||||
|
||||
this.valueLength = valueLength;
|
||||
this.ramBufferSize = ramBufferSize;
|
||||
this.maxTempFiles = maxTempfiles;
|
@ -241,26 +260,31 @@ public class OfflineSorter {
|
||||
public String sort(String inputFileName) throws IOException {
|
||||
|
||||
sortInfo = new SortInfo();
|
||||
sortInfo.totalTime = System.currentTimeMillis();
|
||||
long startMS = System.currentTimeMillis();
|
||||
|
||||
List<PartitionAndCount> segments = new ArrayList<>();
|
||||
List<Future<Partition>> segments = new ArrayList<>();
|
||||
int[] levelCounts = new int[1];
|
||||
|
||||
// So we can remove any partially written temp files on exception:
|
||||
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
|
||||
|
||||
boolean success = false;
|
||||
boolean[] isExhausted = new boolean[1];
|
||||
try (ByteSequencesReader is = getReader(dir.openChecksumInput(inputFileName, IOContext.READONCE), inputFileName)) {
|
||||
while (isExhausted[0] == false) {
|
||||
int lineCount = readPartition(is, isExhausted);
|
||||
if (lineCount == 0) {
|
||||
assert isExhausted[0];
|
||||
while (true) {
|
||||
Partition part = readPartition(is);
|
||||
if (part.count == 0) {
|
||||
if (partitionsInRAM != null) {
|
||||
partitionsInRAM.release();
|
||||
}
|
||||
assert part.exhausted;
|
||||
break;
|
||||
}
|
||||
segments.add(sortPartition(trackingDir, lineCount));
|
||||
|
||||
Callable<Partition> job = new SortPartitionTask(trackingDir, part);
|
||||
|
||||
segments.add(exec.submit(job));
|
||||
sortInfo.tempMergeFiles++;
|
||||
sortInfo.lineCount += lineCount;
|
||||
sortInfo.lineCount += part.count;
|
||||
levelCounts[0]++;
|
||||
|
||||
// Handle intermediate merges; we need a while loop to "cascade" the merge when necessary:
|
||||
@ -274,6 +298,10 @@ public class OfflineSorter {
|
||||
levelCounts[mergeLevel] = 0;
|
||||
mergeLevel++;
|
||||
}
|
||||
|
||||
if (part.exhausted) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: we shouldn't have to do this? Can't we return a merged reader to
|
||||
@ -292,13 +320,13 @@ public class OfflineSorter {
|
||||
result = out.getName();
|
||||
}
|
||||
} else {
|
||||
result = segments.get(0).fileName;
|
||||
result = getPartition(segments.get(0)).fileName;
|
||||
}
|
||||
|
||||
// We should be explicitly removing all intermediate files ourselves unless there is an exception:
|
||||
assert trackingDir.getCreatedFiles().size() == 1 && trackingDir.getCreatedFiles().contains(result);
|
||||
|
||||
sortInfo.totalTime = System.currentTimeMillis() - sortInfo.totalTime;
|
||||
sortInfo.totalTimeMS = System.currentTimeMillis() - startMS;
|
||||
|
||||
CodecUtil.checkFooter(is.in);
|
||||
|
||||
@ -306,6 +334,8 @@ public class OfflineSorter {
|
||||
|
||||
return result;
|
||||
|
||||
} catch (InterruptedException ie) {
|
||||
throw new ThreadInterruptedException(ie);
|
||||
} finally {
|
||||
if (success == false) {
|
||||
IOUtils.deleteFilesIgnoringExceptions(trackingDir, trackingDir.getCreatedFiles());
|
||||
@ -313,36 +343,6 @@ public class OfflineSorter {
|
||||
}
|
||||
}
|
||||
|
||||
/** Sort a single partition in-memory. */
|
||||
protected PartitionAndCount sortPartition(TrackingDirectoryWrapper trackingDir, int lineCount) throws IOException {
|
||||
|
||||
try (IndexOutput tempFile = trackingDir.createTempOutput(tempFileNamePrefix, "sort", IOContext.DEFAULT);
|
||||
ByteSequencesWriter out = getWriter(tempFile, lineCount);) {
|
||||
|
||||
BytesRef spare;
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
BytesRefIterator iter = buffer.iterator(comparator);
|
||||
sortInfo.sortTime += System.currentTimeMillis() - start;
|
||||
|
||||
int count = 0;
|
||||
while ((spare = iter.next()) != null) {
|
||||
assert spare.length <= Short.MAX_VALUE;
|
||||
out.write(spare);
|
||||
count++;
|
||||
}
|
||||
|
||||
assert count == lineCount;
|
||||
|
||||
// Clean up the buffer for the next partition.
|
||||
buffer.clear();
|
||||
|
||||
CodecUtil.writeFooter(out.out);
|
||||
|
||||
return new PartitionAndCount(lineCount, tempFile.getName());
|
||||
}
|
||||
}
|
||||
|
||||
/** Called on exception, to check whether the checksum is also corrupt in this source, and add that
|
||||
* information (checksum matched or didn't) as a suppressed exception. */
|
||||
private void verifyChecksum(Throwable priorException, ByteSequencesReader reader) throws IOException {
|
||||
@ -352,129 +352,107 @@ public class OfflineSorter {
|
||||
}
|
||||
|
||||
  /** Merge the most recent {@code maxTempFiles} partitions into a new partition. */
|
||||
void mergePartitions(Directory trackingDir, List<PartitionAndCount> segments) throws IOException {
|
||||
void mergePartitions(Directory trackingDir, List<Future<Partition>> segments) throws IOException {
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
List<PartitionAndCount> segmentsToMerge;
|
||||
List<Future<Partition>> segmentsToMerge;
|
||||
if (segments.size() > maxTempFiles) {
|
||||
segmentsToMerge = segments.subList(segments.size() - maxTempFiles, segments.size());
|
||||
} else {
|
||||
segmentsToMerge = segments;
|
||||
}
|
||||
|
||||
long totalCount = 0;
|
||||
for (PartitionAndCount segment : segmentsToMerge) {
|
||||
totalCount += segment.count;
|
||||
}
|
||||
sortInfo.mergeRounds++;
|
||||
|
||||
PriorityQueue<FileAndTop> queue = new PriorityQueue<FileAndTop>(segmentsToMerge.size()) {
|
||||
@Override
|
||||
protected boolean lessThan(FileAndTop a, FileAndTop b) {
|
||||
return comparator.compare(a.current, b.current) < 0;
|
||||
}
|
||||
};
|
||||
|
||||
ByteSequencesReader[] streams = new ByteSequencesReader[segmentsToMerge.size()];
|
||||
|
||||
String newSegmentName = null;
|
||||
|
||||
try (ByteSequencesWriter writer = getWriter(trackingDir.createTempOutput(tempFileNamePrefix, "sort", IOContext.DEFAULT), totalCount)) {
|
||||
|
||||
newSegmentName = writer.out.getName();
|
||||
|
||||
// Open streams and read the top for each file
|
||||
for (int i = 0; i < segmentsToMerge.size(); i++) {
|
||||
streams[i] = getReader(dir.openChecksumInput(segmentsToMerge.get(i).fileName, IOContext.READONCE), segmentsToMerge.get(i).fileName);
|
||||
BytesRef item = null;
|
||||
try {
|
||||
item = streams[i].next();
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, streams[i]);
|
||||
}
|
||||
assert item != null;
|
||||
queue.insertWithOverflow(new FileAndTop(i, item));
|
||||
}
|
||||
|
||||
// Unix utility sort() uses ordered array of files to pick the next line from, updating
|
||||
// it as it reads new lines. The PQ used here is a more elegant solution and has
|
||||
// a nicer theoretical complexity bound :) The entire sorting process is I/O bound anyway
|
||||
// so it shouldn't make much of a difference (didn't check).
|
||||
FileAndTop top;
|
||||
while ((top = queue.top()) != null) {
|
||||
writer.write(top.current);
|
||||
try {
|
||||
top.current = streams[top.fd].next();
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, streams[top.fd]);
|
||||
}
|
||||
|
||||
if (top.current != null) {
|
||||
queue.updateTop();
|
||||
} else {
|
||||
queue.pop();
|
||||
}
|
||||
}
|
||||
|
||||
CodecUtil.writeFooter(writer.out);
|
||||
|
||||
for(ByteSequencesReader reader : streams) {
|
||||
CodecUtil.checkFooter(reader.in);
|
||||
}
|
||||
|
||||
sortInfo.mergeTime += System.currentTimeMillis() - start;
|
||||
sortInfo.mergeRounds++;
|
||||
} finally {
|
||||
IOUtils.close(streams);
|
||||
}
|
||||
|
||||
IOUtils.deleteFiles(trackingDir, segmentsToMerge.stream().map(segment -> segment.fileName).collect(Collectors.toList()));
|
||||
MergePartitionsTask task = new MergePartitionsTask(trackingDir, new ArrayList<>(segmentsToMerge));
|
||||
|
||||
segmentsToMerge.clear();
|
||||
segments.add(new PartitionAndCount(totalCount, newSegmentName));
|
||||
segments.add(exec.submit(task));
|
||||
|
||||
sortInfo.tempMergeFiles++;
|
||||
}
|
||||
|
||||
/** Holds one partition of items, either loaded into memory or based on a file. */
|
||||
private static class Partition {
|
||||
public final SortableBytesRefArray buffer;
|
||||
public final boolean exhausted;
|
||||
public final long count;
|
||||
public final String fileName;
|
||||
|
||||
/** A partition loaded into memory. */
|
||||
public Partition(SortableBytesRefArray buffer, boolean exhausted) {
|
||||
this.buffer = buffer;
|
||||
this.fileName = null;
|
||||
this.count = buffer.size();
|
||||
this.exhausted = exhausted;
|
||||
}
|
||||
|
||||
/** An on-disk partition. */
|
||||
public Partition(String fileName, long count) {
|
||||
this.buffer = null;
|
||||
this.fileName = fileName;
|
||||
this.count = count;
|
||||
this.exhausted = true;
|
||||
}
|
||||
}
|
||||
|
||||
  /** Read in a single partition of data; the returned Partition is marked exhausted when there are no more items. */
|
||||
int readPartition(ByteSequencesReader reader, boolean[] isExhausted) throws IOException {
|
||||
long start = System.currentTimeMillis();
|
||||
if (valueLength != -1) {
|
||||
int limit = ramBufferSize.bytes / valueLength;
|
||||
for(int i=0;i<limit;i++) {
|
||||
BytesRef item = null;
|
||||
try {
|
||||
item = reader.next();
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, reader);
|
||||
Partition readPartition(ByteSequencesReader reader) throws IOException, InterruptedException {
|
||||
if (partitionsInRAM != null) {
|
||||
partitionsInRAM.acquire();
|
||||
}
|
||||
boolean success = false;
|
||||
try {
|
||||
long start = System.currentTimeMillis();
|
||||
SortableBytesRefArray buffer;
|
||||
boolean exhausted = false;
|
||||
int count;
|
||||
if (valueLength != -1) {
|
||||
// fixed length case
|
||||
buffer = new FixedLengthBytesRefArray(valueLength);
|
||||
int limit = ramBufferSize.bytes / valueLength;
|
||||
for(int i=0;i<limit;i++) {
|
||||
BytesRef item = null;
|
||||
try {
|
||||
item = reader.next();
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, reader);
|
||||
}
|
||||
if (item == null) {
|
||||
exhausted = true;
|
||||
break;
|
||||
}
|
||||
buffer.append(item);
|
||||
}
|
||||
if (item == null) {
|
||||
isExhausted[0] = true;
|
||||
break;
|
||||
} else {
|
||||
Counter bufferBytesUsed = Counter.newCounter();
|
||||
buffer = new BytesRefArray(bufferBytesUsed);
|
||||
while (true) {
|
||||
BytesRef item = null;
|
||||
try {
|
||||
item = reader.next();
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, reader);
|
||||
}
|
||||
if (item == null) {
|
||||
exhausted = true;
|
||||
break;
|
||||
}
|
||||
buffer.append(item);
|
||||
// Account for the created objects.
|
||||
// (buffer slots do not account to buffer size.)
|
||||
if (bufferBytesUsed.get() > ramBufferSize.bytes) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
buffer.append(item);
|
||||
}
|
||||
} else {
|
||||
while (true) {
|
||||
BytesRef item = null;
|
||||
try {
|
||||
item = reader.next();
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, reader);
|
||||
}
|
||||
if (item == null) {
|
||||
isExhausted[0] = true;
|
||||
break;
|
||||
}
|
||||
buffer.append(item);
|
||||
// Account for the created objects.
|
||||
// (buffer slots do not account to buffer size.)
|
||||
if (bufferBytesUsed.get() > ramBufferSize.bytes) {
|
||||
break;
|
||||
}
|
||||
sortInfo.readTimeMS += System.currentTimeMillis() - start;
|
||||
success = true;
|
||||
return new Partition(buffer, exhausted);
|
||||
} finally {
|
||||
if (success == false && partitionsInRAM != null) {
|
||||
partitionsInRAM.release();
|
||||
}
|
||||
}
|
||||
sortInfo.readTime += System.currentTimeMillis() - start;
|
||||
return buffer.size();
|
||||
}
|
||||
|
||||
static class FileAndTop {
|
||||
@ -606,13 +584,146 @@ public class OfflineSorter {
|
||||
return comparator;
|
||||
}
|
||||
|
||||
private static class PartitionAndCount {
|
||||
final long count;
|
||||
final String fileName;
|
||||
/** Sorts one in-memory partition, writes it to disk, and returns the resulting file-based partition. */
|
||||
private class SortPartitionTask implements Callable<Partition> {
|
||||
|
||||
public PartitionAndCount(long count, String fileName) {
|
||||
this.count = count;
|
||||
this.fileName = fileName;
|
||||
private final Directory dir;
|
||||
private final Partition part;
|
||||
|
||||
public SortPartitionTask(Directory dir, Partition part) {
|
||||
this.dir = dir;
|
||||
this.part = part;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Partition call() throws IOException {
|
||||
try (IndexOutput tempFile = dir.createTempOutput(tempFileNamePrefix, "sort", IOContext.DEFAULT);
|
||||
ByteSequencesWriter out = getWriter(tempFile, part.buffer.size());) {
|
||||
|
||||
BytesRef spare;
|
||||
|
||||
long startMS = System.currentTimeMillis();
|
||||
BytesRefIterator iter = part.buffer.iterator(comparator);
|
||||
sortInfo.sortTimeMS.addAndGet(System.currentTimeMillis() - startMS);
|
||||
|
||||
int count = 0;
|
||||
while ((spare = iter.next()) != null) {
|
||||
assert spare.length <= Short.MAX_VALUE;
|
||||
out.write(spare);
|
||||
count++;
|
||||
}
|
||||
|
||||
assert count == part.count;
|
||||
|
||||
CodecUtil.writeFooter(out.out);
|
||||
part.buffer.clear();
|
||||
|
||||
return new Partition(tempFile.getName(), part.count);
|
||||
} finally {
|
||||
if (partitionsInRAM != null) {
|
||||
partitionsInRAM.release();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Partition getPartition(Future<Partition> future) throws IOException {
|
||||
try {
|
||||
return future.get();
|
||||
} catch (InterruptedException ie) {
|
||||
throw new ThreadInterruptedException(ie);
|
||||
} catch (ExecutionException ee) {
|
||||
// Theoretically cause can be null; guard against that.
|
||||
Throwable cause = ee.getCause();
|
||||
throw IOUtils.rethrowAlways(cause != null ? cause : ee);
|
||||
}
|
||||
}
|
||||
|
||||
/** Merges multiple file-based partitions to a single on-disk partition. */
|
||||
private class MergePartitionsTask implements Callable<Partition> {
|
||||
private final Directory dir;
|
||||
private final List<Future<Partition>> segmentsToMerge;
|
||||
|
||||
public MergePartitionsTask(Directory dir, List<Future<Partition>> segmentsToMerge) {
|
||||
this.dir = dir;
|
||||
this.segmentsToMerge = segmentsToMerge;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Partition call() throws IOException {
|
||||
long totalCount = 0;
|
||||
for (Future<Partition> segment : segmentsToMerge) {
|
||||
totalCount += getPartition(segment).count;
|
||||
}
|
||||
|
||||
PriorityQueue<FileAndTop> queue = new PriorityQueue<FileAndTop>(segmentsToMerge.size()) {
|
||||
@Override
|
||||
protected boolean lessThan(FileAndTop a, FileAndTop b) {
|
||||
return comparator.compare(a.current, b.current) < 0;
|
||||
}
|
||||
};
|
||||
|
||||
ByteSequencesReader[] streams = new ByteSequencesReader[segmentsToMerge.size()];
|
||||
|
||||
String newSegmentName = null;
|
||||
|
||||
long startMS = System.currentTimeMillis();
|
||||
try (ByteSequencesWriter writer = getWriter(dir.createTempOutput(tempFileNamePrefix, "sort", IOContext.DEFAULT), totalCount)) {
|
||||
|
||||
newSegmentName = writer.out.getName();
|
||||
|
||||
// Open streams and read the top for each file
|
||||
for (int i = 0; i < segmentsToMerge.size(); i++) {
|
||||
Partition segment = getPartition(segmentsToMerge.get(i));
|
||||
streams[i] = getReader(dir.openChecksumInput(segment.fileName, IOContext.READONCE), segment.fileName);
|
||||
|
||||
BytesRef item = null;
|
||||
try {
|
||||
item = streams[i].next();
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, streams[i]);
|
||||
}
|
||||
assert item != null;
|
||||
queue.insertWithOverflow(new FileAndTop(i, item));
|
||||
}
|
||||
|
||||
// Unix utility sort() uses ordered array of files to pick the next line from, updating
|
||||
// it as it reads new lines. The PQ used here is a more elegant solution and has
|
||||
// a nicer theoretical complexity bound :) The entire sorting process is I/O bound anyway
|
||||
// so it shouldn't make much of a difference (didn't check).
|
||||
FileAndTop top;
|
||||
while ((top = queue.top()) != null) {
|
||||
writer.write(top.current);
|
||||
try {
|
||||
top.current = streams[top.fd].next();
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, streams[top.fd]);
|
||||
}
|
||||
|
||||
if (top.current != null) {
|
||||
queue.updateTop();
|
||||
} else {
|
||||
queue.pop();
|
||||
}
|
||||
}
|
||||
|
||||
CodecUtil.writeFooter(writer.out);
|
||||
|
||||
for(ByteSequencesReader reader : streams) {
|
||||
CodecUtil.checkFooter(reader.in);
|
||||
}
|
||||
|
||||
sortInfo.mergeTimeMS.addAndGet(System.currentTimeMillis() - startMS);
|
||||
} finally {
|
||||
IOUtils.close(streams);
|
||||
}
|
||||
List<String> toDelete = new ArrayList<>();
|
||||
for (Future<Partition> segment : segmentsToMerge) {
|
||||
toDelete.add(getPartition(segment).fileName);
|
||||
}
|
||||
IOUtils.deleteFiles(dir, toDelete);
|
||||
|
||||
return new Partition(newSegmentName, totalCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.AbstractExecutorService;
|
||||
import java.util.concurrent.RejectedExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/** An {@code ExecutorService} that executes tasks immediately in the calling thread during submit.
|
||||
*
|
||||
* @lucene.internal */
|
||||
public final class SameThreadExecutorService extends AbstractExecutorService {
|
||||
private volatile boolean shutdown;
|
||||
|
||||
@Override
|
||||
public void execute(Runnable command) {
|
||||
checkShutdown();
|
||||
command.run();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Runnable> shutdownNow() {
|
||||
shutdown();
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
this.shutdown = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isTerminated() {
|
||||
// Simplified: we don't check for any threads hanging in execute (we could
|
||||
// introduce an atomic counter, but there seems to be no point).
|
||||
return shutdown == true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isShutdown() {
|
||||
return shutdown == true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean awaitTermination(long timeout, TimeUnit unit) throws InterruptedException {
|
||||
// See comment in isTerminated();
|
||||
return true;
|
||||
}
|
||||
|
||||
private void checkShutdown() {
|
||||
if (shutdown) {
|
||||
throw new RejectedExecutionException("Executor is shut down.");
|
||||
}
|
||||
}
|
||||
}
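
A small sketch of what this same-thread executor guarantees: submitted work runs synchronously in the caller, so the Future is already done when submit returns. This is the fallback OfflineSorter installs when no executor is supplied. The demo class is invented; only SameThreadExecutorService comes from the patch:

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Future;
    import org.apache.lucene.util.SameThreadExecutorService;

    class SameThreadSketch {
      public static void main(String[] args) throws Exception {
        ExecutorService exec = new SameThreadExecutorService();
        // The callable executes inline, on the calling thread, during submit().
        Future<String> f = exec.submit(() -> Thread.currentThread().getName());
        System.out.println(f.isDone());                                           // true
        System.out.println(f.get().equals(Thread.currentThread().getName()));     // true
        exec.shutdown();
        // After shutdown, further submissions fail with RejectedExecutionException.
      }
    }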
@ -97,31 +97,74 @@ public class SmallFloat {
|
||||
return Float.intBitsToFloat(bits);
|
||||
}
|
||||
|
||||
|
||||
/** floatToByte(b, mantissaBits=5, zeroExponent=2)
|
||||
* <br>smallest nonzero value = 0.033203125
|
||||
* <br>largest value = 1984.0
|
||||
* <br>epsilon = 0.03125
|
||||
*/
|
||||
public static byte floatToByte52(float f) {
|
||||
int bits = Float.floatToRawIntBits(f);
|
||||
int smallfloat = bits >> (24-5);
|
||||
if (smallfloat <= (63-2)<<5) {
|
||||
return (bits<=0) ? (byte)0 : (byte)1;
|
||||
/** Float-like encoding for positive longs that preserves ordering and 4 significant bits. */
|
||||
public static int longToInt4(long i) {
|
||||
if (i < 0) {
|
||||
throw new IllegalArgumentException("Only supports positive values, got " + i);
|
||||
}
|
||||
if (smallfloat >= ((63-2)<<5) + 0x100) {
|
||||
return -1;
|
||||
int numBits = 64 - Long.numberOfLeadingZeros(i);
|
||||
if (numBits < 4) {
|
||||
// subnormal value
|
||||
return Math.toIntExact(i);
|
||||
} else {
|
||||
// normal value
|
||||
int shift = numBits - 4;
|
||||
      // only keep the 4 most significant bits
|
||||
int encoded = Math.toIntExact(i >>> shift);
|
||||
// clear the most significant bit, which is implicit
|
||||
encoded &= 0x07;
|
||||
// encode the shift, adding 1 because 0 is reserved for subnormal values
|
||||
encoded |= (shift + 1) << 3;
|
||||
return encoded;
|
||||
}
|
||||
return (byte)(smallfloat - ((63-2)<<5));
|
||||
}
|
||||
|
||||
/** byteToFloat(b, mantissaBits=5, zeroExponent=2) */
|
||||
public static float byte52ToFloat(byte b) {
|
||||
// on Java1.5 & 1.6 JVMs, prebuilding a decoding array and doing a lookup
|
||||
// is only a little bit faster (anywhere from 0% to 7%)
|
||||
if (b == 0) return 0.0f;
|
||||
int bits = (b&0xff) << (24-5);
|
||||
bits += (63-2) << 24;
|
||||
return Float.intBitsToFloat(bits);
|
||||
/**
|
||||
* Decode values encoded with {@link #longToInt4(long)}.
|
||||
*/
|
||||
public static final long int4ToLong(int i) {
|
||||
long bits = i & 0x07;
|
||||
int shift = (i >>> 3) - 1;
|
||||
long decoded;
|
||||
if (shift == -1) {
|
||||
// subnormal value
|
||||
decoded = bits;
|
||||
} else {
|
||||
// normal value
|
||||
decoded = (bits | 0x08) << shift;
|
||||
}
|
||||
return decoded;
|
||||
}
|
||||
|
||||
private static final int MAX_INT4 = longToInt4(Integer.MAX_VALUE);
|
||||
private static final int NUM_FREE_VALUES = 255 - MAX_INT4;
|
||||
|
||||
/**
|
||||
* Encode an integer to a byte. It is built upon {@link #longToInt4(long)}
|
||||
* and leverages the fact that {@code longToInt4(Integer.MAX_VALUE)} is
|
||||
* less than 255 to encode low values more accurately.
|
||||
*/
|
||||
public static byte intToByte4(int i) {
|
||||
if (i < 0) {
|
||||
throw new IllegalArgumentException("Only supports positive values, got " + i);
|
||||
}
|
||||
if (i < NUM_FREE_VALUES) {
|
||||
return (byte) i;
|
||||
} else {
|
||||
return (byte) (NUM_FREE_VALUES + longToInt4(i - NUM_FREE_VALUES));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode values that have been encoded with {@link #intToByte4(int)}.
|
||||
*/
|
||||
public static int byte4ToInt(byte b) {
|
||||
int i = Byte.toUnsignedInt(b);
|
||||
if (i < NUM_FREE_VALUES) {
|
||||
return i;
|
||||
} else {
|
||||
long decoded = NUM_FREE_VALUES + int4ToLong(i - NUM_FREE_VALUES);
|
||||
return Math.toIntExact(decoded);
|
||||
}
|
||||
}
|
||||
}
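
A brief sketch of what the new encodings preserve and what they throw away; the sample values are arbitrary, and the expected outputs in the comments follow from the encoding shown above (ordering is kept, precision beyond about 4 significant bits is not):

    import org.apache.lucene.util.SmallFloat;

    class SmallFloatSketch {
      public static void main(String[] args) {
        // longToInt4 keeps ordering but only ~4 significant bits: nearby values collapse.
        System.out.println(SmallFloat.int4ToLong(SmallFloat.longToInt4(100)));    // 96 (rounded down)
        System.out.println(SmallFloat.int4ToLong(SmallFloat.longToInt4(1000)));   // 960
        System.out.println(SmallFloat.longToInt4(100) < SmallFloat.longToInt4(1000)); // true, order kept

        // intToByte4 packs the same idea into one byte; small values round-trip exactly,
        // larger ones are rounded down.
        for (int i : new int[] {3, 50, 100_000}) {
          int back = SmallFloat.byte4ToInt(SmallFloat.intToByte4(i));
          System.out.println(i + " -> " + back);
        }
      }
    }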
@ -115,6 +115,13 @@ public final class Version {
|
||||
@Deprecated
|
||||
public static final Version LUCENE_6_6_0 = new Version(6, 6, 0);
|
||||
|
||||
/**
|
||||
* Match settings and bugs in Lucene's 6.7.0 release.
|
||||
* @deprecated Use latest
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Version LUCENE_6_7_0 = new Version(6, 7, 0);
|
||||
|
||||
/**
|
||||
* Match settings and bugs in Lucene's 7.0.0 release.
|
||||
* <p>
|
||||
|
@ -884,7 +884,7 @@ public class BKDWriter implements Closeable {
|
||||
};
|
||||
}
|
||||
|
||||
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp, offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc) {
|
||||
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp, offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc, null, 0) {
|
||||
|
||||
/** We write/read fixed-byte-width file that {@link OfflinePointReader} can read. */
|
||||
@Override
|
||||
@ -1362,7 +1362,9 @@ public class BKDWriter implements Closeable {
|
||||
|
||||
/** Called on exception, to check whether the checksum is also corrupt in this source, and add that
|
||||
* information (checksum matched or didn't) as a suppressed exception. */
|
||||
private void verifyChecksum(Throwable priorException, PointWriter writer) throws IOException {
|
||||
private Error verifyChecksum(Throwable priorException, PointWriter writer) throws IOException {
|
||||
assert priorException != null;
|
||||
|
||||
// TODO: we could improve this, to always validate checksum as we recurse, if we shared left and
|
||||
// right reader after recursing to children, and possibly within recursed children,
|
||||
// since all together they make a single pass through the file. But this is a sizable re-org,
|
||||
@ -1373,10 +1375,10 @@ public class BKDWriter implements Closeable {
|
||||
try (ChecksumIndexInput in = tempDir.openChecksumInput(tempFileName, IOContext.READONCE)) {
|
||||
CodecUtil.checkFooter(in, priorException);
|
||||
}
|
||||
} else {
|
||||
// We are reading from heap; nothing to add:
|
||||
IOUtils.reThrow(priorException);
|
||||
}
|
||||
|
||||
// We are reading from heap; nothing to add:
|
||||
throw IOUtils.rethrowAlways(priorException);
|
||||
}
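
Changing verifyChecksum to return Error (even though it always throws) is what lets the call sites below write `throw verifyChecksum(t, ...)` and drop their dead `return null` branches. A minimal sketch of the same idiom, independent of BKDWriter; the helper and its callers are invented for illustration:

    import java.io.IOException;

    class AlwaysThrowsSketch {
      // Declares Error so callers can write `throw fail(...)`; the method never actually returns.
      static Error fail(Throwable cause) throws IOException {
        if (cause instanceof IOException) {
          throw (IOException) cause;
        }
        throw new RuntimeException(cause);
      }

      static byte[] read(boolean broken) throws IOException {
        try {
          if (broken) {
            throw new IOException("simulated read failure");
          }
          return new byte[0];
        } catch (Throwable t) {
          // javac sees this statement completes abruptly, so no unreachable `return null` is needed.
          throw fail(t);
        }
      }
    }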
|
||||
|
||||
/** Marks bits for the ords (points) that belong in the right sub tree (those docs that have values >= the splitValue). */
|
||||
@ -1398,7 +1400,7 @@ public class BKDWriter implements Closeable {
|
||||
reader.markOrds(rightCount-1, ordBitSet);
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, source.writer);
|
||||
throw verifyChecksum(t, source.writer);
|
||||
}
|
||||
|
||||
return scratch1;
|
||||
@ -1469,10 +1471,7 @@ public class BKDWriter implements Closeable {
|
||||
}
|
||||
return new PathSlice(writer, 0, count);
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, source.writer);
|
||||
|
||||
// Dead code but javac disagrees:
|
||||
return null;
|
||||
throw verifyChecksum(t, source.writer);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1797,7 +1796,7 @@ public class BKDWriter implements Closeable {
|
||||
leftSlices[dim] = new PathSlice(leftPointWriter, 0, leftCount);
|
||||
rightSlices[dim] = new PathSlice(rightPointWriter, 0, rightCount);
|
||||
} catch (Throwable t) {
|
||||
verifyChecksum(t, slices[dim].writer);
|
||||
throw verifyChecksum(t, slices[dim].writer);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -48,7 +48,6 @@ import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZ
|
||||
* This class also provides helpers to explore the different paths of the {@link Automaton}.
|
||||
*/
|
||||
public final class GraphTokenStreamFiniteStrings {
|
||||
private final Map<BytesRef, Integer> termToID = new HashMap<>();
|
||||
private final Map<Integer, BytesRef> idToTerm = new HashMap<>();
|
||||
private final Map<Integer, Integer> idToInc = new HashMap<>();
|
||||
private final Automaton det;
|
||||
@ -247,35 +246,18 @@ public final class GraphTokenStreamFiniteStrings {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an integer id for a given term.
|
||||
*
|
||||
   * If there are no position gaps for this token then we can reuse the id for the same term if it appeared at another
|
||||
* position without a gap. If we have a position gap generate a new id so we can keep track of the position
|
||||
* increment.
|
||||
* Gets an integer id for a given term and saves the position increment if needed.
|
||||
*/
|
||||
private int getTermID(int incr, int prevIncr, BytesRef term) {
|
||||
assert term != null;
|
||||
boolean isStackedGap = incr == 0 && prevIncr > 1;
|
||||
boolean hasGap = incr > 1;
|
||||
Integer id;
|
||||
if (hasGap || isStackedGap) {
|
||||
id = idToTerm.size();
|
||||
idToTerm.put(id, BytesRef.deepCopyOf(term));
|
||||
|
||||
// stacked token should have the same increment as original token at this position
|
||||
if (isStackedGap) {
|
||||
idToInc.put(id, prevIncr);
|
||||
} else {
|
||||
idToInc.put(id, incr);
|
||||
}
|
||||
} else {
|
||||
id = termToID.get(term);
|
||||
if (id == null) {
|
||||
term = BytesRef.deepCopyOf(term);
|
||||
id = idToTerm.size();
|
||||
termToID.put(term, id);
|
||||
idToTerm.put(id, term);
|
||||
}
|
||||
int id = idToTerm.size();
|
||||
idToTerm.put(id, BytesRef.deepCopyOf(term));
|
||||
// stacked token should have the same increment as original token at this position
|
||||
if (isStackedGap) {
|
||||
idToInc.put(id, prevIncr);
|
||||
} else if (incr > 1) {
|
||||
idToInc.put(id, incr);
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
@ -303,4 +303,17 @@ public class TestCodecUtil extends LuceneTestCase {
|
||||
fakeChecksum.set((1L << 32) - 1); // ok
|
||||
CodecUtil.writeCRC(fakeOutput);
|
||||
}
|
||||
|
||||
public void testTruncatedFileThrowsCorruptIndexException() throws IOException {
|
||||
RAMFile file = new RAMFile();
|
||||
IndexOutput output = new RAMOutputStream(file, false);
|
||||
output.close();
|
||||
IndexInput input = new RAMInputStream("file", file);
|
||||
CorruptIndexException e = expectThrows(CorruptIndexException.class,
|
||||
() -> CodecUtil.checksumEntireFile(input));
|
||||
assertEquals("misplaced codec footer (file truncated?): length=0 but footerLength==16 (resource=RAMInputStream(name=file))", e.getMessage());
|
||||
e = expectThrows(CorruptIndexException.class,
|
||||
() -> CodecUtil.retrieveChecksum(input));
|
||||
assertEquals("misplaced codec footer (file truncated?): length=0 but footerLength==16 (resource=RAMInputStream(name=file))", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
@ -237,8 +237,11 @@ public class TestDemoParallelLeafReader extends LuceneTestCase {
|
||||
firstExc = t;
|
||||
}
|
||||
}
|
||||
|
||||
// throw the first exception
|
||||
IOUtils.reThrow(firstExc);
|
||||
if (firstExc != null) {
|
||||
throw IOUtils.rethrowAlways(firstExc);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -549,10 +552,11 @@ public class TestDemoParallelLeafReader extends LuceneTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
    // If any error occurred, throw it.
|
||||
IOUtils.reThrow(th);
|
||||
if (th != null) {
|
||||
throw IOUtils.rethrowAlways(th);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void setMergeInfo(SegmentCommitInfo info) {
|
||||
// Record that this merged segment is current as of this schemaGen:
|
||||
|
@ -2403,4 +2403,86 @@ public class TestIndexSorting extends LuceneTestCase {
|
||||
}
|
||||
IOUtils.close(r, w, dir);
|
||||
}
|
||||
|
||||
public void testIndexSortWithSparseField() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
SortField sortField = new SortField("dense_int", SortField.Type.INT, true);
|
||||
Sort indexSort = new Sort(sortField);
|
||||
iwc.setIndexSort(indexSort);
|
||||
IndexWriter w = new IndexWriter(dir, iwc);
|
||||
Field textField = newTextField("sparse_text", "", Field.Store.NO);
|
||||
for (int i = 0; i < 128; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new NumericDocValuesField("dense_int", i));
|
||||
if (i < 64) {
|
||||
doc.add(new NumericDocValuesField("sparse_int", i));
|
||||
doc.add(new BinaryDocValuesField("sparse_binary", new BytesRef(Integer.toString(i))));
|
||||
textField.setStringValue("foo");
|
||||
doc.add(textField);
|
||||
}
|
||||
w.addDocument(doc);
|
||||
}
|
||||
w.commit();
|
||||
w.forceMerge(1);
|
||||
DirectoryReader r = DirectoryReader.open(w);
|
||||
assertEquals(1, r.leaves().size());
|
||||
LeafReader leafReader = r.leaves().get(0).reader();
|
||||
|
||||
NumericDocValues denseValues = leafReader.getNumericDocValues("dense_int");
|
||||
NumericDocValues sparseValues = leafReader.getNumericDocValues("sparse_int");
|
||||
BinaryDocValues sparseBinaryValues = leafReader.getBinaryDocValues("sparse_binary");
|
||||
NumericDocValues normsValues = leafReader.getNormValues("sparse_text");
|
||||
for(int docID = 0; docID < 128; docID++) {
|
||||
assertTrue(denseValues.advanceExact(docID));
|
||||
assertEquals(127-docID, (int) denseValues.longValue());
|
||||
if (docID >= 64) {
|
||||
assertTrue(denseValues.advanceExact(docID));
|
||||
assertTrue(sparseValues.advanceExact(docID));
|
||||
assertTrue(sparseBinaryValues.advanceExact(docID));
|
||||
assertTrue(normsValues.advanceExact(docID));
|
||||
assertEquals(1, normsValues.longValue());
|
||||
assertEquals(127-docID, (int) sparseValues.longValue());
|
||||
assertEquals(new BytesRef(Integer.toString(127-docID)), sparseBinaryValues.binaryValue());
|
||||
} else {
|
||||
assertFalse(sparseBinaryValues.advanceExact(docID));
|
||||
assertFalse(sparseValues.advanceExact(docID));
|
||||
assertFalse(normsValues.advanceExact(docID));
|
||||
}
|
||||
}
|
||||
IOUtils.close(r, w, dir);
|
||||
}
|
||||
|
||||
public void testIndexSortOnSparseField() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
SortField sortField = new SortField("sparse", SortField.Type.INT, false);
|
||||
sortField.setMissingValue(Integer.MIN_VALUE);
|
||||
Sort indexSort = new Sort(sortField);
|
||||
iwc.setIndexSort(indexSort);
|
||||
IndexWriter w = new IndexWriter(dir, iwc);
|
||||
for (int i = 0; i < 128; i++) {
|
||||
Document doc = new Document();
|
||||
if (i < 64) {
|
||||
doc.add(new NumericDocValuesField("sparse", i));
|
||||
}
|
||||
w.addDocument(doc);
|
||||
}
|
||||
w.commit();
|
||||
w.forceMerge(1);
|
||||
DirectoryReader r = DirectoryReader.open(w);
|
||||
assertEquals(1, r.leaves().size());
|
||||
LeafReader leafReader = r.leaves().get(0).reader();
|
||||
NumericDocValues sparseValues = leafReader.getNumericDocValues("sparse");
|
||||
for(int docID = 0; docID < 128; docID++) {
|
||||
if (docID >= 64) {
|
||||
assertTrue(sparseValues.advanceExact(docID));
|
||||
assertEquals(docID-64, (int) sparseValues.longValue());
|
||||
} else {
|
||||
assertFalse(sparseValues.advanceExact(docID));
|
||||
}
|
||||
}
|
||||
IOUtils.close(r, w, dir);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -17,6 +17,7 @@
|
||||
package org.apache.lucene.index;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
@ -26,7 +27,9 @@ import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
@ -35,12 +38,12 @@ import org.apache.lucene.util.TestUtil;
|
||||
/**
|
||||
* Tests the maxTermFrequency statistic in FieldInvertState
|
||||
*/
|
||||
public class TestMaxTermFrequency extends LuceneTestCase {
|
||||
public class TestMaxTermFrequency extends LuceneTestCase {
|
||||
Directory dir;
|
||||
IndexReader reader;
|
||||
/* expected maxTermFrequency values for our documents */
|
||||
ArrayList<Integer> expected = new ArrayList<>();
|
||||
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
@ -59,14 +62,14 @@ public class TestMaxTermFrequency extends LuceneTestCase {
|
||||
reader = writer.getReader();
|
||||
writer.close();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
reader.close();
|
||||
dir.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
|
||||
public void test() throws Exception {
|
||||
NumericDocValues fooNorms = MultiDocValues.getNormValues(reader, "foo");
|
||||
for (int i = 0; i < reader.maxDoc(); i++) {
|
||||
@ -95,30 +98,42 @@ public class TestMaxTermFrequency extends LuceneTestCase {
|
||||
Collections.shuffle(terms, random());
|
||||
return Arrays.toString(terms.toArray(new String[terms.size()]));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Simple similarity that encodes maxTermFrequency directly as a byte
|
||||
*/
|
||||
static class TestSimilarity extends TFIDFSimilarity {
|
||||
static class TestSimilarity extends Similarity {
|
||||
|
||||
@Override
|
||||
public float lengthNorm(FieldInvertState state) {
|
||||
public long computeNorm(FieldInvertState state) {
|
||||
return state.getMaxTermFrequency();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long encodeNormValue(float f) {
|
||||
return (byte) f;
|
||||
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
return new SimWeight() {};
|
||||
}
|
||||
|
||||
@Override
|
||||
public float decodeNormValue(long norm) {
|
||||
return norm;
|
||||
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
|
||||
return new SimScorer() {
|
||||
|
||||
@Override
|
||||
public float score(int doc, float freq) throws IOException {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float computeSlopFactor(int distance) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override public float tf(float freq) { return 0; }
|
||||
@Override public float idf(long docFreq, long docCount) { return 0; }
|
||||
@Override public float sloppyFreq(int distance) { return 0; }
|
||||
@Override public float scorePayload(int doc, int start, int end, BytesRef payload) { return 0; }
|
||||
}
|
||||
}
|
||||
|
@ -32,13 +32,11 @@ import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LineFileDocs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
/**
|
||||
@ -49,67 +47,6 @@ import org.apache.lucene.util.TestUtil;
|
||||
@Slow
|
||||
public class TestNorms extends LuceneTestCase {
|
||||
static final String BYTE_TEST_FIELD = "normsTestByte";
|
||||
|
||||
static class CustomNormEncodingSimilarity extends TFIDFSimilarity {
|
||||
|
||||
@Override
|
||||
public long encodeNormValue(float f) {
|
||||
return (long) f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float decodeNormValue(long norm) {
|
||||
return norm;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float lengthNorm(FieldInvertState state) {
|
||||
return state.getLength();
|
||||
}
|
||||
|
||||
@Override public float tf(float freq) { return 0; }
|
||||
@Override public float idf(long docFreq, long docCount) { return 0; }
|
||||
@Override public float sloppyFreq(int distance) { return 0; }
|
||||
@Override public float scorePayload(int doc, int start, int end, BytesRef payload) { return 0; }
|
||||
}
|
||||
|
||||
// LUCENE-1260
|
||||
public void testCustomEncoder() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||
|
||||
IndexWriterConfig config = newIndexWriterConfig(analyzer);
|
||||
config.setSimilarity(new CustomNormEncodingSimilarity());
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
|
||||
Document doc = new Document();
|
||||
Field foo = newTextField("foo", "", Field.Store.NO);
|
||||
Field bar = newTextField("bar", "", Field.Store.NO);
|
||||
doc.add(foo);
|
||||
doc.add(bar);
|
||||
|
||||
for (int i = 0; i < 100; i++) {
|
||||
bar.setStringValue("singleton");
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
IndexReader reader = writer.getReader();
|
||||
writer.close();
|
||||
|
||||
NumericDocValues fooNorms = MultiDocValues.getNormValues(reader, "foo");
|
||||
for (int i = 0; i < reader.maxDoc(); i++) {
|
||||
assertEquals(i, fooNorms.nextDoc());
|
||||
assertEquals(0, fooNorms.longValue());
|
||||
}
|
||||
|
||||
NumericDocValues barNorms = MultiDocValues.getNormValues(reader, "bar");
|
||||
for (int i = 0; i < reader.maxDoc(); i++) {
|
||||
assertEquals(i, barNorms.nextDoc());
|
||||
assertEquals(1, barNorms.longValue());
|
||||
}
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testMaxByteNorms() throws IOException {
|
||||
Directory dir = newFSDirectory(createTempDir("TestNorms.testMaxByteNorms"));
|
||||
|
@ -44,9 +44,7 @@ import org.apache.lucene.util.LuceneTestCase;
|
||||
public class TestOmitTf extends LuceneTestCase {
|
||||
|
||||
public static class SimpleSimilarity extends TFIDFSimilarity {
|
||||
@Override public float decodeNormValue(long norm) { return norm; }
|
||||
@Override public long encodeNormValue(float f) { return (long) f; }
|
||||
@Override public float lengthNorm(FieldInvertState state) { return 1; }
|
||||
@Override public float lengthNorm(int length) { return 1; }
|
||||
@Override public float tf(float freq) { return freq; }
|
||||
@Override public float sloppyFreq(int distance) { return 2.0f; }
|
||||
@Override public float idf(long docFreq, long docCount) { return 1.0f; }
|
||||
|
@ -30,7 +30,6 @@ import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
@ -72,7 +71,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
|
||||
}
|
||||
|
||||
@Override
|
||||
public float lengthNorm(FieldInvertState state) {
|
||||
public float lengthNorm(int length) {
|
||||
// Disable length norm
|
||||
return 1;
|
||||
}
|
||||
|
@ -31,11 +31,18 @@ public class TestDoubleRangeFieldQueries extends BaseRangeFieldQueryTestCase {
|
||||
private static final String FIELD_NAME = "doubleRangeField";
|
||||
|
||||
private double nextDoubleInternal() {
|
||||
if (rarely()) {
|
||||
return random().nextBoolean() ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
|
||||
switch (random().nextInt(5)) {
|
||||
case 0:
|
||||
return Double.NEGATIVE_INFINITY;
|
||||
case 1:
|
||||
return Double.POSITIVE_INFINITY;
|
||||
default:
|
||||
if (random().nextBoolean()) {
|
||||
return random().nextDouble();
|
||||
} else {
|
||||
return (random().nextInt(15) - 7) / 3d;
|
||||
}
|
||||
}
|
||||
double max = Double.MAX_VALUE / 2;
|
||||
return (max + max) * random().nextDouble() - max;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -17,6 +17,7 @@

package org.apache.lucene.search;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;

@ -26,6 +27,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;

@ -164,4 +166,65 @@ public class TestDoubleValuesSource extends LuceneTestCase {
    CheckHits.checkEqual(query, expected.scoreDocs, actual.scoreDocs);
  }
}

  static final Query[] testQueries = new Query[]{
      new MatchAllDocsQuery(),
      new TermQuery(new Term("oddeven", "odd")),
      new BooleanQuery.Builder()
          .add(new TermQuery(new Term("english", "one")), BooleanClause.Occur.MUST)
          .add(new TermQuery(new Term("english", "two")), BooleanClause.Occur.MUST)
          .build()
  };

  public void testExplanations() throws Exception {
    for (Query q : testQueries) {
      testExplanations(q, DoubleValuesSource.fromIntField("int"));
      testExplanations(q, DoubleValuesSource.fromLongField("long"));
      testExplanations(q, DoubleValuesSource.fromFloatField("float"));
      testExplanations(q, DoubleValuesSource.fromDoubleField("double"));
      testExplanations(q, DoubleValuesSource.fromDoubleField("onefield"));
      testExplanations(q, DoubleValuesSource.constant(5.45));
      testExplanations(q, DoubleValuesSource.function(
          DoubleValuesSource.fromDoubleField("double"), "v * 4 + 73",
          v -> v * 4 + 73
      ));
      testExplanations(q, DoubleValuesSource.scoringFunction(
          DoubleValuesSource.fromDoubleField("double"), "v * score", (v, s) -> v * s
      ));
    }
  }

  private void testExplanations(Query q, DoubleValuesSource vs) throws IOException {
    searcher.search(q, new SimpleCollector() {

      DoubleValues v;
      LeafReaderContext ctx;

      @Override
      protected void doSetNextReader(LeafReaderContext context) throws IOException {
        this.ctx = context;
      }

      @Override
      public void setScorer(Scorer scorer) throws IOException {
        this.v = vs.getValues(this.ctx, DoubleValuesSource.fromScorer(scorer));
      }

      @Override
      public void collect(int doc) throws IOException {
        Explanation scoreExpl = searcher.explain(q, ctx.docBase + doc);
        if (this.v.advanceExact(doc)) {
          CheckHits.verifyExplanation("", doc, (float) v.doubleValue(), true, vs.explain(ctx, doc, scoreExpl));
        }
        else {
          assertFalse(vs.explain(ctx, doc, scoreExpl).isMatch());
        }
      }

      @Override
      public boolean needsScores() {
        return vs.needsScores();
      }
    });
  }
}
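The new testExplanations harness above visits every hit with a SimpleCollector and checks that vs.explain(ctx, doc, scoreExpl) matches the value the source actually produces for that document. For orientation, here is a hedged sketch of reading a DoubleValuesSource per leaf in the same way; only API calls visible in the diff are relied on, while the index setup, field name, and class name are assumptions:

    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.DoubleValues;
    import org.apache.lucene.search.DoubleValuesSource;
    import org.apache.lucene.store.Directory;

    // Sketch, not part of the patch: dumps per-document values of a double docvalues field.
    public class DoubleValuesSourceSketch {
      // Assumes 'dir' holds an index with a docvalues field named "double".
      static void dumpValues(Directory dir) throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
          DoubleValuesSource vs = DoubleValuesSource.fromDoubleField("double");
          for (LeafReaderContext ctx : reader.leaves()) {
            // This source does not need scores, so no score values are passed.
            DoubleValues values = vs.getValues(ctx, null);
            for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) {
              if (values.advanceExact(doc)) {
                System.out.println((ctx.docBase + doc) + " -> " + values.doubleValue());
              }
            }
          }
        }
      }
    }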
@ -33,6 +33,7 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

@ -63,7 +64,7 @@ public class TestElevationComparator extends LuceneTestCase {
    writer.close();

    IndexSearcher searcher = newSearcher(r);
    searcher.setSimilarity(new ClassicSimilarity());
    searcher.setSimilarity(new BM25Similarity());

    runTest(searcher, true);
    runTest(searcher, false);

@ -98,11 +99,11 @@ public class TestElevationComparator extends LuceneTestCase {
    assertEquals(3, topDocs.scoreDocs[1].doc);

    if (reversed) {
      assertEquals(2, topDocs.scoreDocs[2].doc);
      assertEquals(1, topDocs.scoreDocs[3].doc);
    } else {
      assertEquals(1, topDocs.scoreDocs[2].doc);
      assertEquals(2, topDocs.scoreDocs[3].doc);
    } else {
      assertEquals(2, topDocs.scoreDocs[2].doc);
      assertEquals(1, topDocs.scoreDocs[3].doc);
    }

 /*
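The only functional change in this file is swapping the searcher's similarity from ClassicSimilarity (TF-IDF) to BM25Similarity, which is presumably why the expected document order in the reversed/non-reversed assertions above changes as well. A minimal sketch of configuring a searcher this way (the index contents and helper name are assumed):

    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.similarities.BM25Similarity;
    import org.apache.lucene.store.Directory;

    // Sketch: open a searcher that scores with BM25 instead of the classic TF-IDF similarity.
    public class Bm25SearcherSketch {
      static IndexSearcher openBm25Searcher(Directory dir) throws Exception {
        IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
        // Default BM25 parameters (k1 = 1.2, b = 0.75); ranking can differ from ClassicSimilarity.
        searcher.setSimilarity(new BM25Similarity());
        return searcher;
      }
    }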
@ -31,11 +31,18 @@ public class TestFloatRangeFieldQueries extends BaseRangeFieldQueryTestCase {
  private static final String FIELD_NAME = "floatRangeField";

  private float nextFloatInternal() {
    if (rarely()) {
      return random().nextBoolean() ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
    switch (random().nextInt(5)) {
      case 0:
        return Float.NEGATIVE_INFINITY;
      case 1:
        return Float.POSITIVE_INFINITY;
      default:
        if (random().nextBoolean()) {
          return random().nextFloat();
        } else {
          return (random().nextInt(15) - 7) / 3f;
        }
    }
    float max = Float.MAX_VALUE / 2;
    return (max + max) * random().nextFloat() - max;
  }

  @Override
@ -23,6 +23,7 @@ import org.apache.lucene.document.IntRange;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.TestUtil;

/**
 * Random testing for IntRange Queries.

@ -31,11 +32,25 @@ public class TestIntRangeFieldQueries extends BaseRangeFieldQueryTestCase {
  private static final String FIELD_NAME = "intRangeField";

  private int nextIntInternal() {
    if (rarely()) {
      return random().nextBoolean() ? Integer.MAX_VALUE : Integer.MIN_VALUE;
    switch (random().nextInt(5)) {
      case 0:
        return Integer.MIN_VALUE;
      case 1:
        return Integer.MAX_VALUE;
      default:
        int bpv = random().nextInt(32);
        switch (bpv) {
          case 32:
            return random().nextInt();
          default:
            int v = TestUtil.nextInt(random(), 0, (1 << bpv) - 1);
            if (bpv > 0) {
              // negative values sometimes
              v -= 1 << (bpv - 1);
            }
            return v;
        }
    }
    int max = Integer.MAX_VALUE / 2;
    return (max + max) * random().nextInt() - max;
  }

  @Override
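In the default branch above, the test first draws a bit width bpv, then a non-negative value that fits in bpv bits, and shifts about half of those values negative, so small magnitudes are heavily over-represented compared with a uniform 32-bit draw. A standalone sketch of that sampling idea (plain java.util.Random in place of the test framework's random() and TestUtil; names are illustrative):

    import java.util.Random;

    // Sketch only: small-magnitude-biased random ints, mirroring the pattern in the patch.
    public class SmallIntSamplerSketch {
      private static final Random R = new Random();

      static int nextSmallBiasedInt() {
        int bpv = R.nextInt(32);                    // bit width 0..31
        if (bpv == 0) {
          return 0;                                 // the only value representable in zero bits
        }
        int v = R.nextInt() & ((1 << bpv) - 1);     // keep bpv uniform random bits: [0, 2^bpv)
        // Centre the range so negative values appear about half the time.
        return v - (1 << (bpv - 1));
      }

      public static void main(String[] args) {
        for (int i = 0; i < 10; i++) {
          System.out.println(nextSmallBiasedInt());
        }
      }
    }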
@ -660,12 +660,14 @@ public class TestLRUQueryCache extends LuceneTestCase {
      @Override
      protected void onQueryCache(Query query, long ramBytesUsed) {
        super.onQueryCache(query, ramBytesUsed);
        assertNotNull("cached query is null", query);
        ramBytesUsage.addAndGet(ramBytesUsed);
      }

      @Override
      protected void onQueryEviction(Query query, long ramBytesUsed) {
        super.onQueryEviction(query, ramBytesUsed);
        assertNotNull("evicted query is null", query);
        ramBytesUsage.addAndGet(-ramBytesUsed);
      }

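The added assertNotNull calls harden the cache listener callbacks; onQueryCache and onQueryEviction are the protected hooks LRUQueryCache exposes for observing what gets cached and how much memory it is charged for. A hedged sketch of a RAM-tracking subclass built on those hooks (the class name and constructor arguments are illustrative; the two-argument LRUQueryCache constructor is assumed to be available in this version):

    import java.util.concurrent.atomic.AtomicLong;
    import org.apache.lucene.search.LRUQueryCache;
    import org.apache.lucene.search.Query;

    // Sketch: counts RAM attributed to cached queries via the protected listener hooks.
    public class RamTrackingQueryCache extends LRUQueryCache {
      private final AtomicLong ramBytesUsage = new AtomicLong();

      public RamTrackingQueryCache(int maxSize, long maxRamBytesUsed) {
        super(maxSize, maxRamBytesUsed);
      }

      @Override
      protected void onQueryCache(Query query, long ramBytesUsed) {
        super.onQueryCache(query, ramBytesUsed);
        ramBytesUsage.addAndGet(ramBytesUsed);
      }

      @Override
      protected void onQueryEviction(Query query, long ramBytesUsed) {
        super.onQueryEviction(query, ramBytesUsed);
        ramBytesUsage.addAndGet(-ramBytesUsed);
      }

      public long ramBytesUsedByQueries() {
        return ramBytesUsage.get();
      }
    }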
@ -23,6 +23,7 @@ import org.apache.lucene.document.LongRange;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.TestUtil;

/**
 * Random testing for LongRange Queries.

@ -31,11 +32,25 @@ public class TestLongRangeFieldQueries extends BaseRangeFieldQueryTestCase {
  private static final String FIELD_NAME = "longRangeField";

  private long nextLongInternal() {
    if (rarely()) {
      return random().nextBoolean() ? Long.MAX_VALUE : Long.MIN_VALUE;
    switch (random().nextInt(5)) {
      case 0:
        return Long.MIN_VALUE;
      case 1:
        return Long.MAX_VALUE;
      default:
        int bpv = random().nextInt(64);
        switch (bpv) {
          case 64:
            return random().nextLong();
          default:
            long v = TestUtil.nextLong(random(), 0, (1L << bpv) - 1);
            if (bpv > 0) {
              // negative values sometimes
              v -= 1L << (bpv - 1);
            }
            return v;
        }
    }
    long max = Long.MAX_VALUE / 2;
    return (max + max) * random().nextLong() - max;
  }

  @Override
Some files were not shown because too many files have changed in this diff.