Merge branch 'apache-https-master' into jira/solr-8593

Kevin Risden 2017-01-18 10:45:57 -05:00
commit db6a3393a9
72 changed files with 4061 additions and 537 deletions

View File

@@ -63,16 +63,42 @@ Other
======================= Lucene 6.5.0 =======================
API Changes
* LUCENE-7624: TermsQuery has been renamed as TermInSetQuery and moved to core.
(Alan Woodward)
* LUCENE-7637: TermInSetQuery requires that all terms come from the same field.
(Adrien Grand)
New Features
* LUCENE-7623: Add FunctionScoreQuery and FunctionMatchQuery (Alan Woodward,
  Adrien Grand, David Smiley)
* LUCENE-7619: Add WordDelimiterGraphFilter, just like
WordDelimiterFilter except it produces correct token graphs so that
proximity queries at search time will produce correct results (Mike
McCandless)
Bug Fixes
* LUCENE-7630: Fix (Edge)NGramTokenFilter to no longer drop payloads
  and preserve all attributes. (Nathan Gass via Uwe Schindler)
Improvements
* LUCENE-7055: Added Weight#scorerSupplier, which allows to estimate the cost
of a Scorer before actually building it, in order to optimize how the query
should be run, eg. using points or doc values depending on costs of other
parts of the query. (Adrien Grand)
Optimizations
* LUCENE-7641: Optimized point range queries to compute documents that do not
match the range on single-valued fields when more than half the documents in
the index would match. (Adrien Grand)
======================= Lucene 6.4.0 =======================
API Changes
@@ -100,9 +126,6 @@ API Changes
* LUCENE-7611: DocumentValueSourceDictionary now takes a LongValuesSource
  as a parameter, and the ValueSource equivalent is deprecated (Alan Woodward)
* LUCENE-7624: TermsQuery has been renamed as TermInSetQuery and moved to core.
(Alan Woodward)
New features
* LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand)
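The LUCENE-7624 and LUCENE-7637 entries above rename TermsQuery to TermInSetQuery, move it to core, and require all terms to come from the same field. A minimal sketch of what calling code might look like after the rename; the field name and terms are illustrative, and the varargs constructor is assumed to match the 6.5 API:

import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.util.BytesRef;

public class TermInSetQueryExample {
  public static void main(String[] args) {
    // All terms must belong to the same field ("id" here is only an example).
    Query q = new TermInSetQuery("id",
        new BytesRef("doc-1"), new BytesRef("doc-2"), new BytesRef("doc-3"));
    System.out.println(q);
  }
}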

View File

@@ -15,7 +15,7 @@
* limitations under the License.
*/
package org.apache.lucene.analysis.synonym;
package org.apache.lucene.analysis.core;
import java.io.IOException;
import java.util.ArrayList;
@@ -23,6 +23,7 @@ import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
@@ -410,8 +411,8 @@ public final class FlattenGraphFilter extends TokenFilter {
maxLookaheadUsed = 0;
}
// for testing
/** For testing */
int getMaxLookaheadUsed() {
public int getMaxLookaheadUsed() {
return maxLookaheadUsed;
}
}
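FlattenGraphFilter now lives in org.apache.lucene.analysis.core and its getMaxLookaheadUsed() hook is public for tests. As a rough sketch of where the filter typically sits, an index-time analyzer can flatten the graph produced by a graph-aware filter before indexing; the tokenizer and flag choices below are illustrative, not part of this commit:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.FlattenGraphFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;

public class IndexTimeGraphAnalyzer extends Analyzer {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new WhitespaceTokenizer();
    // Produce a token graph, then flatten it so it can be indexed safely.
    TokenStream graph = new WordDelimiterGraphFilter(source,
        WordDelimiterGraphFilter.GENERATE_WORD_PARTS | WordDelimiterGraphFilter.CATENATE_WORDS,
        null);
    return new TokenStreamComponents(source, new FlattenGraphFilter(graph));
  }
}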

View File

@@ -15,7 +15,7 @@
* limitations under the License.
*/
package org.apache.lucene.analysis.synonym;
package org.apache.lucene.analysis.core;
import java.util.Map;

View File

@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.InPlaceMergeSorter;
@@ -80,7 +81,12 @@ import org.apache.lucene.util.InPlaceMergeSorter;
* the current {@link StandardTokenizer} immediately removes many intra-word
* delimiters, it is recommended that this filter be used after a tokenizer that
* does not do this (such as {@link WhitespaceTokenizer}).
*
* @deprecated Use {@link WordDelimiterGraphFilter} instead: it produces a correct
* token graph so that e.g. {@link PhraseQuery} works correctly when it's used in
* the search time analyzer.
*/
@Deprecated
public final class WordDelimiterFilter extends TokenFilter {
public static final int LOWER = 0x01;
@@ -116,7 +122,7 @@ public final class WordDelimiterFilter extends TokenFilter {
/**
* Causes maximum runs of word parts to be catenated:
* <p>
* "wi-fi" =&gt; "wifi"
* "500-42" =&gt; "50042"
*/
public static final int CATENATE_NUMBERS = 8;
@@ -494,7 +500,6 @@ public final class WordDelimiterFilter extends TokenFilter {
private void generatePart(boolean isSingleWord) {
clearAttributes();
termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
int startOffset = savedStartOffset + iterator.current;
int endOffset = savedStartOffset + iterator.end;

View File

@@ -31,6 +31,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.search.PhraseQuery;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
@@ -47,7 +48,12 @@ import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
* types="wdfftypes.txt" /&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @deprecated Use {@link WordDelimiterGraphFilterFactory} instead: it produces a correct
* token graph so that e.g. {@link PhraseQuery} works correctly when it's used in
* the search time analyzer.
*/
@Deprecated
public class WordDelimiterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
public static final String PROTECTED_TOKENS = "protected";
public static final String TYPES = "types";
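Both deprecation notes above point users at the graph variants. A hedged sketch of the swap in plain Java, using the shared flag constants; the flag selection is illustrative only, not a recommended configuration:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;

public class WordDelimiterMigration {
  static TokenStream legacy(WhitespaceTokenizer tok) {
    // Deprecated: does not produce a correct token graph.
    return new WordDelimiterFilter(tok,
        WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE, null);
  }

  static TokenStream graph(WhitespaceTokenizer tok) {
    // Replacement: correct graph output, so e.g. PhraseQuery works in the search-time analyzer.
    return new WordDelimiterGraphFilter(tok,
        WordDelimiterGraphFilter.GENERATE_WORD_PARTS | WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE, null);
  }
}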

View File

@@ -0,0 +1,706 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Splits words into subwords and performs optional transformations on subword
* groups, producing a correct token graph so that e.g. {@link PhraseQuery} can
* work correctly when this filter is used in the search-time analyzer. Unlike
* the deprecated {@link WordDelimiterFilter}, this token filter produces a
* correct token graph as output. However, it cannot consume an input token
* graph correctly.
*
* <p>
* Words are split into subwords with the following rules:
* <ul>
* <li>split on intra-word delimiters (by default, all non alpha-numeric
* characters): <code>"Wi-Fi"</code> &#8594; <code>"Wi", "Fi"</code></li>
* <li>split on case transitions: <code>"PowerShot"</code> &#8594;
* <code>"Power", "Shot"</code></li>
* <li>split on letter-number transitions: <code>"SD500"</code> &#8594;
* <code>"SD", "500"</code></li>
* <li>leading and trailing intra-word delimiters on each subword are ignored:
* <code>"//hello---there, 'dude'"</code> &#8594;
* <code>"hello", "there", "dude"</code></li>
* <li>trailing "'s" are removed for each subword: <code>"O'Neil's"</code>
* &#8594; <code>"O", "Neil"</code>
* <ul>
* <li>Note: this step isn't performed in a separate filter because of possible
* subword combinations.</li>
* </ul>
* </li>
* </ul>
*
* The <b>combinations</b> parameter affects how subwords are combined:
* <ul>
* <li>combinations="0" causes no subword combinations: <code>"PowerShot"</code>
* &#8594; <code>0:"Power", 1:"Shot"</code> (0 and 1 are the token positions)</li>
* <li>combinations="1" means that in addition to the subwords, maximum runs of
* non-numeric subwords are catenated and produced at the same position of the
* last subword in the run:
* <ul>
* <li><code>"PowerShot"</code> &#8594;
* <code>0:"Power", 1:"Shot" 1:"PowerShot"</code></li>
* <li><code>"A's+B's&amp;C's"</code> &gt; <code>0:"A", 1:"B", 2:"C", 2:"ABC"</code>
* </li>
* <li><code>"Super-Duper-XL500-42-AutoCoder!"</code> &#8594;
* <code>0:"Super", 1:"Duper", 2:"XL", 2:"SuperDuperXL", 3:"500" 4:"42", 5:"Auto", 6:"Coder", 6:"AutoCoder"</code>
* </li>
* </ul>
* </li>
* </ul>
* One use for {@link WordDelimiterGraphFilter} is to help match words with different
* subword delimiters. For example, if the source text contained "wi-fi" one may
* want "wifi" "WiFi" "wi-fi" "wi+fi" queries to all match. One way of doing so
* is to specify combinations="1" in the analyzer used for indexing, and
* combinations="0" (the default) in the analyzer used for querying. Given that
* the current {@link StandardTokenizer} immediately removes many intra-word
* delimiters, it is recommended that this filter be used after a tokenizer that
* does not do this (such as {@link WhitespaceTokenizer}).
*/
public final class WordDelimiterGraphFilter extends TokenFilter {
/**
* Causes parts of words to be generated:
* <p>
* "PowerShot" =&gt; "Power" "Shot"
*/
public static final int GENERATE_WORD_PARTS = 1;
/**
* Causes number subwords to be generated:
* <p>
* "500-42" =&gt; "500" "42"
*/
public static final int GENERATE_NUMBER_PARTS = 2;
/**
* Causes maximum runs of word parts to be catenated:
* <p>
* "wi-fi" =&gt; "wifi"
*/
public static final int CATENATE_WORDS = 4;
/**
* Causes maximum runs of number parts to be catenated:
* <p>
* "500-42" =&gt; "50042"
*/
public static final int CATENATE_NUMBERS = 8;
/**
* Causes all subword parts to be catenated:
* <p>
* "wi-fi-4000" =&gt; "wifi4000"
*/
public static final int CATENATE_ALL = 16;
/**
* Causes original words to be preserved and added to the subword list (defaults to false)
* <p>
* "500-42" =&gt; "500" "42" "500-42"
*/
public static final int PRESERVE_ORIGINAL = 32;
/**
* Causes lowercase -&gt; uppercase transition to start a new subword.
*/
public static final int SPLIT_ON_CASE_CHANGE = 64;
/**
* If not set, causes numeric changes to be ignored (subwords will only be generated
* given SUBWORD_DELIM tokens).
*/
public static final int SPLIT_ON_NUMERICS = 128;
/**
* Causes trailing "'s" to be removed for each subword
* <p>
* "O'Neil's" =&gt; "O", "Neil"
*/
public static final int STEM_ENGLISH_POSSESSIVE = 256;
/**
* If not null, the set of tokens to protect from being delimited
*
*/
final CharArraySet protWords;
private final int flags;
// packs start pos, end pos, start part, end part (= slice of the term text) for each buffered part:
private int[] bufferedParts = new int[16];
private int bufferedLen;
private int bufferedPos;
// holds text for each buffered part, or null if it's a simple slice of the original term
private char[][] bufferedTermParts = new char[4][];
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
private final PositionLengthAttribute posLenAttribute = addAttribute(PositionLengthAttribute.class);
private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
// used for iterating word delimiter breaks
private final WordDelimiterIterator iterator;
// used for concatenating runs of similar typed subwords (word,number)
private final WordDelimiterConcatenation concat = new WordDelimiterConcatenation();
// number of subwords last output by concat.
private int lastConcatCount;
// used for catenate all
private final WordDelimiterConcatenation concatAll = new WordDelimiterConcatenation();
// used for accumulating position increment gaps so that we preserve incoming holes:
private int accumPosInc;
private char[] savedTermBuffer = new char[16];
private int savedTermLength;
private int savedStartOffset;
private int savedEndOffset;
private AttributeSource.State savedState;
private int lastStartOffset;
// if length by start + end offsets doesn't match the term text then assume
// this is a synonym and don't adjust the offsets.
private boolean hasIllegalOffsets;
private int wordPos;
/**
* Creates a new WordDelimiterGraphFilter
*
* @param in TokenStream to be filtered
* @param charTypeTable table containing character types
* @param configurationFlags Flags configuring the filter
* @param protWords If not null is the set of tokens to protect from being delimited
*/
public WordDelimiterGraphFilter(TokenStream in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords) {
super(in);
if ((configurationFlags &
~(GENERATE_WORD_PARTS |
GENERATE_NUMBER_PARTS |
CATENATE_WORDS |
CATENATE_NUMBERS |
CATENATE_ALL |
PRESERVE_ORIGINAL |
SPLIT_ON_CASE_CHANGE |
SPLIT_ON_NUMERICS |
STEM_ENGLISH_POSSESSIVE)) != 0) {
throw new IllegalArgumentException("flags contains unrecognized flag: " + configurationFlags);
}
this.flags = configurationFlags;
this.protWords = protWords;
this.iterator = new WordDelimiterIterator(
charTypeTable, has(SPLIT_ON_CASE_CHANGE), has(SPLIT_ON_NUMERICS), has(STEM_ENGLISH_POSSESSIVE));
}
/**
* Creates a new WordDelimiterGraphFilter using {@link WordDelimiterIterator#DEFAULT_WORD_DELIM_TABLE}
* as its charTypeTable
*
* @param in TokenStream to be filtered
* @param configurationFlags Flags configuring the filter
* @param protWords If not null is the set of tokens to protect from being delimited
*/
public WordDelimiterGraphFilter(TokenStream in, int configurationFlags, CharArraySet protWords) {
this(in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords);
}
/** Iterates all words parts and concatenations, buffering up the term parts we should return. */
private void bufferWordParts() throws IOException {
saveState();
// if length by start + end offsets doesn't match the term's text then set offsets for all our word parts/concats to the incoming
// offsets. this can happen if WDGF is applied to an injected synonym, or to a stem'd form, etc:
hasIllegalOffsets = (savedEndOffset - savedStartOffset != savedTermLength);
bufferedLen = 0;
lastConcatCount = 0;
wordPos = 0;
if (iterator.isSingleWord()) {
buffer(wordPos, wordPos+1, iterator.current, iterator.end);
wordPos++;
iterator.next();
} else {
// iterate all words parts, possibly buffering them, building up concatenations and possibly buffering them too:
while (iterator.end != WordDelimiterIterator.DONE) {
int wordType = iterator.type();
// do we already have queued up incompatible concatenations?
if (concat.isNotEmpty() && (concat.type & wordType) == 0) {
flushConcatenation(concat);
}
// add subwords depending upon options
if (shouldConcatenate(wordType)) {
concatenate(concat);
}
// add all subwords (catenateAll)
if (has(CATENATE_ALL)) {
concatenate(concatAll);
}
// if we should output the word or number part
if (shouldGenerateParts(wordType)) {
buffer(wordPos, wordPos+1, iterator.current, iterator.end);
wordPos++;
}
iterator.next();
}
if (concat.isNotEmpty()) {
// flush final concatenation
flushConcatenation(concat);
}
if (concatAll.isNotEmpty()) {
// only if we haven't output this same combo above, e.g. PowerShot with CATENATE_WORDS:
if (concatAll.subwordCount > lastConcatCount) {
if (wordPos == concatAll.startPos) {
// we are not generating parts, so we must advance wordPos now
wordPos++;
}
concatAll.write();
}
concatAll.clear();
}
}
if (has(PRESERVE_ORIGINAL)) {
if (wordPos == 0) {
// can happen w/ strange flag combos and inputs :)
wordPos++;
}
// add the original token now so that we can set the correct end position
buffer(0, wordPos, 0, savedTermLength);
}
sorter.sort(0, bufferedLen);
wordPos = 0;
// set back to 0 for iterating from the buffer
bufferedPos = 0;
}
@Override
public boolean incrementToken() throws IOException {
while (true) {
if (savedState == null) {
// process a new input token
if (input.incrementToken() == false) {
return false;
}
int termLength = termAttribute.length();
char[] termBuffer = termAttribute.buffer();
accumPosInc += posIncAttribute.getPositionIncrement();
// iterate & cache all word parts up front:
iterator.setText(termBuffer, termLength);
iterator.next();
// word of no delimiters, or protected word: just return it
if ((iterator.current == 0 && iterator.end == termLength) ||
(protWords != null && protWords.contains(termBuffer, 0, termLength))) {
posIncAttribute.setPositionIncrement(accumPosInc);
accumPosInc = 0;
return true;
}
// word consisting only of delimiters: swallow this token, creating a hole, and move on to next token
if (iterator.end == WordDelimiterIterator.DONE) {
if (has(PRESERVE_ORIGINAL) == false) {
continue;
} else {
return true;
}
}
// otherwise, we have delimiters, process & buffer all parts:
bufferWordParts();
}
if (bufferedPos < bufferedLen) {
clearAttributes();
restoreState(savedState);
char[] termPart = bufferedTermParts[bufferedPos];
int startPos = bufferedParts[4*bufferedPos];
int endPos = bufferedParts[4*bufferedPos+1];
int startPart = bufferedParts[4*bufferedPos+2];
int endPart = bufferedParts[4*bufferedPos+3];
bufferedPos++;
int startOffset;
int endOffset;
if (hasIllegalOffsets) {
startOffset = savedStartOffset;
endOffset = savedEndOffset;
} else {
startOffset = savedStartOffset + startPart;
endOffset = savedStartOffset + endPart;
}
// never let offsets go backwards:
startOffset = Math.max(startOffset, lastStartOffset);
endOffset = Math.max(endOffset, lastStartOffset);
offsetAttribute.setOffset(startOffset, endOffset);
lastStartOffset = startOffset;
if (termPart == null) {
termAttribute.copyBuffer(savedTermBuffer, startPart, endPart - startPart);
} else {
termAttribute.copyBuffer(termPart, 0, termPart.length);
}
posIncAttribute.setPositionIncrement(accumPosInc + startPos - wordPos);
accumPosInc = 0;
posLenAttribute.setPositionLength(endPos - startPos);
wordPos = startPos;
return true;
}
// no saved concatenations, on to the next input word
savedState = null;
}
}
@Override
public void reset() throws IOException {
super.reset();
accumPosInc = 0;
savedState = null;
lastStartOffset = 0;
concat.clear();
concatAll.clear();
}
// ================================================= Helper Methods ================================================
private class PositionSorter extends InPlaceMergeSorter {
@Override
protected int compare(int i, int j) {
// sort by smaller start position
int iPosStart = bufferedParts[4*i];
int jPosStart = bufferedParts[4*j];
int cmp = Integer.compare(iPosStart, jPosStart);
if (cmp != 0) {
return cmp;
}
// tie break by longest pos length:
int iPosEnd = bufferedParts[4*i+1];
int jPosEnd = bufferedParts[4*j+1];
return Integer.compare(jPosEnd, iPosEnd);
}
@Override
protected void swap(int i, int j) {
int iOffset = 4*i;
int jOffset = 4*j;
for(int x=0;x<4;x++) {
int tmp = bufferedParts[iOffset+x];
bufferedParts[iOffset+x] = bufferedParts[jOffset+x];
bufferedParts[jOffset+x] = tmp;
}
char[] tmp2 = bufferedTermParts[i];
bufferedTermParts[i] = bufferedTermParts[j];
bufferedTermParts[j] = tmp2;
}
}
final PositionSorter sorter = new PositionSorter();
/**
* startPos, endPos -> graph start/end position
* startPart, endPart -> slice of the original term for this part
*/
void buffer(int startPos, int endPos, int startPart, int endPart) {
buffer(null, startPos, endPos, startPart, endPart);
}
/**
* a null termPart means it's a simple slice of the original term
*/
void buffer(char[] termPart, int startPos, int endPos, int startPart, int endPart) {
/*
System.out.println("buffer: pos=" + startPos + "-" + endPos + " part=" + startPart + "-" + endPart);
if (termPart != null) {
System.out.println(" termIn=" + new String(termPart));
} else {
System.out.println(" term=" + new String(savedTermBuffer, startPart, endPart-startPart));
}
*/
assert endPos > startPos: "startPos=" + startPos + " endPos=" + endPos;
assert endPart > startPart || (endPart == 0 && startPart == 0 && savedTermLength == 0): "startPart=" + startPart + " endPart=" + endPart;
if ((bufferedLen+1)*4 > bufferedParts.length) {
bufferedParts = ArrayUtil.grow(bufferedParts, (bufferedLen+1)*4);
}
if (bufferedTermParts.length == bufferedLen) {
int newSize = ArrayUtil.oversize(bufferedLen+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
char[][] newArray = new char[newSize][];
System.arraycopy(bufferedTermParts, 0, newArray, 0, bufferedTermParts.length);
bufferedTermParts = newArray;
}
bufferedTermParts[bufferedLen] = termPart;
bufferedParts[bufferedLen*4] = startPos;
bufferedParts[bufferedLen*4+1] = endPos;
bufferedParts[bufferedLen*4+2] = startPart;
bufferedParts[bufferedLen*4+3] = endPart;
bufferedLen++;
}
/**
* Saves the existing attribute states
*/
private void saveState() {
savedTermLength = termAttribute.length();
savedStartOffset = offsetAttribute.startOffset();
savedEndOffset = offsetAttribute.endOffset();
savedState = captureState();
if (savedTermBuffer.length < savedTermLength) {
savedTermBuffer = new char[ArrayUtil.oversize(savedTermLength, Character.BYTES)];
}
System.arraycopy(termAttribute.buffer(), 0, savedTermBuffer, 0, savedTermLength);
}
/**
* Flushes the given WordDelimiterConcatenation by either writing its concat and then clearing, or just clearing.
*
* @param concat WordDelimiterConcatenation that will be flushed
*/
private void flushConcatenation(WordDelimiterConcatenation concat) {
if (wordPos == concat.startPos) {
// we are not generating parts, so we must advance wordPos now
wordPos++;
}
lastConcatCount = concat.subwordCount;
if (concat.subwordCount != 1 || shouldGenerateParts(concat.type) == false) {
concat.write();
}
concat.clear();
}
/**
* Determines whether to concatenate a word or number if the current word is the given type
*
* @param wordType Type of the current word used to determine if it should be concatenated
* @return {@code true} if concatenation should occur, {@code false} otherwise
*/
private boolean shouldConcatenate(int wordType) {
return (has(CATENATE_WORDS) && WordDelimiterIterator.isAlpha(wordType)) || (has(CATENATE_NUMBERS) && WordDelimiterIterator.isDigit(wordType));
}
/**
* Determines whether a word/number part should be generated for a word of the given type
*
* @param wordType Type of the word used to determine if a word/number part should be generated
* @return {@code true} if a word/number part should be generated, {@code false} otherwise
*/
private boolean shouldGenerateParts(int wordType) {
return (has(GENERATE_WORD_PARTS) && WordDelimiterIterator.isAlpha(wordType)) || (has(GENERATE_NUMBER_PARTS) && WordDelimiterIterator.isDigit(wordType));
}
/**
* Concatenates the saved buffer to the given WordDelimiterConcatenation
*
* @param concatenation WordDelimiterConcatenation to concatenate the buffer to
*/
private void concatenate(WordDelimiterConcatenation concatenation) {
if (concatenation.isEmpty()) {
concatenation.type = iterator.type();
concatenation.startPart = iterator.current;
concatenation.startPos = wordPos;
}
concatenation.append(savedTermBuffer, iterator.current, iterator.end - iterator.current);
concatenation.endPart = iterator.end;
}
/**
* Determines whether the given flag is set
*
* @param flag Flag to see if set
* @return {@code true} if flag is set
*/
private boolean has(int flag) {
return (flags & flag) != 0;
}
// ================================================= Inner Classes =================================================
/**
* A WDF concatenated 'run'
*/
final class WordDelimiterConcatenation {
final StringBuilder buffer = new StringBuilder();
int startPart;
int endPart;
int startPos;
int type;
int subwordCount;
/**
* Appends the given text of the given length to the concatenation at the given offset
*
* @param text Text to append
* @param offset Offset in the concatenation to add the text
* @param length Length of the text to append
*/
void append(char text[], int offset, int length) {
buffer.append(text, offset, length);
subwordCount++;
}
/**
* Writes the concatenation to part buffer
*/
void write() {
char[] termPart = new char[buffer.length()];
buffer.getChars(0, buffer.length(), termPart, 0);
buffer(termPart, startPos, wordPos, startPart, endPart);
}
/**
* Determines if the concatenation is empty
*
* @return {@code true} if the concatenation is empty, {@code false} otherwise
*/
boolean isEmpty() {
return buffer.length() == 0;
}
boolean isNotEmpty() {
return isEmpty() == false;
}
/**
* Clears the concatenation and resets its state
*/
void clear() {
buffer.setLength(0);
startPart = endPart = type = subwordCount = 0;
}
}
/** Returns string representation of configuration flags */
public static String flagsToString(int flags) {
StringBuilder b = new StringBuilder();
if ((flags & GENERATE_WORD_PARTS) != 0) {
b.append("GENERATE_WORD_PARTS");
}
if ((flags & GENERATE_NUMBER_PARTS) != 0) {
if (b.length() > 0) {
b.append(" | ");
}
b.append("GENERATE_NUMBER_PARTS");
}
if ((flags & CATENATE_WORDS) != 0) {
if (b.length() > 0) {
b.append(" | ");
}
b.append("CATENATE_WORDS");
}
if ((flags & CATENATE_NUMBERS) != 0) {
if (b.length() > 0) {
b.append(" | ");
}
b.append("CATENATE_NUMBERS");
}
if ((flags & CATENATE_ALL) != 0) {
if (b.length() > 0) {
b.append(" | ");
}
b.append("CATENATE_ALL");
}
if ((flags & PRESERVE_ORIGINAL) != 0) {
if (b.length() > 0) {
b.append(" | ");
}
b.append("PRESERVE_ORIGINAL");
}
if ((flags & SPLIT_ON_CASE_CHANGE) != 0) {
if (b.length() > 0) {
b.append(" | ");
}
b.append("SPLIT_ON_CASE_CHANGE");
}
if ((flags & SPLIT_ON_NUMERICS) != 0) {
if (b.length() > 0) {
b.append(" | ");
}
b.append("SPLIT_ON_NUMERICS");
}
if ((flags & STEM_ENGLISH_POSSESSIVE) != 0) {
if (b.length() > 0) {
b.append(" | ");
}
b.append("STEM_ENGLISH_POSSESSIVE");
}
return b.toString();
}
@Override
public String toString() {
StringBuilder b = new StringBuilder();
b.append("WordDelimiterGraphFilter(flags=");
b.append(flagsToString(flags));
b.append(')');
return b.toString();
}
// questions:
// negative numbers? -42 indexed as just 42?
// dollar sign? $42
// percent sign? 33%
// downsides: if source text is "powershot" then a query of "PowerShot" won't match!
}
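To observe the behavior the class javadoc describes (word parts plus a catenated run sharing the last part's position), the filter can be driven directly. This is a small sketch, not part of the commit; the input string and flag set are chosen only for illustration:

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class WordDelimiterGraphDemo {
  public static void main(String[] args) throws Exception {
    int flags = WordDelimiterGraphFilter.GENERATE_WORD_PARTS
        | WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE
        | WordDelimiterGraphFilter.CATENATE_WORDS;
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("PowerShot"));
    try (TokenStream ts = new WordDelimiterGraphFilter(tokenizer, flags, null)) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        // Expected along the lines of: PowerShot, Power, Shot (the catenated token shares a position).
        System.out.println(term + " (posInc=" + posInc.getPositionIncrement() + ")");
      }
      ts.end();
    }
  }
}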

View File

@@ -0,0 +1,199 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.*;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator.*;
/**
* Factory for {@link WordDelimiterGraphFilter}.
* <pre class="prettyprint">
* &lt;fieldType name="text_wd" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.WordDelimiterGraphFilterFactory" protected="protectedword.txt"
* preserveOriginal="0" splitOnNumerics="1" splitOnCaseChange="1"
* catenateWords="0" catenateNumbers="0" catenateAll="0"
* generateWordParts="1" generateNumberParts="1" stemEnglishPossessive="1"
* types="wdfftypes.txt" /&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*/
public class WordDelimiterGraphFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
public static final String PROTECTED_TOKENS = "protected";
public static final String TYPES = "types";
private final String wordFiles;
private final String types;
private final int flags;
byte[] typeTable = null;
private CharArraySet protectedWords = null;
/** Creates a new WordDelimiterGraphFilterFactory */
public WordDelimiterGraphFilterFactory(Map<String, String> args) {
super(args);
int flags = 0;
if (getInt(args, "generateWordParts", 1) != 0) {
flags |= GENERATE_WORD_PARTS;
}
if (getInt(args, "generateNumberParts", 1) != 0) {
flags |= GENERATE_NUMBER_PARTS;
}
if (getInt(args, "catenateWords", 0) != 0) {
flags |= CATENATE_WORDS;
}
if (getInt(args, "catenateNumbers", 0) != 0) {
flags |= CATENATE_NUMBERS;
}
if (getInt(args, "catenateAll", 0) != 0) {
flags |= CATENATE_ALL;
}
if (getInt(args, "splitOnCaseChange", 1) != 0) {
flags |= SPLIT_ON_CASE_CHANGE;
}
if (getInt(args, "splitOnNumerics", 1) != 0) {
flags |= SPLIT_ON_NUMERICS;
}
if (getInt(args, "preserveOriginal", 0) != 0) {
flags |= PRESERVE_ORIGINAL;
}
if (getInt(args, "stemEnglishPossessive", 1) != 0) {
flags |= STEM_ENGLISH_POSSESSIVE;
}
wordFiles = get(args, PROTECTED_TOKENS);
types = get(args, TYPES);
this.flags = flags;
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public void inform(ResourceLoader loader) throws IOException {
if (wordFiles != null) {
protectedWords = getWordSet(loader, wordFiles, false);
}
if (types != null) {
List<String> files = splitFileNames( types );
List<String> wlist = new ArrayList<>();
for( String file : files ){
List<String> lines = getLines(loader, file.trim());
wlist.addAll( lines );
}
typeTable = parseTypes(wlist);
}
}
@Override
public TokenFilter create(TokenStream input) {
return new WordDelimiterGraphFilter(input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,
flags, protectedWords);
}
// source => type
private static Pattern typePattern = Pattern.compile( "(.*)\\s*=>\\s*(.*)\\s*$" );
// parses a list of MappingCharFilter style rules into a custom byte[] type table
private byte[] parseTypes(List<String> rules) {
SortedMap<Character,Byte> typeMap = new TreeMap<>();
for( String rule : rules ){
Matcher m = typePattern.matcher(rule);
if( !m.find() )
throw new IllegalArgumentException("Invalid Mapping Rule : [" + rule + "]");
String lhs = parseString(m.group(1).trim());
Byte rhs = parseType(m.group(2).trim());
if (lhs.length() != 1)
throw new IllegalArgumentException("Invalid Mapping Rule : [" + rule + "]. Only a single character is allowed.");
if (rhs == null)
throw new IllegalArgumentException("Invalid Mapping Rule : [" + rule + "]. Illegal type.");
typeMap.put(lhs.charAt(0), rhs);
}
// ensure the table is always at least as big as DEFAULT_WORD_DELIM_TABLE for performance
byte types[] = new byte[Math.max(typeMap.lastKey()+1, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE.length)];
for (int i = 0; i < types.length; i++)
types[i] = WordDelimiterIterator.getType(i);
for (Map.Entry<Character,Byte> mapping : typeMap.entrySet())
types[mapping.getKey()] = mapping.getValue();
return types;
}
private Byte parseType(String s) {
if (s.equals("LOWER"))
return LOWER;
else if (s.equals("UPPER"))
return UPPER;
else if (s.equals("ALPHA"))
return ALPHA;
else if (s.equals("DIGIT"))
return DIGIT;
else if (s.equals("ALPHANUM"))
return ALPHANUM;
else if (s.equals("SUBWORD_DELIM"))
return SUBWORD_DELIM;
else
return null;
}
char[] out = new char[256];
private String parseString(String s){
int readPos = 0;
int len = s.length();
int writePos = 0;
while( readPos < len ){
char c = s.charAt( readPos++ );
if( c == '\\' ){
if( readPos >= len )
throw new IllegalArgumentException("Invalid escaped char in [" + s + "]");
c = s.charAt( readPos++ );
switch( c ) {
case '\\' : c = '\\'; break;
case 'n' : c = '\n'; break;
case 't' : c = '\t'; break;
case 'r' : c = '\r'; break;
case 'b' : c = '\b'; break;
case 'f' : c = '\f'; break;
case 'u' :
if( readPos + 3 >= len )
throw new IllegalArgumentException("Invalid escaped char in [" + s + "]");
c = (char)Integer.parseInt( s.substring( readPos, readPos + 4 ), 16 );
readPos += 4;
break;
}
}
out[writePos++] = c;
}
return new String( out, 0, writePos );
}
}
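The factory above can also be constructed programmatically; the argument names mirror the keys parsed in its constructor, and the values below are illustrative. inform(ResourceLoader) is only needed when the protected or types arguments are set, so this sketch skips it:

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilterFactory;

public class GraphFactoryExample {
  public static void main(String[] args) {
    // The factory consumes and validates this map, so it must be mutable.
    Map<String, String> params = new HashMap<>();
    params.put("generateWordParts", "1");
    params.put("generateNumberParts", "1");
    params.put("catenateAll", "1");
    params.put("preserveOriginal", "0");
    WordDelimiterGraphFilterFactory factory = new WordDelimiterGraphFilterFactory(params);
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    TokenFilter filter = factory.create(tokenizer); // default char type table is used when types is unset
    System.out.println(filter);
  }
}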

View File

@@ -16,15 +16,21 @@
*/
package org.apache.lucene.analysis.miscellaneous;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
/**
* A BreakIterator-like API for iterating over subwords in text, according to WordDelimiterFilter rules.
* A BreakIterator-like API for iterating over subwords in text, according to WordDelimiterGraphFilter rules.
* @lucene.internal
*/
public final class WordDelimiterIterator {
static final int LOWER = 0x01;
static final int UPPER = 0x02;
static final int DIGIT = 0x04;
static final int SUBWORD_DELIM = 0x08;
// combinations: for testing, not for setting bits
public static final int ALPHA = 0x03;
public static final int ALPHANUM = 0x07;
/** Indicates the end of iteration */
public static final int DONE = -1;
@@ -97,7 +103,7 @@ public final class WordDelimiterIterator {
* Create a new WordDelimiterIterator operating with the supplied rules.
*
* @param charTypeTable table containing character types
* @param splitOnCaseChange if true, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards)
* @param splitOnCaseChange if true, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regardless)
* @param splitOnNumerics if true, causes "j2se" to be three tokens; "j" "2" "se"
* @param stemEnglishPossessive if true, causes trailing "'s" to be removed for each subword: "O'Neil's" =&gt; "O", "Neil"
*/
@@ -323,4 +329,45 @@ public final class WordDelimiterIterator {
default: return SUBWORD_DELIM;
}
}
/**
* Checks if the given word type includes {@link #ALPHA}
*
* @param type Word type to check
* @return {@code true} if the type contains ALPHA, {@code false} otherwise
*/
static boolean isAlpha(int type) {
return (type & ALPHA) != 0;
}
/**
* Checks if the given word type includes {@link #DIGIT}
*
* @param type Word type to check
* @return {@code true} if the type contains DIGIT, {@code false} otherwise
*/
static boolean isDigit(int type) {
return (type & DIGIT) != 0;
}
/**
* Checks if the given word type includes {@link #SUBWORD_DELIM}
*
* @param type Word type to check
* @return {@code true} if the type contains SUBWORD_DELIM, {@code false} otherwise
*/
static boolean isSubwordDelim(int type) {
return (type & SUBWORD_DELIM) != 0;
}
/**
* Checks if the given word type includes {@link #UPPER}
*
* @param type Word type to check
* @return {@code true} if the type contains UPPER, {@code false} otherwise
*/
static boolean isUpper(int type) {
return (type & UPPER) != 0;
}
}
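The constants added above are bit flags: ALPHA (0x03) is LOWER|UPPER and ALPHANUM (0x07) is ALPHA|DIGIT, which is why the new isAlpha/isDigit helpers test membership with a bitwise AND. A tiny standalone illustration of the same checks (not Lucene code, values copied from the constants above):

public class TypeFlagDemo {
  static final int LOWER = 0x01, UPPER = 0x02, DIGIT = 0x04, SUBWORD_DELIM = 0x08;
  static final int ALPHA = LOWER | UPPER;        // 0x03
  static final int ALPHANUM = ALPHA | DIGIT;     // 0x07

  public static void main(String[] args) {
    int type = UPPER;                                // e.g. the 'P' in "PowerShot"
    System.out.println((type & ALPHA) != 0);         // true  -> isAlpha
    System.out.println((type & DIGIT) != 0);         // false -> isDigit
    System.out.println((type & SUBWORD_DELIM) != 0); // false -> isSubwordDelim
  }
}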

View File

@@ -21,6 +21,7 @@ import java.util.Arrays;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.FlattenGraphFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

View File

@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.FlattenGraphFilterFactory;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;

View File

@@ -17,8 +17,14 @@
package org.apache.lucene.analysis.synonym;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.FlattenGraphFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -31,11 +37,6 @@ import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.RollingBuffer;
import org.apache.lucene.util.fst.FST;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
// TODO: maybe we should resolve token -> wordID then run
// FST on wordIDs, for better perf?

View File

@@ -78,6 +78,7 @@ org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilterFactory
org.apache.lucene.analysis.miscellaneous.TrimFilterFactory
org.apache.lucene.analysis.miscellaneous.TruncateTokenFilterFactory
org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory
org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilterFactory
org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilterFactory
org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilterFactory
org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory
@@ -103,6 +104,6 @@ org.apache.lucene.analysis.standard.StandardFilterFactory
org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory
org.apache.lucene.analysis.synonym.SynonymFilterFactory
org.apache.lucene.analysis.synonym.SynonymGraphFilterFactory
org.apache.lucene.analysis.synonym.FlattenGraphFilterFactory
org.apache.lucene.analysis.core.FlattenGraphFilterFactory
org.apache.lucene.analysis.tr.TurkishLowerCaseFilterFactory
org.apache.lucene.analysis.util.ElisionFilterFactory
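The SPI entry above now points at the core package; name-based lookups keep working because the SPI name is derived from the class name rather than the package. A sketch of resolving the factory by name, assuming the conventional "flattenGraph" SPI name and an empty, mutable argument map:

import java.util.HashMap;
import org.apache.lucene.analysis.util.TokenFilterFactory;

public class SpiLookupExample {
  public static void main(String[] args) {
    // Resolves via META-INF/services to org.apache.lucene.analysis.core.FlattenGraphFilterFactory.
    TokenFilterFactory factory = TokenFilterFactory.forName("flattenGraph", new HashMap<>());
    System.out.println(factory.getClass().getName());
  }
}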

View File

@@ -15,7 +15,7 @@
* limitations under the License.
*/
package org.apache.lucene.analysis.synonym;
package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;

View File

@@ -446,4 +446,73 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
a.close();
}
}
/*
public void testToDot() throws Exception {
int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE | PRESERVE_ORIGINAL | CATENATE_WORDS | CATENATE_NUMBERS | STEM_ENGLISH_POSSESSIVE;
String text = "PowerSystem2000-5-Shot's";
WordDelimiterFilter wdf = new WordDelimiterFilter(new CannedTokenStream(new Token(text, 0, text.length())), DEFAULT_WORD_DELIM_TABLE, flags, null);
//StringWriter sw = new StringWriter();
// TokenStreamToDot toDot = new TokenStreamToDot(text, wdf, new PrintWriter(sw));
PrintWriter pw = new PrintWriter("/x/tmp/before.dot");
TokenStreamToDot toDot = new TokenStreamToDot(text, wdf, pw);
toDot.toDot();
pw.close();
System.out.println("TEST DONE");
//System.out.println("DOT:\n" + sw.toString());
}
*/
public void testOnlyNumbers() throws Exception {
int flags = GENERATE_WORD_PARTS | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS;
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, null));
}
};
assertAnalyzesTo(a, "7-586",
new String[] {},
new int[] {},
new int[] {},
null,
new int[] {},
null,
false);
}
public void testNumberPunct() throws Exception {
int flags = GENERATE_WORD_PARTS | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS;
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, null));
}
};
assertAnalyzesTo(a, "6-",
new String[] {"6"},
new int[] {0},
new int[] {1},
null,
new int[] {1},
null,
false);
}
private Analyzer getAnalyzer(final int flags) {
return new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, null));
}
};
}
}

View File

@@ -0,0 +1,897 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.util.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.*;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
/**
* New WordDelimiterGraphFilter tests... most of the tests are in ConvertedLegacyTest
* TODO: should explicitly test things like protWords and not rely on
* the factory tests in Solr.
*/
public class TestWordDelimiterGraphFilter extends BaseTokenStreamTestCase {
public void testOffsets() throws IOException {
int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
// test that subwords and catenated subwords have
// the correct offsets.
WordDelimiterGraphFilter wdf = new WordDelimiterGraphFilter(new CannedTokenStream(new Token("foo-bar", 5, 12)), DEFAULT_WORD_DELIM_TABLE, flags, null);
assertTokenStreamContents(wdf,
new String[] { "foobar", "foo", "bar" },
new int[] { 5, 5, 9 },
new int[] { 12, 8, 12 });
// with illegal offsets:
wdf = new WordDelimiterGraphFilter(new CannedTokenStream(new Token("foo-bar", 5, 6)), DEFAULT_WORD_DELIM_TABLE, flags, null);
assertTokenStreamContents(wdf,
new String[] { "foobar", "foo", "bar" },
new int[] { 5, 5, 5 },
new int[] { 6, 6, 6 });
}
public void testOffsetChange() throws Exception {
int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
WordDelimiterGraphFilter wdf = new WordDelimiterGraphFilter(new CannedTokenStream(new Token("übelkeit)", 7, 16)), DEFAULT_WORD_DELIM_TABLE, flags, null);
assertTokenStreamContents(wdf,
new String[] { "übelkeit" },
new int[] { 7 },
new int[] { 15 });
}
public void testOffsetChange2() throws Exception {
int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
WordDelimiterGraphFilter wdf = new WordDelimiterGraphFilter(new CannedTokenStream(new Token("(übelkeit", 7, 17)), DEFAULT_WORD_DELIM_TABLE, flags, null);
// illegal offsets:
assertTokenStreamContents(wdf,
new String[] { "übelkeit" },
new int[] { 7 },
new int[] { 17 });
}
public void testOffsetChange3() throws Exception {
int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
WordDelimiterGraphFilter wdf = new WordDelimiterGraphFilter(new CannedTokenStream(new Token("(übelkeit", 7, 16)), DEFAULT_WORD_DELIM_TABLE, flags, null);
assertTokenStreamContents(wdf,
new String[] { "übelkeit" },
new int[] { 8 },
new int[] { 16 });
}
public void testOffsetChange4() throws Exception {
int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
WordDelimiterGraphFilter wdf = new WordDelimiterGraphFilter(new CannedTokenStream(new Token("(foo,bar)", 7, 16)), DEFAULT_WORD_DELIM_TABLE, flags, null);
assertTokenStreamContents(wdf,
new String[] { "foobar", "foo", "bar"},
new int[] { 8, 8, 12 },
new int[] { 15, 11, 15 });
}
public void doSplit(final String input, String... output) throws Exception {
int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
WordDelimiterGraphFilter wdf = new WordDelimiterGraphFilter(keywordMockTokenizer(input),
WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null);
assertTokenStreamContents(wdf, output);
}
public void testSplits() throws Exception {
doSplit("basic-split","basic","split");
doSplit("camelCase","camel","Case");
// non-space marking symbol shouldn't cause split
// this is an example in Thai
doSplit("\u0e1a\u0e49\u0e32\u0e19","\u0e1a\u0e49\u0e32\u0e19");
// possessive followed by delimiter
doSplit("test's'", "test");
// some russian upper and lowercase
doSplit("Роберт", "Роберт");
// now cause a split (russian camelCase)
doSplit("РобЕрт", "Роб", "Ерт");
// a composed titlecase character, don't split
doSplit("aDžungla", "aDžungla");
// a modifier letter, don't split
doSplit("ســـــــــــــــــلام", "ســـــــــــــــــلام");
// enclosing mark, don't split
doSplit("test⃝", "test⃝");
// combining spacing mark (the virama), don't split
doSplit("हिन्दी", "हिन्दी");
// don't split non-ascii digits
doSplit("١٢٣٤", "١٢٣٤");
// don't split supplementaries into unpaired surrogates
doSplit("𠀀𠀀", "𠀀𠀀");
}
public void doSplitPossessive(int stemPossessive, final String input, final String... output) throws Exception {
int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS;
flags |= (stemPossessive == 1) ? STEM_ENGLISH_POSSESSIVE : 0;
WordDelimiterGraphFilter wdf = new WordDelimiterGraphFilter(keywordMockTokenizer(input), flags, null);
assertTokenStreamContents(wdf, output);
}
/*
* Test option that allows disabling the special "'s" stemming, instead treating the single quote like other delimiters.
*/
public void testPossessives() throws Exception {
doSplitPossessive(1, "ra's", "ra");
doSplitPossessive(0, "ra's", "ra", "s");
}
/*
* Set a large position increment gap of 10 if the token is "largegap" or "/"
*/
private final class LargePosIncTokenFilter extends TokenFilter {
private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
protected LargePosIncTokenFilter(TokenStream input) {
super(input);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (termAtt.toString().equals("largegap") || termAtt.toString().equals("/"))
posIncAtt.setPositionIncrement(10);
return true;
} else {
return false;
}
}
}
public void testPositionIncrements() throws Exception {
final int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
final CharArraySet protWords = new CharArraySet(new HashSet<>(Arrays.asList("NUTCH")), false);
/* analyzer that uses whitespace + wdf */
Analyzer a = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String field) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new WordDelimiterGraphFilter(
tokenizer,
flags, protWords));
}
};
/* in this case, works as expected. */
assertAnalyzesTo(a, "LUCENE / SOLR", new String[] { "LUCENE", "SOLR" },
new int[] { 0, 9 },
new int[] { 6, 13 },
null,
new int[] { 1, 2 },
null,
false);
/* only in this case, posInc of 2 ?! */
assertAnalyzesTo(a, "LUCENE / solR", new String[] { "LUCENE", "solR", "sol", "R" },
new int[] { 0, 9, 9, 12 },
new int[] { 6, 13, 12, 13 },
null,
new int[] { 1, 2, 0, 1 },
null,
false);
assertAnalyzesTo(a, "LUCENE / NUTCH SOLR", new String[] { "LUCENE", "NUTCH", "SOLR" },
new int[] { 0, 9, 15 },
new int[] { 6, 14, 19 },
null,
new int[] { 1, 2, 1 },
null,
false);
/* analyzer that will consume tokens with large position increments */
Analyzer a2 = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String field) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new WordDelimiterGraphFilter(
new LargePosIncTokenFilter(tokenizer),
flags, protWords));
}
};
/* increment of "largegap" is preserved */
assertAnalyzesTo(a2, "LUCENE largegap SOLR", new String[] { "LUCENE", "largegap", "SOLR" },
new int[] { 0, 7, 16 },
new int[] { 6, 15, 20 },
null,
new int[] { 1, 10, 1 },
null,
false);
/* the "/" had a position increment of 10, where did it go?!?!! */
assertAnalyzesTo(a2, "LUCENE / SOLR", new String[] { "LUCENE", "SOLR" },
new int[] { 0, 9 },
new int[] { 6, 13 },
null,
new int[] { 1, 11 },
null,
false);
/* in this case, the increment of 10 from the "/" is carried over */
assertAnalyzesTo(a2, "LUCENE / solR", new String[] { "LUCENE", "solR", "sol", "R" },
new int[] { 0, 9, 9, 12 },
new int[] { 6, 13, 12, 13 },
null,
new int[] { 1, 11, 0, 1 },
null,
false);
assertAnalyzesTo(a2, "LUCENE / NUTCH SOLR", new String[] { "LUCENE", "NUTCH", "SOLR" },
new int[] { 0, 9, 15 },
new int[] { 6, 14, 19 },
null,
new int[] { 1, 11, 1 },
null,
false);
Analyzer a3 = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String field) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
StopFilter filter = new StopFilter(tokenizer, StandardAnalyzer.STOP_WORDS_SET);
return new TokenStreamComponents(tokenizer, new WordDelimiterGraphFilter(filter, flags, protWords));
}
};
assertAnalyzesTo(a3, "lucene.solr",
new String[] { "lucenesolr", "lucene", "solr" },
new int[] { 0, 0, 7 },
new int[] { 11, 6, 11 },
null,
new int[] { 1, 0, 1 },
null,
false);
/* the stopword should add a gap here */
assertAnalyzesTo(a3, "the lucene.solr",
new String[] { "lucenesolr", "lucene", "solr" },
new int[] { 4, 4, 11 },
new int[] { 15, 10, 15 },
null,
new int[] { 2, 0, 1 },
null,
false);
IOUtils.close(a, a2, a3);
}
/** concat numbers + words + all */
public void testLotsOfConcatenating() throws Exception {
final int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_WORDS | CATENATE_NUMBERS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
/* analyzer that uses whitespace + wdf */
Analyzer a = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String field) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new WordDelimiterGraphFilter(tokenizer, flags, null));
}
};
assertAnalyzesTo(a, "abc-def-123-456",
new String[] { "abcdef123456", "abcdef", "abc", "def", "123456", "123", "456" },
new int[] { 0, 0, 0, 4, 8, 8, 12 },
new int[] { 15, 7, 3, 7, 15, 11, 15 },
null,
new int[] { 1, 0, 0, 1, 1, 0, 1 },
null,
false);
a.close();
}
/** concat numbers + words + all + preserve original */
public void testLotsOfConcatenating2() throws Exception {
final int flags = PRESERVE_ORIGINAL | GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_WORDS | CATENATE_NUMBERS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
/* analyzer that uses whitespace + wdf */
Analyzer a = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String field) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new WordDelimiterGraphFilter(tokenizer, flags, null));
}
};
assertAnalyzesTo(a, "abc-def-123-456",
new String[] { "abcdef123456", "abc-def-123-456", "abcdef", "abc", "def", "123456", "123", "456" },
new int[] { 0, 0, 0, 0, 4, 8, 8, 12 },
new int[] { 15, 15, 7, 3, 7, 15, 11, 15 },
null,
new int[] { 1, 0, 0, 0, 1, 1, 0, 1 },
null,
false);
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
int numIterations = atLeast(5);
for (int i = 0; i < numIterations; i++) {
final int flags = random().nextInt(512);
final CharArraySet protectedWords;
if (random().nextBoolean()) {
protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
} else {
protectedWords = null;
}
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new WordDelimiterGraphFilter(tokenizer, flags, protectedWords));
}
};
// TODO: properly support positionLengthAttribute
checkRandomData(random(), a, 200*RANDOM_MULTIPLIER, 20, false, false);
a.close();
}
}
/** blast some enormous random strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
int numIterations = atLeast(5);
for (int i = 0; i < numIterations; i++) {
final int flags = random().nextInt(512);
final CharArraySet protectedWords;
if (random().nextBoolean()) {
protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
} else {
protectedWords = null;
}
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
TokenStream wdgf = new WordDelimiterGraphFilter(tokenizer, flags, protectedWords);
return new TokenStreamComponents(tokenizer, wdgf);
}
};
// TODO: properly support positionLengthAttribute
checkRandomData(random(), a, 20*RANDOM_MULTIPLIER, 8192, false, false);
a.close();
}
}
public void testEmptyTerm() throws IOException {
Random random = random();
for (int i = 0; i < 512; i++) {
final int flags = i;
final CharArraySet protectedWords;
if (random.nextBoolean()) {
protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
} else {
protectedWords = null;
}
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new WordDelimiterGraphFilter(tokenizer, flags, protectedWords));
}
};
// depending upon options, this thing may or may not preserve the empty term
checkAnalysisConsistency(random, a, random.nextBoolean(), "");
a.close();
}
}
private Analyzer getAnalyzer(int flags) {
return getAnalyzer(flags, null);
}
private Analyzer getAnalyzer(int flags, CharArraySet protectedWords) {
return new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new WordDelimiterGraphFilter(tokenizer, flags, protectedWords));
}
};
}
private static boolean has(int flags, int flag) {
return (flags & flag) != 0;
}
private static boolean isEnglishPossessive(String text, int pos) {
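// note: the substring computed below is discarded, so as written this helper always returns true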
if (pos > 2) {
if ((text.charAt(pos-1) == 's' || text.charAt(pos-1) == 'S') &&
(pos == text.length() || text.charAt(pos) != '-')) {
text = text.substring(0, text.length()-2);
}
}
return true;
}
private static class WordPart {
final String part;
final int startOffset;
final int endOffset;
final int type;
public WordPart(String text, int startOffset, int endOffset) {
this.part = text.substring(startOffset, endOffset);
this.startOffset = startOffset;
this.endOffset = endOffset;
this.type = toType(part.charAt(0));
}
@Override
public String toString() {
return "WordPart(" + part + " " + startOffset + "-" + endOffset + ")";
}
}
private static final int NUMBER = 0;
private static final int LETTER = 1;
private static final int DELIM = 2;
private static int toType(char ch) {
if (Character.isDigit(ch)) {
// numbers
return NUMBER;
} else if (Character.isLetter(ch)) {
// letters
return LETTER;
} else {
// delimiter
return DELIM;
}
}
/** Does (hopefully) the same thing as WordDelimiterGraphFilter, according to the flags, but more slowly, returning all string paths combinations. */
private Set<String> slowWDF(String text, int flags) {
// first make word parts:
List<WordPart> wordParts = new ArrayList<>();
int lastCH = -1;
int wordPartStart = 0;
boolean inToken = false;
for(int i=0;i<text.length();i++) {
char ch = text.charAt(i);
if (toType(ch) == DELIM) {
// delimiter
if (inToken) {
// end current token
wordParts.add(new WordPart(text, wordPartStart, i));
inToken = false;
}
// strip english possessive at the end of this token?:
if (has(flags, STEM_ENGLISH_POSSESSIVE) &&
ch == '\'' && i > 0 &&
i < text.length()-1 &&
(text.charAt(i+1) == 's' || text.charAt(i+1) == 'S') &&
toType(text.charAt(i-1)) == LETTER &&
(i+2 == text.length() || toType(text.charAt(i+2)) == DELIM)) {
i += 2;
}
} else if (inToken == false) {
// start new token
inToken = true;
wordPartStart = i;
} else {
boolean newToken = false;
if (Character.isLetter(lastCH)) {
if (Character.isLetter(ch)) {
if (has(flags, SPLIT_ON_CASE_CHANGE) && Character.isLowerCase(lastCH) && Character.isLowerCase(ch) == false) {
// start new token on lower -> UPPER case change (but not vice versa!)
newToken = true;
}
} else if (has(flags, SPLIT_ON_NUMERICS) && Character.isDigit(ch)) {
// start new token on letter -> number change
newToken = true;
}
} else {
assert Character.isDigit(lastCH);
if (Character.isLetter(ch) && has(flags, SPLIT_ON_NUMERICS) ) {
// start new token on number -> letter change
newToken = true;
}
}
if (newToken) {
wordParts.add(new WordPart(text, wordPartStart, i));
wordPartStart = i;
}
}
lastCH = ch;
}
if (inToken) {
// add last token
wordParts.add(new WordPart(text, wordPartStart, text.length()));
}
Set<String> paths = new HashSet<>();
if (wordParts.isEmpty() == false) {
enumerate(flags, 0, text, wordParts, paths, new StringBuilder());
}
if (has(flags, PRESERVE_ORIGINAL)) {
paths.add(text);
}
if (has(flags, CATENATE_ALL) && wordParts.isEmpty() == false) {
StringBuilder b = new StringBuilder();
for(WordPart wordPart : wordParts) {
b.append(wordPart.part);
}
paths.add(b.toString());
}
return paths;
}
private void add(StringBuilder path, String part) {
if (path.length() != 0) {
path.append(' ');
}
path.append(part);
}
private void add(StringBuilder path, List<WordPart> wordParts, int from, int to) {
if (path.length() != 0) {
path.append(' ');
}
// no spaces:
for(int i=from;i<to;i++) {
path.append(wordParts.get(i).part);
}
}
private void addWithSpaces(StringBuilder path, List<WordPart> wordParts, int from, int to) {
for(int i=from;i<to;i++) {
add(path, wordParts.get(i).part);
}
}
/** Finds the end (exclusive) of the series of part with the same type */
private int endOfRun(List<WordPart> wordParts, int start) {
int upto = start+1;
while(upto < wordParts.size() && wordParts.get(upto).type == wordParts.get(start).type) {
upto++;
}
return upto;
}
/** Recursively enumerates all paths through the word parts */
private void enumerate(int flags, int upto, String text, List<WordPart> wordParts, Set<String> paths, StringBuilder path) {
if (upto == wordParts.size()) {
if (path.length() > 0) {
paths.add(path.toString());
}
} else {
int savLength = path.length();
int end = endOfRun(wordParts, upto);
if (wordParts.get(upto).type == NUMBER) {
// always output single word, optionally surrounded by delims:
if (has(flags, GENERATE_NUMBER_PARTS) || wordParts.size() == 1) {
addWithSpaces(path, wordParts, upto, end);
if (has(flags, CATENATE_NUMBERS)) {
// recurse first with the parts
enumerate(flags, end, text, wordParts, paths, path);
path.setLength(savLength);
// .. and second with the concat
add(path, wordParts, upto, end);
}
} else if (has(flags, CATENATE_NUMBERS)) {
add(path, wordParts, upto, end);
}
enumerate(flags, end, text, wordParts, paths, path);
path.setLength(savLength);
} else {
assert wordParts.get(upto).type == LETTER;
// always output single word, optionally surrounded by delims:
if (has(flags, GENERATE_WORD_PARTS) || wordParts.size() == 1) {
addWithSpaces(path, wordParts, upto, end);
if (has(flags, CATENATE_WORDS)) {
// recurse first with the parts
enumerate(flags, end, text, wordParts, paths, path);
path.setLength(savLength);
// .. and second with the concat
add(path, wordParts, upto, end);
}
} else if (has(flags, CATENATE_WORDS)) {
add(path, wordParts, upto, end);
}
enumerate(flags, end, text, wordParts, paths, path);
path.setLength(savLength);
}
}
}
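// Worked example (illustration only, not part of the original test): for "abc-def" with
// GENERATE_WORD_PARTS | CATENATE_WORDS, slowWDF/enumerate produce exactly two paths:
// "abc def" (the generated parts) and "abcdef" (the catenated run).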
public void testBasicGraphSplits() throws Exception {
assertGraphStrings(getAnalyzer(0),
"PowerShotPlus",
"PowerShotPlus");
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS),
"PowerShotPlus",
"PowerShotPlus");
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS | SPLIT_ON_CASE_CHANGE),
"PowerShotPlus",
"Power Shot Plus");
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS | SPLIT_ON_CASE_CHANGE | PRESERVE_ORIGINAL),
"PowerShotPlus",
"PowerShotPlus",
"Power Shot Plus");
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS),
"Power-Shot-Plus",
"Power Shot Plus");
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS | SPLIT_ON_CASE_CHANGE),
"Power-Shot-Plus",
"Power Shot Plus");
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS | SPLIT_ON_CASE_CHANGE | PRESERVE_ORIGINAL),
"Power-Shot-Plus",
"Power-Shot-Plus",
"Power Shot Plus");
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS | SPLIT_ON_CASE_CHANGE),
"PowerShotPlus",
"Power Shot Plus");
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS | SPLIT_ON_CASE_CHANGE),
"PowerShot1000Plus",
"Power Shot1000Plus");
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS | SPLIT_ON_CASE_CHANGE),
"Power-Shot-Plus",
"Power Shot Plus");
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS | SPLIT_ON_CASE_CHANGE | CATENATE_WORDS),
"PowerShotPlus",
"Power Shot Plus",
"PowerShotPlus");
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS | SPLIT_ON_CASE_CHANGE | CATENATE_WORDS),
"PowerShot1000Plus",
"Power Shot1000Plus",
"PowerShot1000Plus");
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | SPLIT_ON_CASE_CHANGE | CATENATE_WORDS | CATENATE_NUMBERS),
"Power-Shot-1000-17-Plus",
"Power Shot 1000 17 Plus",
"Power Shot 100017 Plus",
"PowerShot 1000 17 Plus",
"PowerShot 100017 Plus");
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | SPLIT_ON_CASE_CHANGE | CATENATE_WORDS | CATENATE_NUMBERS | PRESERVE_ORIGINAL),
"Power-Shot-1000-17-Plus",
"Power-Shot-1000-17-Plus",
"Power Shot 1000 17 Plus",
"Power Shot 100017 Plus",
"PowerShot 1000 17 Plus",
"PowerShot 100017 Plus");
}
/*
public void testToDot() throws Exception {
int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE | PRESERVE_ORIGINAL | CATENATE_WORDS | CATENATE_NUMBERS | STEM_ENGLISH_POSSESSIVE;
String text = "PowerSystem2000-5-Shot's";
WordDelimiterGraphFilter wdf = new WordDelimiterGraphFilter(new CannedTokenStream(new Token(text, 0, text.length())), DEFAULT_WORD_DELIM_TABLE, flags, null);
//StringWriter sw = new StringWriter();
// TokenStreamToDot toDot = new TokenStreamToDot(text, wdf, new PrintWriter(sw));
PrintWriter pw = new PrintWriter("/tmp/foo2.dot");
TokenStreamToDot toDot = new TokenStreamToDot(text, wdf, pw);
toDot.toDot();
pw.close();
//System.out.println("DOT:\n" + sw.toString());
}
*/
private String randomWDFText() {
StringBuilder b = new StringBuilder();
int length = TestUtil.nextInt(random(), 1, 50);
for(int i=0;i<length;i++) {
int surpriseMe = random().nextInt(37);
int lower = -1;
int upper = -1;
if (surpriseMe < 10) {
// lowercase letter
lower = 'a';
upper = 'z';
} else if (surpriseMe < 20) {
// uppercase letter
lower = 'A';
upper = 'Z';
} else if (surpriseMe < 30) {
// digit
lower = '0';
upper = '9';
} else if (surpriseMe < 35) {
// punct
lower = '-';
upper = '-';
} else {
b.append("'s");
}
if (lower != -1) {
b.append((char) TestUtil.nextInt(random(), lower, upper));
}
}
return b.toString();
}
public void testInvalidFlag() throws Exception {
expectThrows(IllegalArgumentException.class,
() -> {
new WordDelimiterGraphFilter(new CannedTokenStream(), 1 << 31, null);
});
}
public void testRandomPaths() throws Exception {
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
String text = randomWDFText();
if (VERBOSE) {
System.out.println("\nTEST: text=" + text + " len=" + text.length());
}
int flags = 0;
if (random().nextBoolean()) {
flags |= GENERATE_WORD_PARTS;
}
if (random().nextBoolean()) {
flags |= GENERATE_NUMBER_PARTS;
}
if (random().nextBoolean()) {
flags |= CATENATE_WORDS;
}
if (random().nextBoolean()) {
flags |= CATENATE_NUMBERS;
}
if (random().nextBoolean()) {
flags |= CATENATE_ALL;
}
if (random().nextBoolean()) {
flags |= PRESERVE_ORIGINAL;
}
if (random().nextBoolean()) {
flags |= SPLIT_ON_CASE_CHANGE;
}
if (random().nextBoolean()) {
flags |= SPLIT_ON_NUMERICS;
}
if (random().nextBoolean()) {
flags |= STEM_ENGLISH_POSSESSIVE;
}
verify(text, flags);
}
}
/** Runs normal and slow WDGF and compares results */
private void verify(String text, int flags) throws IOException {
Set<String> expected = slowWDF(text, flags);
if (VERBOSE) {
for(String path : expected) {
System.out.println(" " + path);
}
}
Set<String> actual = getGraphStrings(getAnalyzer(flags), text);
if (actual.equals(expected) == false) {
StringBuilder b = new StringBuilder();
b.append("\n\nFAIL: text=");
b.append(text);
b.append(" flags=");
b.append(WordDelimiterGraphFilter.flagsToString(flags));
b.append('\n');
b.append(" expected paths:\n");
for (String s : expected) {
b.append(" ");
b.append(s);
if (actual.contains(s) == false) {
b.append(" [missing!]");
}
b.append('\n');
}
b.append(" actual paths:\n");
for (String s : actual) {
b.append(" ");
b.append(s);
if (expected.contains(s) == false) {
b.append(" [unexpected!]");
}
b.append('\n');
}
fail(b.toString());
}
}
public void testOnlyNumbers() throws Exception {
// no token should be produced
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS), "7-586");
}
public void testNoCatenate() throws Exception {
// no catenated token should be produced, only the individual parts
assertGraphStrings(getAnalyzer(GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS), "a-b-c-9-d", "a b c 9 d");
}
public void testCuriousCase1() throws Exception {
verify("u-0L-4836-ip4Gw--13--q7--L07E1", CATENATE_WORDS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE);
}
public void testCuriousCase2() throws Exception {
verify("u-l-p", CATENATE_ALL);
}
public void testOriginalPosLength() throws Exception {
verify("Foo-Bar-Baz", CATENATE_WORDS | SPLIT_ON_CASE_CHANGE | PRESERVE_ORIGINAL);
}
public void testCuriousCase3() throws Exception {
verify("cQzk4-GL0izl0mKM-J8--4m-'s", GENERATE_NUMBER_PARTS | CATENATE_NUMBERS | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS);
}
public void testEmptyString() throws Exception {
WordDelimiterGraphFilter wdf = new WordDelimiterGraphFilter(new CannedTokenStream(new Token("", 0, 0)), DEFAULT_WORD_DELIM_TABLE, GENERATE_WORD_PARTS | CATENATE_ALL | PRESERVE_ORIGINAL, null);
wdf.reset();
assertTrue(wdf.incrementToken());
assertFalse(wdf.incrementToken());
wdf.end();
wdf.close();
}
public void testProtectedWords() throws Exception {
TokenStream tokens = new CannedTokenStream(new Token("foo17-bar", 0, 9),
new Token("foo-bar", 0, 7));
CharArraySet protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("foo17-BAR")), true);
WordDelimiterGraphFilter wdf = new WordDelimiterGraphFilter(tokens, DEFAULT_WORD_DELIM_TABLE, GENERATE_WORD_PARTS | PRESERVE_ORIGINAL | CATENATE_ALL, protectedWords);
assertGraphStrings(wdf,
"foo17-bar foo bar",
"foo17-bar foo-bar",
"foo17-bar foobar");
}
}
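A minimal index-time sketch (not part of this patch): because WordDelimiterGraphFilter emits a token graph, an indexing chain would typically append FlattenGraphFilter, since the index cannot represent position lengths. The flag constants are assumed to be statically imported as in the test above, and the tokenizer choice is illustrative.
Analyzer indexAnalyzer = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer tokenizer = new WhitespaceTokenizer();
    TokenStream stream = new WordDelimiterGraphFilter(tokenizer, GENERATE_WORD_PARTS | CATENATE_WORDS, null);
    stream = new FlattenGraphFilter(stream); // squash the graph so positions can be indexed
    return new TokenStreamComponents(tokenizer, stream);
  }
};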

View File

@ -17,14 +17,22 @@
package org.apache.lucene.analysis.synonym;
import java.io.IOException;
import java.io.StringReader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockGraphTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.FlattenGraphFilter;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -35,7 +43,6 @@ import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.IOUtils;
@ -49,15 +56,6 @@ import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.Util;
import java.io.IOException;
import java.io.StringReader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public class TestSynonymGraphFilter extends BaseTokenStreamTestCase {
/** Set as a side effect by {@link #getAnalyzer} and {@link #getFlattenAnalyzer}. */
@ -1832,7 +1830,7 @@ public class TestSynonymGraphFilter extends BaseTokenStreamTestCase {
new int[] {1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1},
new int[] {1, 1, 1, 1, 4, 3, 1, 1, 2, 1, 1, 1, 1});
assertAllStrings(analyzer, "the usa is wealthy", new String[] {
assertGraphStrings(analyzer, "the usa is wealthy", new String[] {
"the usa is wealthy",
"the united states is wealthy",
"the u s a is wealthy",
@ -1924,33 +1922,4 @@ public class TestSynonymGraphFilter extends BaseTokenStreamTestCase {
new int[]{1, 1, 0, 1, 1});
a.close();
}
/**
* Helper method to validate all strings that can be generated from a token stream.
* Uses {@link TokenStreamToAutomaton} to create an automaton. Asserts the finite strings of the automaton are all
* and only the given valid strings.
* @param analyzer analyzer containing the SynonymFilter under test.
* @param text text to be analyzed.
* @param expectedStrings all expected finite strings.
*/
public void assertAllStrings(Analyzer analyzer, String text, String[] expectedStrings) throws IOException {
TokenStream tokenStream = analyzer.tokenStream("dummy", text);
try {
Automaton automaton = new TokenStreamToAutomaton().toAutomaton(tokenStream);
Set<IntsRef> finiteStrings = AutomatonTestUtil.getFiniteStringsRecursive(automaton, -1);
assertEquals("Invalid resulting strings count. Expected " + expectedStrings.length + " was " + finiteStrings.size(),
expectedStrings.length, finiteStrings.size());
Set<String> expectedStringsSet = new HashSet<>(Arrays.asList(expectedStrings));
BytesRefBuilder scratchBytesRefBuilder = new BytesRefBuilder();
for (IntsRef ir: finiteStrings) {
String s = Util.toBytesRef(ir, scratchBytesRefBuilder).utf8ToString().replace((char) TokenStreamToAutomaton.POS_SEP, ' ');
assertTrue("Unexpected string found: " + s, expectedStringsSet.contains(s));
}
} finally {
tokenStream.close();
}
}
}

View File

@ -286,6 +286,56 @@ final class SimpleTextBKDReader extends PointValues implements Accountable {
}
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return estimatePointCount(getIntersectState(visitor), 1, minPackedValue, maxPackedValue);
}
private long estimatePointCount(IntersectState state,
int nodeID, byte[] cellMinPacked, byte[] cellMaxPacked) {
Relation r = state.visitor.compare(cellMinPacked, cellMaxPacked);
if (r == Relation.CELL_OUTSIDE_QUERY) {
// This cell is fully outside of the query shape: stop recursing
return 0L;
} else if (nodeID >= leafNodeOffset) {
// Assume all points match and there are no dups
return maxPointsInLeafNode;
} else {
// Non-leaf node: recurse on the split left and right nodes
int address = nodeID * bytesPerIndexEntry;
int splitDim;
if (numDims == 1) {
splitDim = 0;
} else {
splitDim = splitPackedValues[address++] & 0xff;
}
assert splitDim < numDims;
// TODO: can we alloc & reuse this up front?
byte[] splitPackedValue = new byte[packedBytesLength];
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
final long leftCost = estimatePointCount(state,
2*nodeID,
cellMinPacked, splitPackedValue);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
final long rightCost = estimatePointCount(state,
2*nodeID+1,
splitPackedValue, cellMaxPacked);
return leftCost + rightCost;
}
}
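// Note on the estimate above: a visitor whose compare() always returns CELL_CROSSES_QUERY drives
// the recursion down to every leaf, so the result is (number of leaves) * maxPointsInLeafNode,
// an upper bound on the number of points that intersect() would actually visit.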
/** Copies the split value for this node into the provided byte array */
public void copySplitValue(int nodeID, byte[] splitPackedValue) {
int address = nodeID * bytesPerIndexEntry;

View File

@ -39,6 +39,7 @@ import org.apache.lucene.util.automaton.Automaton;
public class TokenStreamToAutomaton {
private boolean preservePositionIncrements;
private boolean finalOffsetGapAsHole;
private boolean unicodeArcs;
/** Sole constructor. */
@ -51,6 +52,11 @@ public class TokenStreamToAutomaton {
this.preservePositionIncrements = enablePositionIncrements;
}
/** If true, any final offset gaps will result in adding a position hole. */
public void setFinalOffsetGapAsHole(boolean finalOffsetGapAsHole) {
this.finalOffsetGapAsHole = finalOffsetGapAsHole;
}
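// Hypothetical usage sketch (not part of this change): callers that care about trailing holes
// enable the flag before converting, e.g.
//   TokenStreamToAutomaton ts2a = new TokenStreamToAutomaton();
//   ts2a.setFinalOffsetGapAsHole(true);
//   Automaton automaton = ts2a.toAutomaton(tokenStream);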
/** Whether to make transition labels Unicode code points instead of UTF8 bytes,
* <code>false</code> by default */
public void setUnicodeArcs(boolean unicodeArcs) {
@ -118,7 +124,7 @@ public class TokenStreamToAutomaton {
int maxOffset = 0;
while (in.incrementToken()) {
int posInc = posIncAtt.getPositionIncrement();
if (!preservePositionIncrements && posInc > 1) {
if (preservePositionIncrements == false && posInc > 1) {
posInc = 1;
}
assert pos > -1 || posInc > 0;
@ -201,10 +207,35 @@ public class TokenStreamToAutomaton {
}
in.end();
int endState = -1;
if (offsetAtt.endOffset() > maxOffset) {
int endPosInc = posIncAtt.getPositionIncrement();
if (endPosInc == 0 && finalOffsetGapAsHole && offsetAtt.endOffset() > maxOffset) {
endPosInc = 1;
}
if (endPosInc > 0) {
// there were hole(s) after the last token
endState = builder.createState(); endState = builder.createState();
builder.setAccept(endState, true);
// add trailing holes now:
int lastState = endState;
while (true) {
int state1 = builder.createState();
builder.addTransition(lastState, state1, HOLE);
endPosInc--;
if (endPosInc == 0) {
builder.setAccept(state1, true);
break;
}
int state2 = builder.createState();
builder.addTransition(state1, state2, POS_SEP);
lastState = state2;
}
} else {
endState = -1;
}
pos++;

View File

@ -43,7 +43,7 @@ public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribut
// OffsetAtt
if (startOffset < 0 || endOffset < startOffset) {
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset; got "
+ "startOffset=" + startOffset + ",endOffset=" + endOffset);
}

View File

@ -107,7 +107,7 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
@Override
public void setOffset(int startOffset, int endOffset) {
if (startOffset < 0 || endOffset < startOffset) {
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset; got "
+ "startOffset=" + startOffset + ",endOffset=" + endOffset);
}
this.startOffset = startOffset;

View File

@ -30,8 +30,7 @@ public class PositionIncrementAttributeImpl extends AttributeImpl implements Pos
@Override
public void setPositionIncrement(int positionIncrement) {
if (positionIncrement < 0) {
throw new IllegalArgumentException
("Increment must be zero or greater: got " + positionIncrement);
throw new IllegalArgumentException("Position increment must be zero or greater; got " + positionIncrement);
}
this.positionIncrement = positionIncrement;
}

View File

@ -30,8 +30,7 @@ public class PositionLengthAttributeImpl extends AttributeImpl implements Positi
@Override
public void setPositionLength(int positionLength) {
if (positionLength < 1) {
throw new IllegalArgumentException
("Position length must be 1 or greater: got " + positionLength);
throw new IllegalArgumentException("Position length must be 1 or greater; got " + positionLength);
}
this.positionLength = positionLength;
}

View File

@ -127,6 +127,11 @@ public abstract class PointsWriter implements Closeable {
}
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMinPackedValue() {
throw new UnsupportedOperationException();

View File

@ -42,6 +42,8 @@ import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Sort;
@ -1810,6 +1812,19 @@ public final class CheckIndex implements Closeable {
long size = values.size();
int docCount = values.getDocCount();
final long crossCost = values.estimatePointCount(new ConstantRelationIntersectVisitor(Relation.CELL_CROSSES_QUERY));
if (crossCost < size / 2) {
throw new RuntimeException("estimatePointCount should return >= size/2 when all cells match");
}
final long insideCost = values.estimatePointCount(new ConstantRelationIntersectVisitor(Relation.CELL_INSIDE_QUERY));
if (insideCost < size) {
throw new RuntimeException("estimatePointCount should return >= size when all cells fully match");
}
final long outsideCost = values.estimatePointCount(new ConstantRelationIntersectVisitor(Relation.CELL_OUTSIDE_QUERY));
if (outsideCost != 0) {
throw new RuntimeException("estimatePointCount should return 0 when no cells match");
}
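// Taken together, the checks above assert that a CROSSES estimate covers at least half of the
// points, an INSIDE estimate covers all of them, and an OUTSIDE estimate is exactly zero.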
VerifyPointsVisitor visitor = new VerifyPointsVisitor(fieldInfo.name, reader.maxDoc(), values);
values.intersect(visitor);
@ -2002,6 +2017,28 @@ public final class CheckIndex implements Closeable {
}
}
private static class ConstantRelationIntersectVisitor implements IntersectVisitor {
private final Relation relation;
ConstantRelationIntersectVisitor(Relation relation) {
this.relation = relation;
}
@Override
public void visit(int docID) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return relation;
}
}
/**
* Test stored fields.

View File

@ -313,10 +313,7 @@ final class DefaultIndexingChain extends DocConsumer {
@Override
public void abort() {
try {
storedFieldsConsumer.abort();
} catch (Throwable t) {
}
try {
// E.g. close any open files in the term vectors writer:

View File

@ -1034,17 +1034,17 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
/** Confirms that the incoming index sort (if any) matches the existing index sort (if any).
* This is unfortunately just best effort, because it could be the old index only has unsorted flushed segments built
* before {@link Version#LUCENE_7_0_0} (flushed segments are sorted in Lucene 7.0). */
* before {@link Version#LUCENE_6_5_0} (flushed segments are sorted in Lucene 7.0). */
private void validateIndexSort() {
private void validateIndexSort() throws CorruptIndexException {
Sort indexSort = config.getIndexSort();
if (indexSort != null) {
for(SegmentCommitInfo info : segmentInfos) {
Sort segmentIndexSort = info.info.getIndexSort();
if (segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) {
throw new IllegalArgumentException("cannot change previous indexSort=" + segmentIndexSort + " (from segment=" + info + ") to new indexSort=" + indexSort);
} else if (segmentIndexSort == null) {
} else if (segmentIndexSort == null && info.info.getVersion().onOrAfter(Version.LUCENE_6_5_0)) {
// Flushed segments are not sorted if they were built with a version prior to 7.0
// Flushed segments are not sorted if they were built with a version prior to 6.5.0
assert info.info.getVersion().onOrAfter(Version.LUCENE_7_0_0) == false;
throw new CorruptIndexException("segment not sorted with indexSort=" + segmentIndexSort, info.info.toString());
}
}
}

View File

@ -26,6 +26,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.bkd.BKDWriter;
@ -220,6 +221,12 @@ public abstract class PointValues {
* to test whether each document is deleted, if necessary. */
public abstract void intersect(IntersectVisitor visitor) throws IOException;
/** Estimate the number of points that would be visited by {@link #intersect}
* with the given {@link IntersectVisitor}. This should run many times faster
* than {@link #intersect(IntersectVisitor)}.
* @see DocIdSetIterator#cost */
public abstract long estimatePointCount(IntersectVisitor visitor);
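// Hypothetical caller sketch (not part of this class): compare the points-based cost against
// another clause's iterator before deciding which one should lead the intersection, e.g.
//   long pointsCost = values.estimatePointCount(visitor);
//   boolean pointsShouldLead = pointsCost <= otherIterator.cost();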
/** Returns minimum value for each dimension, packed, or null if {@link #size} is <code>0</code> */
public abstract byte[] getMinPackedValue() throws IOException;

View File

@ -90,6 +90,11 @@ class PointValuesWriter {
}
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMinPackedValue() {
throw new UnsupportedOperationException();
@ -208,6 +213,11 @@ class PointValuesWriter {
});
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return in.estimatePointCount(visitor);
}
@Override
public byte[] getMinPackedValue() throws IOException {
return in.getMinPackedValue();

View File

@ -42,7 +42,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
/**
* An {@link org.apache.lucene.index.LeafReader} which supports sorting documents by a given
* {@link Sort}. This is package private and is only used by Lucene fo BWC when it needs to merge
* {@link Sort}. This is package private and is only used by Lucene for BWC when it needs to merge
* an unsorted flushed segment built by an older version (newly flushed segments are sorted since version 7.0).
*
* @lucene.experimental
@ -327,6 +327,11 @@ class SortingLeafReader extends FilterLeafReader {
});
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return in.estimatePointCount(visitor);
}
@Override
public byte[] getMinPackedValue() throws IOException {
return in.getMinPackedValue();

View File

@ -0,0 +1,217 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.OptionalLong;
import java.util.stream.Stream;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.PriorityQueue;
final class Boolean2ScorerSupplier extends ScorerSupplier {
private final BooleanWeight weight;
private final Map<BooleanClause.Occur, Collection<ScorerSupplier>> subs;
private final boolean needsScores;
private final int minShouldMatch;
private long cost = -1;
Boolean2ScorerSupplier(BooleanWeight weight,
Map<Occur, Collection<ScorerSupplier>> subs,
boolean needsScores, int minShouldMatch) {
if (minShouldMatch < 0) {
throw new IllegalArgumentException("minShouldMatch must be positive, but got: " + minShouldMatch);
}
if (minShouldMatch != 0 && minShouldMatch >= subs.get(Occur.SHOULD).size()) {
throw new IllegalArgumentException("minShouldMatch must be strictly less than the number of SHOULD clauses");
}
if (needsScores == false && minShouldMatch == 0 && subs.get(Occur.SHOULD).size() > 0
&& subs.get(Occur.MUST).size() + subs.get(Occur.FILTER).size() > 0) {
throw new IllegalArgumentException("Cannot pass purely optional clauses if scores are not needed");
}
if (subs.get(Occur.SHOULD).size() + subs.get(Occur.MUST).size() + subs.get(Occur.FILTER).size() == 0) {
throw new IllegalArgumentException("There should be at least one positive clause");
}
this.weight = weight;
this.subs = subs;
this.needsScores = needsScores;
this.minShouldMatch = minShouldMatch;
}
private long computeCost() {
OptionalLong minRequiredCost = Stream.concat(
subs.get(Occur.MUST).stream(),
subs.get(Occur.FILTER).stream())
.mapToLong(ScorerSupplier::cost)
.min();
if (minRequiredCost.isPresent() && minShouldMatch == 0) {
return minRequiredCost.getAsLong();
} else {
final Collection<ScorerSupplier> optionalScorers = subs.get(Occur.SHOULD);
final long shouldCost = MinShouldMatchSumScorer.cost(
optionalScorers.stream().mapToLong(ScorerSupplier::cost),
optionalScorers.size(), minShouldMatch);
return Math.min(minRequiredCost.orElse(Long.MAX_VALUE), shouldCost);
}
}
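// Example: with MUST/FILTER costs {100, 10, 50} and minShouldMatch == 0, computeCost() returns 10,
// since the conjunction can never match more documents than its cheapest required clause.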
@Override
public long cost() {
if (cost == -1) {
cost = computeCost();
}
return cost;
}
@Override
public Scorer get(boolean randomAccess) throws IOException {
// three cases: conjunction, disjunction, or mix
// pure conjunction
if (subs.get(Occur.SHOULD).isEmpty()) {
return excl(req(subs.get(Occur.FILTER), subs.get(Occur.MUST), randomAccess), subs.get(Occur.MUST_NOT));
}
// pure disjunction
if (subs.get(Occur.FILTER).isEmpty() && subs.get(Occur.MUST).isEmpty()) {
return excl(opt(subs.get(Occur.SHOULD), minShouldMatch, needsScores, randomAccess), subs.get(Occur.MUST_NOT));
}
// conjunction-disjunction mix:
// we create the required and optional pieces, and then
// combine the two: if minNrShouldMatch > 0, then it's a conjunction: because the
// optional side must match. otherwise it's required + optional
if (minShouldMatch > 0) {
boolean reqRandomAccess = true;
boolean msmRandomAccess = true;
if (randomAccess == false) {
// We need to figure out whether the MUST/FILTER or the SHOULD clauses would lead the iteration
final long reqCost = Stream.concat(
subs.get(Occur.MUST).stream(),
subs.get(Occur.FILTER).stream())
.mapToLong(ScorerSupplier::cost)
.min().getAsLong();
final long msmCost = MinShouldMatchSumScorer.cost(
subs.get(Occur.SHOULD).stream().mapToLong(ScorerSupplier::cost),
subs.get(Occur.SHOULD).size(), minShouldMatch);
reqRandomAccess = reqCost > msmCost;
msmRandomAccess = msmCost > reqCost;
}
Scorer req = excl(req(subs.get(Occur.FILTER), subs.get(Occur.MUST), reqRandomAccess), subs.get(Occur.MUST_NOT));
Scorer opt = opt(subs.get(Occur.SHOULD), minShouldMatch, needsScores, msmRandomAccess);
return new ConjunctionScorer(weight, Arrays.asList(req, opt), Arrays.asList(req, opt));
} else {
assert needsScores;
return new ReqOptSumScorer(
excl(req(subs.get(Occur.FILTER), subs.get(Occur.MUST), randomAccess), subs.get(Occur.MUST_NOT)),
opt(subs.get(Occur.SHOULD), minShouldMatch, needsScores, true));
}
}
/** Create a new scorer for the given required clauses. Note that
* {@code requiredScoring} is a subset of {@code required} containing
* required clauses that should participate in scoring. */
private Scorer req(Collection<ScorerSupplier> requiredNoScoring, Collection<ScorerSupplier> requiredScoring, boolean randomAccess) throws IOException {
if (requiredNoScoring.size() + requiredScoring.size() == 1) {
Scorer req = (requiredNoScoring.isEmpty() ? requiredScoring : requiredNoScoring).iterator().next().get(randomAccess);
if (needsScores == false) {
return req;
}
if (requiredScoring.isEmpty()) {
// Scores are needed but we only have a filter clause
// BooleanWeight expects that calling score() is ok so we need to wrap
// to prevent score() from being propagated
return new FilterScorer(req) {
@Override
public float score() throws IOException {
return 0f;
}
@Override
public int freq() throws IOException {
return 0;
}
};
}
return req;
} else {
long minCost = Math.min(
requiredNoScoring.stream().mapToLong(ScorerSupplier::cost).min().orElse(Long.MAX_VALUE),
requiredScoring.stream().mapToLong(ScorerSupplier::cost).min().orElse(Long.MAX_VALUE));
List<Scorer> requiredScorers = new ArrayList<>();
List<Scorer> scoringScorers = new ArrayList<>();
for (ScorerSupplier s : requiredNoScoring) {
requiredScorers.add(s.get(randomAccess || s.cost() > minCost));
}
for (ScorerSupplier s : requiredScoring) {
Scorer scorer = s.get(randomAccess || s.cost() > minCost);
requiredScorers.add(scorer);
scoringScorers.add(scorer);
}
return new ConjunctionScorer(weight, requiredScorers, scoringScorers);
}
}
private Scorer excl(Scorer main, Collection<ScorerSupplier> prohibited) throws IOException {
if (prohibited.isEmpty()) {
return main;
} else {
return new ReqExclScorer(main, opt(prohibited, 1, false, true));
}
}
private Scorer opt(Collection<ScorerSupplier> optional, int minShouldMatch,
boolean needsScores, boolean randomAccess) throws IOException {
if (optional.size() == 1) {
return optional.iterator().next().get(randomAccess);
} else if (minShouldMatch > 1) {
final List<Scorer> optionalScorers = new ArrayList<>();
final PriorityQueue<ScorerSupplier> pq = new PriorityQueue<ScorerSupplier>(subs.get(Occur.SHOULD).size() - minShouldMatch + 1) {
@Override
protected boolean lessThan(ScorerSupplier a, ScorerSupplier b) {
return a.cost() > b.cost();
}
};
for (ScorerSupplier scorer : subs.get(Occur.SHOULD)) {
ScorerSupplier overflow = pq.insertWithOverflow(scorer);
if (overflow != null) {
optionalScorers.add(overflow.get(true));
}
}
for (ScorerSupplier scorer : pq) {
optionalScorers.add(scorer.get(randomAccess));
}
return new MinShouldMatchSumScorer(weight, optionalScorers, minShouldMatch);
} else {
final List<Scorer> optionalScorers = new ArrayList<>();
for (ScorerSupplier scorer : optional) {
optionalScorers.add(scorer.get(randomAccess));
}
return new DisjunctionSumScorer(weight, optionalScorers, needsScores);
}
}
}
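A consumer-side sketch (illustration only; leadCost stands for the cost of whichever other clause drives iteration): the point of a ScorerSupplier is to expose cost() cheaply before get() builds the actual Scorer.
ScorerSupplier supplier = weight.scorerSupplier(context);
if (supplier != null) {
  long cost = supplier.cost();                   // estimated without building a scorer
  Scorer scorer = supplier.get(cost > leadCost); // request random access if this clause will not lead
  // ... consume the scorer
}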

View File

@ -19,9 +19,11 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Collection;
import java.util.EnumMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
@ -265,7 +267,9 @@ final class BooleanWeight extends Weight {
if (prohibited.isEmpty()) { if (prohibited.isEmpty()) {
return positiveScorer; return positiveScorer;
} else { } else {
Scorer prohibitedScorer = opt(prohibited, 1); Scorer prohibitedScorer = prohibited.size() == 1
? prohibited.get(0)
: new DisjunctionSumScorer(this, prohibited, false);
if (prohibitedScorer.twoPhaseIterator() != null) { if (prohibitedScorer.twoPhaseIterator() != null) {
// ReqExclBulkScorer can't deal efficiently with two-phased prohibited clauses // ReqExclBulkScorer can't deal efficiently with two-phased prohibited clauses
return null; return null;
@ -288,50 +292,48 @@ final class BooleanWeight extends Weight {
@Override @Override
public Scorer scorer(LeafReaderContext context) throws IOException { public Scorer scorer(LeafReaderContext context) throws IOException {
// initially the user provided value, ScorerSupplier scorerSupplier = scorerSupplier(context);
// but if minNrShouldMatch == optional.size(), if (scorerSupplier == null) {
// we will optimize and move these to required, making this 0 return null;
}
return scorerSupplier.get(false);
}
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
int minShouldMatch = query.getMinimumNumberShouldMatch(); int minShouldMatch = query.getMinimumNumberShouldMatch();
List<Scorer> required = new ArrayList<>(); final Map<Occur, Collection<ScorerSupplier>> scorers = new EnumMap<>(Occur.class);
// clauses that are required AND participate in scoring, subset of 'required' for (Occur occur : Occur.values()) {
List<Scorer> requiredScoring = new ArrayList<>(); scorers.put(occur, new ArrayList<>());
List<Scorer> prohibited = new ArrayList<>(); }
List<Scorer> optional = new ArrayList<>();
Iterator<BooleanClause> cIter = query.iterator(); Iterator<BooleanClause> cIter = query.iterator();
for (Weight w : weights) { for (Weight w : weights) {
BooleanClause c = cIter.next(); BooleanClause c = cIter.next();
Scorer subScorer = w.scorer(context); ScorerSupplier subScorer = w.scorerSupplier(context);
if (subScorer == null) { if (subScorer == null) {
if (c.isRequired()) { if (c.isRequired()) {
return null; return null;
} }
} else if (c.isRequired()) {
required.add(subScorer);
if (c.isScoring()) {
requiredScoring.add(subScorer);
}
} else if (c.isProhibited()) {
prohibited.add(subScorer);
} else { } else {
optional.add(subScorer); scorers.get(c.getOccur()).add(subScorer);
} }
} }
// scorer simplifications: // scorer simplifications:
if (optional.size() == minShouldMatch) { if (scorers.get(Occur.SHOULD).size() == minShouldMatch) {
// any optional clauses are in fact required // any optional clauses are in fact required
required.addAll(optional); scorers.get(Occur.MUST).addAll(scorers.get(Occur.SHOULD));
requiredScoring.addAll(optional); scorers.get(Occur.SHOULD).clear();
optional.clear();
minShouldMatch = 0; minShouldMatch = 0;
} }
if (required.isEmpty() && optional.isEmpty()) { if (scorers.get(Occur.FILTER).isEmpty() && scorers.get(Occur.MUST).isEmpty() && scorers.get(Occur.SHOULD).isEmpty()) {
// no required and optional clauses. // no required and optional clauses.
return null; return null;
} else if (optional.size() < minShouldMatch) { } else if (scorers.get(Occur.SHOULD).size() < minShouldMatch) {
// either >1 req scorer, or there are 0 req scorers and at least 1 // either >1 req scorer, or there are 0 req scorers and at least 1
// optional scorer. Therefore if there are not enough optional scorers // optional scorer. Therefore if there are not enough optional scorers
// no documents will be matched by the query // no documents will be matched by the query
@ -339,87 +341,11 @@ final class BooleanWeight extends Weight {
} }
// we don't need scores, so if we have required clauses, drop optional clauses completely // we don't need scores, so if we have required clauses, drop optional clauses completely
if (!needsScores && minShouldMatch == 0 && required.size() > 0) { if (!needsScores && minShouldMatch == 0 && scorers.get(Occur.MUST).size() + scorers.get(Occur.FILTER).size() > 0) {
optional.clear(); scorers.get(Occur.SHOULD).clear();
} }
// three cases: conjunction, disjunction, or mix return new Boolean2ScorerSupplier(this, scorers, needsScores, minShouldMatch);
// pure conjunction
if (optional.isEmpty()) {
return excl(req(required, requiredScoring), prohibited);
} }
// pure disjunction
if (required.isEmpty()) {
return excl(opt(optional, minShouldMatch), prohibited);
}
// conjunction-disjunction mix:
// we create the required and optional pieces, and then
// combine the two: if minNrShouldMatch > 0, then it's a conjunction: because the
// optional side must match. otherwise it's required + optional
Scorer req = excl(req(required, requiredScoring), prohibited);
Scorer opt = opt(optional, minShouldMatch);
if (minShouldMatch > 0) {
return new ConjunctionScorer(this, Arrays.asList(req, opt), Arrays.asList(req, opt));
} else {
return new ReqOptSumScorer(req, opt);
}
}
/** Create a new scorer for the given required clauses. Note that
* {@code requiredScoring} is a subset of {@code required} containing
* required clauses that should participate in scoring. */
private Scorer req(List<Scorer> required, List<Scorer> requiredScoring) {
if (required.size() == 1) {
Scorer req = required.get(0);
if (needsScores == false) {
return req;
}
if (requiredScoring.isEmpty()) {
// Scores are needed but we only have a filter clause
// BooleanWeight expects that calling score() is ok so we need to wrap
// to prevent score() from being propagated
return new FilterScorer(req) {
@Override
public float score() throws IOException {
return 0f;
}
@Override
public int freq() throws IOException {
return 0;
}
};
}
return req;
} else {
return new ConjunctionScorer(this, required, requiredScoring);
}
}
private Scorer excl(Scorer main, List<Scorer> prohibited) throws IOException {
if (prohibited.isEmpty()) {
return main;
} else if (prohibited.size() == 1) {
return new ReqExclScorer(main, prohibited.get(0));
} else {
return new ReqExclScorer(main, new DisjunctionSumScorer(this, prohibited, false));
}
}
private Scorer opt(List<Scorer> optional, int minShouldMatch) throws IOException {
if (optional.size() == 1) {
return optional.get(0);
} else if (minShouldMatch > 1) {
return new MinShouldMatchSumScorer(this, optional, minShouldMatch);
} else {
return new DisjunctionSumScorer(this, optional, needsScores);
}
}
} }

View File

@ -41,7 +41,7 @@ public final class ConjunctionDISI extends DocIdSetIterator {
* returned {@link DocIdSetIterator} might leverage two-phase iteration in
* which case it is possible to retrieve the {@link TwoPhaseIterator} using
* {@link TwoPhaseIterator#unwrap}. */
public static DocIdSetIterator intersectScorers(List<Scorer> scorers) {
public static DocIdSetIterator intersectScorers(Collection<Scorer> scorers) {
if (scorers.size() < 2) {
throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators");
}

View File

@ -20,7 +20,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/** Scorer for conjunctions, sets of queries, all of which are required. */
class ConjunctionScorer extends Scorer {
@ -29,7 +28,7 @@ class ConjunctionScorer extends Scorer {
final Scorer[] scorers;
/** Create a new {@link ConjunctionScorer}, note that {@code scorers} must be a subset of {@code required}. */
ConjunctionScorer(Weight weight, List<Scorer> required, List<Scorer> scorers) {
ConjunctionScorer(Weight weight, Collection<Scorer> required, Collection<Scorer> scorers) {
super(weight);
assert required.containsAll(scorers);
this.disi = ConjunctionDISI.intersectScorers(required);

View File

@ -125,11 +125,15 @@ public final class ConstantScoreQuery extends Query {
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final Scorer innerScorer = innerWeight.scorer(context);
ScorerSupplier innerScorerSupplier = innerWeight.scorerSupplier(context);
if (innerScorer == null) {
if (innerScorerSupplier == null) {
return null;
}
return new ScorerSupplier() {
@Override
public Scorer get(boolean randomAccess) throws IOException {
final Scorer innerScorer = innerScorerSupplier.get(randomAccess);
final float score = score();
return new FilterScorer(innerScorer) {
@Override
@ -147,6 +151,22 @@ public final class ConstantScoreQuery extends Query {
};
}
@Override
public long cost() {
return innerScorerSupplier.cost();
}
};
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(false);
}
};
} else {
return innerWeight;

View File

@ -22,6 +22,8 @@ import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.stream.LongStream;
import java.util.stream.StreamSupport;
import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.PriorityQueue;
@ -47,7 +49,7 @@ import static org.apache.lucene.search.DisiPriorityQueue.rightNode;
*/ */
final class MinShouldMatchSumScorer extends Scorer { final class MinShouldMatchSumScorer extends Scorer {
private static long cost(Collection<Scorer> scorers, int minShouldMatch) { static long cost(LongStream costs, int numScorers, int minShouldMatch) {
// the idea here is the following: a boolean query c1,c2,...cn with minShouldMatch=m // the idea here is the following: a boolean query c1,c2,...cn with minShouldMatch=m
// could be rewritten to: // could be rewritten to:
// (c1 AND (c2..cn|msm=m-1)) OR (!c1 AND (c2..cn|msm=m)) // (c1 AND (c2..cn|msm=m-1)) OR (!c1 AND (c2..cn|msm=m))
@ -61,20 +63,14 @@ final class MinShouldMatchSumScorer extends Scorer {
// If we recurse infinitely, we find out that the cost of a msm query is the sum of the // If we recurse infinitely, we find out that the cost of a msm query is the sum of the
// costs of the num_scorers - minShouldMatch + 1 least costly scorers // costs of the num_scorers - minShouldMatch + 1 least costly scorers
final PriorityQueue<Scorer> pq = new PriorityQueue<Scorer>(scorers.size() - minShouldMatch + 1) { final PriorityQueue<Long> pq = new PriorityQueue<Long>(numScorers - minShouldMatch + 1) {
@Override @Override
protected boolean lessThan(Scorer a, Scorer b) { protected boolean lessThan(Long a, Long b) {
return a.iterator().cost() > b.iterator().cost(); return a > b;
} }
}; };
for (Scorer scorer : scorers) { costs.forEach(pq::insertWithOverflow);
pq.insertWithOverflow(scorer); return StreamSupport.stream(pq.spliterator(), false).mapToLong(Number::longValue).sum();
}
long cost = 0;
for (Scorer scorer = pq.pop(); scorer != null; scorer = pq.pop()) {
cost += scorer.iterator().cost();
}
return cost;
} }
final int minShouldMatch; final int minShouldMatch;
@ -124,7 +120,7 @@ final class MinShouldMatchSumScorer extends Scorer {
children.add(new ChildScorer(scorer, "SHOULD")); children.add(new ChildScorer(scorer, "SHOULD"));
} }
this.childScorers = Collections.unmodifiableCollection(children); this.childScorers = Collections.unmodifiableCollection(children);
this.cost = cost(scorers, minShouldMatch); this.cost = cost(scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost), scorers.size(), minShouldMatch);
} }
@Override @Override

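The comment above derives that the cost of a minShouldMatch=m query over n clauses is the sum of the n - m + 1 least costly clauses. A standalone sketch of that arithmetic (the clause costs 42, 12, 20 mirror the expectations in TestBoolean2ScorerSupplier further down):

import java.util.Arrays;

final class MsmCostSketch {
  // Sum of the (numClauses - minShouldMatch + 1) smallest costs, as derived above.
  static long msmCost(long[] costs, int minShouldMatch) {
    long[] sorted = costs.clone();
    Arrays.sort(sorted);
    int keep = sorted.length - minShouldMatch + 1;
    long sum = 0;
    for (int i = 0; i < keep; i++) {
      sum += sorted[i];
    }
    return sum;
  }

  public static void main(String[] args) {
    // Example: clauses with costs 42, 12, 20 and minShouldMatch=2
    // -> keep the 2 cheapest: 12 + 20 = 32.
    System.out.println(msmCost(new long[] {42, 12, 20}, 2)); // prints 32
  }
}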
View File

@ -26,7 +26,9 @@ import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.document.IntPoint; // javadocs import org.apache.lucene.document.IntPoint; // javadocs
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.StringHelper;
/** /**
@ -104,11 +106,8 @@ public abstract class PointRangeQuery extends Query {
return new ConstantScoreWeight(this, boost) { return new ConstantScoreWeight(this, boost) {
private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException { private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field); return new IntersectVisitor() {
values.intersect(
new IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder; DocIdSetBuilder.BulkAdder adder;
@ -163,12 +162,69 @@ public abstract class PointRangeQuery extends Query {
return Relation.CELL_INSIDE_QUERY; return Relation.CELL_INSIDE_QUERY;
} }
} }
}); };
return result.build(); }
/**
* Create a visitor that clears documents that do NOT match the range.
*/
private IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, int[] cost) {
return new IntersectVisitor() {
@Override
public void visit(int docID) {
result.clear(docID);
cost[0]--;
} }
@Override @Override
public Scorer scorer(LeafReaderContext context) throws IOException { public void visit(int docID, byte[] packedValue) {
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (StringHelper.compare(bytesPerDim, packedValue, offset, lowerPoint, offset) < 0) {
// Doc's value is too low, in this dimension
result.clear(docID);
cost[0]--;
return;
}
if (StringHelper.compare(bytesPerDim, packedValue, offset, upperPoint, offset) > 0) {
// Doc's value is too high, in this dimension
result.clear(docID);
cost[0]--;
return;
}
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
boolean crosses = false;
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, upperPoint, offset) > 0 ||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, lowerPoint, offset) < 0) {
// This dim is entirely outside the query range, so no doc in this cell matches: visit (clear) them all
return Relation.CELL_INSIDE_QUERY;
}
crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, lowerPoint, offset) < 0 ||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, upperPoint, offset) > 0;
}
if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_OUTSIDE_QUERY;
}
}
};
}
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader(); LeafReader reader = context.reader();
PointValues values = reader.getPointValues(field); PointValues values = reader.getPointValues(field);
@ -201,15 +257,69 @@ public abstract class PointRangeQuery extends Query {
allDocsMatch = false; allDocsMatch = false;
} }
DocIdSetIterator iterator; final Weight weight = this;
if (allDocsMatch) { if (allDocsMatch) {
// all docs have a value and all points are within bounds, so everything matches // all docs have a value and all points are within bounds, so everything matches
iterator = DocIdSetIterator.all(reader.maxDoc()); return new ScorerSupplier() {
} else { @Override
iterator = buildMatchingDocIdSet(reader, values).iterator(); public Scorer get(boolean randomAccess) {
return new ConstantScoreScorer(weight, score(),
DocIdSetIterator.all(reader.maxDoc()));
} }
return new ConstantScoreScorer(this, score(), iterator); @Override
public long cost() {
return reader.maxDoc();
}
};
} else {
return new ScorerSupplier() {
final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
final IntersectVisitor visitor = getIntersectVisitor(result);
long cost = -1;
@Override
public Scorer get(boolean randomAccess) throws IOException {
if (values.getDocCount() == reader.maxDoc()
&& values.getDocCount() == values.size()
&& cost() > reader.maxDoc() / 2) {
// If all docs have exactly one value and the estimated cost is greater
// than half the documents in the segment, it may be faster to
// compute the set of documents that do NOT match the range
final FixedBitSet result = new FixedBitSet(reader.maxDoc());
result.set(0, reader.maxDoc());
int[] cost = new int[] { reader.maxDoc() };
values.intersect(getInverseIntersectVisitor(result, cost));
final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
return new ConstantScoreScorer(weight, score(), iterator);
}
values.intersect(visitor);
DocIdSetIterator iterator = result.build().iterator();
return new ConstantScoreScorer(weight, score(), iterator);
}
@Override
public long cost() {
if (cost == -1) {
// Computing the cost may be expensive, so only do it if necessary
cost = values.estimatePointCount(visitor);
assert cost >= 0;
}
return cost;
}
};
}
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(false);
} }
}; };
} }

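A standalone sketch (illustrative numbers, not from this patch) of the decision the ScorerSupplier above makes before building a scorer: fall back to the inverse visitor only when every document has exactly one point value and more than half the segment is expected to match.

final class InverseRangeDecisionSketch {
  static boolean useInverseVisitor(int maxDoc, int docCount, long size, long estimatedCost) {
    // Single-valued field where every doc has a point: docCount == maxDoc == size.
    boolean allDocsHaveOneValue = docCount == maxDoc && docCount == size;
    return allDocsHaveOneValue && estimatedCost > maxDoc / 2;
  }

  public static void main(String[] args) {
    // 1M docs, every doc has exactly one point, and ~800k are estimated to match:
    // it is cheaper to visit the ~200k non-matching points and clear them from an
    // all-set FixedBitSet than to add ~800k matches one by one.
    System.out.println(useInverseVisitor(1_000_000, 1_000_000, 1_000_000L, 800_000L)); // true
  }
}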
View File

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
/**
* A supplier of {@link Scorer}. This makes it possible to get an estimate of the
* cost before building the {@link Scorer}.
*/
public abstract class ScorerSupplier {
/**
* Get the {@link Scorer}. This may not return {@code null} and must be called
* at most once.
* @param randomAccess A hint about the expected usage of the {@link Scorer}.
* If {@link DocIdSetIterator#advance} or {@link TwoPhaseIterator} will be
* used to check whether given doc ids match, then pass {@code true}.
* Otherwise if the {@link Scorer} will be mostly used to lead the iteration
* using {@link DocIdSetIterator#nextDoc()}, then {@code false} should be
* passed. Under doubt, pass {@code false} which usually has a better
* worst-case.
*/
public abstract Scorer get(boolean randomAccess) throws IOException;
/**
* Get an estimate of the cost of the {@link Scorer} that would be returned by {@link #get}.
* This may be a costly operation, so it should only be called if necessary.
* @see DocIdSetIterator#cost
*/
public abstract long cost();
}

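A minimal consumer-side sketch of the contract described above, assuming a Weight, a LeafReaderContext and an externally known cost of the clause that will lead iteration (all parameters of this example, not part of the patch):

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;

final class ScorerSupplierUsageSketch {
  static Scorer buildScorer(Weight weight, LeafReaderContext context, long leadCost) throws IOException {
    ScorerSupplier supplier = weight.scorerSupplier(context);
    if (supplier == null) {
      return null; // nothing matches on this segment
    }
    // cost() is available before any Scorer is built ...
    boolean randomAccess = supplier.cost() > leadCost;
    // ... and get() must be called at most once.
    return supplier.get(randomAccess);
  }
}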
View File

@ -21,7 +21,6 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
@ -73,39 +72,12 @@ public class TermInSetQuery extends Query implements Accountable {
// Same threshold as MultiTermQueryConstantScoreWrapper // Same threshold as MultiTermQueryConstantScoreWrapper
static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16; static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;
private final boolean singleField; // whether all terms are from the same field private final String field;
private final PrefixCodedTerms termData; private final PrefixCodedTerms termData;
private final int termDataHashCode; // cached hashcode of termData private final int termDataHashCode; // cached hashcode of termData
/** /**
* Creates a new {@link TermInSetQuery} from the given collection. It * Creates a new {@link TermInSetQuery} from the given collection of terms.
* can contain duplicate terms and multiple fields.
*/
public TermInSetQuery(Collection<Term> terms) {
Term[] sortedTerms = terms.toArray(new Term[terms.size()]);
// already sorted if we are a SortedSet with natural order
boolean sorted = terms instanceof SortedSet && ((SortedSet<Term>)terms).comparator() == null;
if (!sorted) {
ArrayUtil.timSort(sortedTerms);
}
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
Set<String> fields = new HashSet<>();
Term previous = null;
for (Term term : sortedTerms) {
if (term.equals(previous) == false) {
fields.add(term.field());
builder.add(term);
}
previous = term;
}
singleField = fields.size() == 1;
termData = builder.finish();
termDataHashCode = termData.hashCode();
}
/**
* Creates a new {@link TermInSetQuery} from the given collection for
* a single field. It can contain duplicate terms.
*/ */
public TermInSetQuery(String field, Collection<BytesRef> terms) { public TermInSetQuery(String field, Collection<BytesRef> terms) {
BytesRef[] sortedTerms = terms.toArray(new BytesRef[terms.size()]); BytesRef[] sortedTerms = terms.toArray(new BytesRef[terms.size()]);
@ -125,27 +97,18 @@ public class TermInSetQuery extends Query implements Accountable {
builder.add(field, term); builder.add(field, term);
previous.copyBytes(term); previous.copyBytes(term);
} }
singleField = true; this.field = field;
termData = builder.finish(); termData = builder.finish();
termDataHashCode = termData.hashCode(); termDataHashCode = termData.hashCode();
} }
/** /**
* Creates a new {@link TermInSetQuery} from the given {@link BytesRef} array for * Creates a new {@link TermInSetQuery} from the given array of terms.
* a single field.
*/ */
public TermInSetQuery(String field, BytesRef...terms) { public TermInSetQuery(String field, BytesRef...terms) {
this(field, Arrays.asList(terms)); this(field, Arrays.asList(terms));
} }
/**
* Creates a new {@link TermInSetQuery} from the given array. The array can
* contain duplicate terms and multiple fields.
*/
public TermInSetQuery(final Term... terms) {
this(Arrays.asList(terms));
}
@Override @Override
public Query rewrite(IndexReader reader) throws IOException { public Query rewrite(IndexReader reader) throws IOException {
final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount()); final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
@ -167,6 +130,7 @@ public class TermInSetQuery extends Query implements Accountable {
} }
private boolean equalsTo(TermInSetQuery other) { private boolean equalsTo(TermInSetQuery other) {
// no need to check 'field' explicitly since it is encoded in 'termData'
// termData might be heavy to compare so check the hash code first // termData might be heavy to compare so check the hash code first
return termDataHashCode == other.termDataHashCode && return termDataHashCode == other.termDataHashCode &&
termData.equals(other.termData); termData.equals(other.termData);
@ -260,6 +224,15 @@ public class TermInSetQuery extends Query implements Accountable {
private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException { private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
final LeafReader reader = context.reader(); final LeafReader reader = context.reader();
final Fields fields = reader.fields();
Terms terms = fields.terms(field);
if (terms == null) {
return null;
}
TermsEnum termsEnum = terms.iterator();
PostingsEnum docs = null;
TermIterator iterator = termData.iterator();
// We will first try to collect up to 'threshold' terms into 'matchingTerms' // We will first try to collect up to 'threshold' terms into 'matchingTerms'
// if there are too many terms, we will fall back to building the 'builder' // if there are too many terms, we will fall back to building the 'builder'
final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount()); final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
@ -267,25 +240,9 @@ public class TermInSetQuery extends Query implements Accountable {
List<TermAndState> matchingTerms = new ArrayList<>(threshold); List<TermAndState> matchingTerms = new ArrayList<>(threshold);
DocIdSetBuilder builder = null; DocIdSetBuilder builder = null;
final Fields fields = reader.fields();
String lastField = null;
Terms terms = null;
TermsEnum termsEnum = null;
PostingsEnum docs = null;
TermIterator iterator = termData.iterator();
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) { for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
String field = iterator.field(); assert field.equals(iterator.field());
// comparing references is fine here if (termsEnum.seekExact(term)) {
if (field != lastField) {
terms = fields.terms(field);
if (terms == null) {
termsEnum = null;
} else {
termsEnum = terms.iterator();
}
lastField = field;
}
if (termsEnum != null && termsEnum.seekExact(term)) {
if (matchingTerms == null) { if (matchingTerms == null) {
docs = termsEnum.postings(docs, PostingsEnum.NONE); docs = termsEnum.postings(docs, PostingsEnum.NONE);
builder.add(docs); builder.add(docs);
@ -293,15 +250,7 @@ public class TermInSetQuery extends Query implements Accountable {
matchingTerms.add(new TermAndState(field, termsEnum)); matchingTerms.add(new TermAndState(field, termsEnum));
} else { } else {
assert matchingTerms.size() == threshold; assert matchingTerms.size() == threshold;
if (singleField) {
// common case: all terms are in the same field
// use an optimized builder that leverages terms stats to be more efficient
builder = new DocIdSetBuilder(reader.maxDoc(), terms); builder = new DocIdSetBuilder(reader.maxDoc(), terms);
} else {
// corner case: different fields
// don't make assumptions about the docs we will get
builder = new DocIdSetBuilder(reader.maxDoc());
}
docs = termsEnum.postings(docs, PostingsEnum.NONE); docs = termsEnum.postings(docs, PostingsEnum.NONE);
builder.add(docs); builder.add(docs);
for (TermAndState t : matchingTerms) { for (TermAndState t : matchingTerms) {
@ -344,7 +293,9 @@ public class TermInSetQuery extends Query implements Accountable {
@Override @Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
final WeightOrDocIdSet weightOrBitSet = rewrite(context); final WeightOrDocIdSet weightOrBitSet = rewrite(context);
if (weightOrBitSet.weight != null) { if (weightOrBitSet == null) {
return null;
} else if (weightOrBitSet.weight != null) {
return weightOrBitSet.weight.bulkScorer(context); return weightOrBitSet.weight.bulkScorer(context);
} else { } else {
final Scorer scorer = scorer(weightOrBitSet.set); final Scorer scorer = scorer(weightOrBitSet.set);
@ -358,7 +309,9 @@ public class TermInSetQuery extends Query implements Accountable {
@Override @Override
public Scorer scorer(LeafReaderContext context) throws IOException { public Scorer scorer(LeafReaderContext context) throws IOException {
final WeightOrDocIdSet weightOrBitSet = rewrite(context); final WeightOrDocIdSet weightOrBitSet = rewrite(context);
if (weightOrBitSet.weight != null) { if (weightOrBitSet == null) {
return null;
} else if (weightOrBitSet.weight != null) {
return weightOrBitSet.weight.scorer(context); return weightOrBitSet.weight.scorer(context);
} else { } else {
return scorer(weightOrBitSet.set); return scorer(weightOrBitSet.set);

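A hypothetical usage sketch of the single-field API after this change (field name and terms are made up for illustration):

import java.util.Arrays;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.util.BytesRef;

final class TermInSetUsageSketch {
  static Query idFilter() {
    // All terms must now target the same field; the old multi-field and
    // Term-based constructors are gone.
    return new TermInSetQuery("id", Arrays.asList(new BytesRef("doc-1"), new BytesRef("doc-2")));
  }
}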
View File

@ -102,6 +102,31 @@ public abstract class Weight {
*/ */
public abstract Scorer scorer(LeafReaderContext context) throws IOException; public abstract Scorer scorer(LeafReaderContext context) throws IOException;
/**
* Optional method.
* Get a {@link ScorerSupplier}, which makes it possible to know the cost of the {@link Scorer}
* before building it. The default implementation calls {@link #scorer} and
* builds a {@link ScorerSupplier} wrapper around it.
* @see #scorer
*/
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final Scorer scorer = scorer(context);
if (scorer == null) {
return null;
}
return new ScorerSupplier() {
@Override
public Scorer get(boolean randomAccess) {
return scorer;
}
@Override
public long cost() {
return scorer.iterator().cost();
}
};
}
/** /**
* Optional method, to return a {@link BulkScorer} to * Optional method, to return a {@link BulkScorer} to
* score the query and send hits to a {@link Collector}. * score the query and send hits to a {@link Collector}.

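A sketch (names and structure assumed, not from this patch) of the kind of decision this hook enables: build the cheaper of two clauses as the lead iterator and hint randomAccess=true for the other:

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;

final class LeadByCostSketch {
  static Scorer[] scorersOrderedByCost(Weight a, Weight b, LeafReaderContext context) throws IOException {
    ScorerSupplier sa = a.scorerSupplier(context);
    ScorerSupplier sb = b.scorerSupplier(context);
    if (sa == null || sb == null) {
      return null; // a required clause has no matches on this segment
    }
    ScorerSupplier lead = sa.cost() <= sb.cost() ? sa : sb;
    ScorerSupplier other = lead == sa ? sb : sa;
    // The lead drives nextDoc(); the other is only advanced to check candidates,
    // so it may prefer a random-access friendly representation.
    return new Scorer[] { lead.get(false), other.get(true) };
  }
}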
View File

@ -223,6 +223,41 @@ public final class BKDReader extends PointValues implements Accountable {
/** Only valid after pushLeft or pushRight, not pop! */ /** Only valid after pushLeft or pushRight, not pop! */
public abstract long getLeafBlockFP(); public abstract long getLeafBlockFP();
/** Return the number of leaves below the current node. */
public int getNumLeaves() {
int leftMostLeafNode = nodeID;
while (leftMostLeafNode < leafNodeOffset) {
leftMostLeafNode = leftMostLeafNode * 2;
}
int rightMostLeafNode = nodeID;
while (rightMostLeafNode < leafNodeOffset) {
rightMostLeafNode = rightMostLeafNode * 2 + 1;
}
final int numLeaves;
if (rightMostLeafNode >= leftMostLeafNode) {
// both are on the same level
numLeaves = rightMostLeafNode - leftMostLeafNode + 1;
} else {
// left is one level deeper than right
numLeaves = rightMostLeafNode - leftMostLeafNode + 1 + leafNodeOffset;
}
assert numLeaves == getNumLeavesSlow(nodeID) : numLeaves + " " + getNumLeavesSlow(nodeID);
return numLeaves;
}
// for assertions
private int getNumLeavesSlow(int node) {
if (node >= 2 * leafNodeOffset) {
return 0;
} else if (node >= leafNodeOffset) {
return 1;
} else {
final int leftCount = getNumLeavesSlow(node * 2);
final int rightCount = getNumLeavesSlow(node * 2 + 1);
return leftCount + rightCount;
}
}
} }
/** Reads the original simple yet heap-heavy index format */ /** Reads the original simple yet heap-heavy index format */
@ -482,10 +517,16 @@ public final class BKDReader extends PointValues implements Accountable {
} }
} }
@Override
public void intersect(IntersectVisitor visitor) throws IOException { public void intersect(IntersectVisitor visitor) throws IOException {
intersect(getIntersectState(visitor), minPackedValue, maxPackedValue); intersect(getIntersectState(visitor), minPackedValue, maxPackedValue);
} }
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return estimatePointCount(getIntersectState(visitor), minPackedValue, maxPackedValue);
}
/** Fast path: this is called when the query box fully encompasses all cells under this node. */ /** Fast path: this is called when the query box fully encompasses all cells under this node. */
private void addAll(IntersectState state) throws IOException { private void addAll(IntersectState state) throws IOException {
//System.out.println("R: addAll nodeID=" + nodeID); //System.out.println("R: addAll nodeID=" + nodeID);
@ -696,6 +737,61 @@ public final class BKDReader extends PointValues implements Accountable {
} }
} }
private long estimatePointCount(IntersectState state, byte[] cellMinPacked, byte[] cellMaxPacked) {
/*
System.out.println("\nR: intersect nodeID=" + state.index.getNodeID());
for(int dim=0;dim<numDims;dim++) {
System.out.println(" dim=" + dim + "\n cellMin=" + new BytesRef(cellMinPacked, dim*bytesPerDim, bytesPerDim) + "\n cellMax=" + new BytesRef(cellMaxPacked, dim*bytesPerDim, bytesPerDim));
}
*/
Relation r = state.visitor.compare(cellMinPacked, cellMaxPacked);
if (r == Relation.CELL_OUTSIDE_QUERY) {
// This cell is fully outside of the query shape: stop recursing
return 0L;
} else if (r == Relation.CELL_INSIDE_QUERY) {
return (long) maxPointsInLeafNode * state.index.getNumLeaves();
} else if (state.index.isLeafNode()) {
// Assume half the points matched
return (maxPointsInLeafNode + 1) / 2;
} else {
// Non-leaf node: recurse on the split left and right nodes
int splitDim = state.index.getSplitDim();
assert splitDim >= 0: "splitDim=" + splitDim;
assert splitDim < numDims;
byte[] splitPackedValue = state.index.getSplitPackedValue();
BytesRef splitDimValue = state.index.getSplitDimValue();
assert splitDimValue.length == bytesPerDim;
//System.out.println(" splitDimValue=" + splitDimValue + " splitDim=" + splitDim);
// make sure cellMin <= splitValue <= cellMax:
assert StringHelper.compare(bytesPerDim, cellMinPacked, splitDim*bytesPerDim, splitDimValue.bytes, splitDimValue.offset) <= 0: "bytesPerDim=" + bytesPerDim + " splitDim=" + splitDim + " numDims=" + numDims;
assert StringHelper.compare(bytesPerDim, cellMaxPacked, splitDim*bytesPerDim, splitDimValue.bytes, splitDimValue.offset) >= 0: "bytesPerDim=" + bytesPerDim + " splitDim=" + splitDim + " numDims=" + numDims;
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
state.index.pushLeft();
final long leftCost = estimatePointCount(state, cellMinPacked, splitPackedValue);
state.index.pop();
// Restore the split dim value since it may have been overwritten while recursing:
System.arraycopy(splitPackedValue, splitDim*bytesPerDim, splitDimValue.bytes, splitDimValue.offset, bytesPerDim);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
state.index.pushRight();
final long rightCost = estimatePointCount(state, splitPackedValue, cellMaxPacked);
state.index.pop();
return leftCost + rightCost;
}
}
@Override @Override
public long ramBytesUsed() { public long ramBytesUsed() {
if (packedIndex != null) { if (packedIndex != null) {

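A worked example (numbers are illustrative) of the estimation rules implemented above: a cell fully inside the query counts all of its leaves as full, a crossing leaf is assumed to match about half its points, and a cell fully outside the query contributes nothing. These are the same expectations exercised by TestLucene60PointsFormat further down.

final class EstimatePointCountSketch {
  public static void main(String[] args) {
    int maxPointsInLeafNode = 1024;

    // A cell fully inside the query that spans 3 leaves:
    long inside = (long) maxPointsInLeafNode * 3;   // 3072
    // A leaf cell that merely crosses the query: assume half its points match.
    long crossing = (maxPointsInLeafNode + 1) / 2;  // 512
    // A cell fully outside the query:
    long outside = 0L;

    System.out.println(inside + " " + crossing + " " + outside);
  }
}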
View File

@ -487,7 +487,7 @@ public class BKDWriter implements Closeable {
assert Arrays.equals(parentSplits, new int[numDims]); assert Arrays.equals(parentSplits, new int[numDims]);
long indexFP = out.getFilePointer(); long indexFP = out.getFilePointer();
writeIndex(out, leafBlockFPs, splitPackedValues); writeIndex(out, Math.toIntExact(countPerLeaf), leafBlockFPs, splitPackedValues);
return indexFP; return indexFP;
} }
@ -645,7 +645,7 @@ public class BKDWriter implements Closeable {
for(int i=0;i<leafBlockFPs.size();i++) { for(int i=0;i<leafBlockFPs.size();i++) {
arr[i] = leafBlockFPs.get(i); arr[i] = leafBlockFPs.get(i);
} }
writeIndex(out, arr, index); writeIndex(out, maxPointsInLeafNode, arr, index);
return indexFP; return indexFP;
} }
@ -1035,7 +1035,7 @@ public class BKDWriter implements Closeable {
// Write index: // Write index:
long indexFP = out.getFilePointer(); long indexFP = out.getFilePointer();
writeIndex(out, leafBlockFPs, splitPackedValues); writeIndex(out, Math.toIntExact(countPerLeaf), leafBlockFPs, splitPackedValues);
return indexFP; return indexFP;
} }
@ -1241,16 +1241,16 @@ public class BKDWriter implements Closeable {
return result; return result;
} }
private void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException { private void writeIndex(IndexOutput out, int countPerLeaf, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {
byte[] packedIndex = packIndex(leafBlockFPs, splitPackedValues); byte[] packedIndex = packIndex(leafBlockFPs, splitPackedValues);
writeIndex(out, leafBlockFPs.length, packedIndex); writeIndex(out, countPerLeaf, leafBlockFPs.length, packedIndex);
} }
private void writeIndex(IndexOutput out, int numLeaves, byte[] packedIndex) throws IOException { private void writeIndex(IndexOutput out, int countPerLeaf, int numLeaves, byte[] packedIndex) throws IOException {
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
out.writeVInt(numDims); out.writeVInt(numDims);
out.writeVInt(maxPointsInLeafNode); out.writeVInt(countPerLeaf);
out.writeVInt(bytesPerDim); out.writeVInt(bytesPerDim);
assert numLeaves > 0; assert numLeaves > 0;

View File

@ -21,16 +21,22 @@ import java.io.PrintWriter;
import java.io.StringWriter; import java.io.StringWriter;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Random; import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.fst.Util;
import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES; import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
@ -565,7 +571,13 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
assertSameLanguage(join(HOLE_A, SEP_A, s2a("abc")), ts); assertSameLanguage(join(HOLE_A, SEP_A, s2a("abc")), ts);
} }
// TODO: testEndsWithHole... but we need posInc to set in TS.end() public void testEndsWithHole() throws Exception {
final TokenStream ts = new CannedTokenStream(1, 0,
new Token[] {
token("abc", 2, 1),
});
assertSameLanguage(join(HOLE_A, SEP_A, s2a("abc"), SEP_A, HOLE_A), ts);
}
public void testSynHangingOverEnd() throws Exception { public void testSynHangingOverEnd() throws Exception {
final TokenStream ts = new CannedTokenStream( final TokenStream ts = new CannedTokenStream(
@ -576,14 +588,47 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
assertSameLanguage(Operations.union(s2a("a"), s2a("X")), ts); assertSameLanguage(Operations.union(s2a("a"), s2a("X")), ts);
} }
/** Returns all paths */
private Set<String> toPathStrings(Automaton a) {
BytesRefBuilder scratchBytesRefBuilder = new BytesRefBuilder();
Set<String> paths = new HashSet<>();
for (IntsRef ir: AutomatonTestUtil.getFiniteStringsRecursive(a, -1)) {
paths.add(Util.toBytesRef(ir, scratchBytesRefBuilder).utf8ToString().replace((char) TokenStreamToAutomaton.POS_SEP, ' '));
}
return paths;
}
private void assertSameLanguage(Automaton expected, TokenStream ts) throws IOException { private void assertSameLanguage(Automaton expected, TokenStream ts) throws IOException {
assertSameLanguage(expected, new TokenStreamToAutomaton().toAutomaton(ts)); assertSameLanguage(expected, new TokenStreamToAutomaton().toAutomaton(ts));
} }
private void assertSameLanguage(Automaton expected, Automaton actual) { private void assertSameLanguage(Automaton expected, Automaton actual) {
assertTrue(Operations.sameLanguage( Automaton expectedDet = Operations.determinize(Operations.removeDeadStates(expected), DEFAULT_MAX_DETERMINIZED_STATES);
Operations.determinize(Operations.removeDeadStates(expected), DEFAULT_MAX_DETERMINIZED_STATES), Automaton actualDet = Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_MAX_DETERMINIZED_STATES);
Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_MAX_DETERMINIZED_STATES))); if (Operations.sameLanguage(expectedDet, actualDet) == false) {
Set<String> expectedPaths = toPathStrings(expectedDet);
Set<String> actualPaths = toPathStrings(actualDet);
StringBuilder b = new StringBuilder();
b.append("expected:\n");
for(String path : expectedPaths) {
b.append(" ");
b.append(path);
if (actualPaths.contains(path) == false) {
b.append(" [missing!]");
}
b.append('\n');
}
b.append("actual:\n");
for(String path : actualPaths) {
b.append(" ");
b.append(path);
if (expectedPaths.contains(path) == false) {
b.append(" [unexpected!]");
}
b.append('\n');
}
fail("accepted language is different:\n\n" + b.toString());
}
} }
public void testTokenStreamGraphWithHoles() throws Exception { public void testTokenStreamGraphWithHoles() throws Exception {

View File

@ -18,29 +18,43 @@ package org.apache.lucene.codecs.lucene60;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat; import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader; import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter; import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.BasePointsFormatTestCase; import org.apache.lucene.index.BasePointsFormatTestCase;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.bkd.BKDWriter;
/** /**
* Tests Lucene60PointsFormat * Tests Lucene60PointsFormat
*/ */
public class TestLucene60PointsFormat extends BasePointsFormatTestCase { public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
private final Codec codec; private final Codec codec;
private final int maxPointsInLeafNode;
public TestLucene60PointsFormat() { public TestLucene60PointsFormat() {
// standard issue // standard issue
Codec defaultCodec = TestUtil.getDefaultCodec(); Codec defaultCodec = TestUtil.getDefaultCodec();
if (random().nextBoolean()) { if (random().nextBoolean()) {
// randomize parameters // randomize parameters
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500); maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
double maxMBSortInHeap = 3.0 + (3*random().nextDouble()); double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
if (VERBOSE) { if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap); System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
@ -66,6 +80,7 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
} else { } else {
// standard issue // standard issue
codec = defaultCodec; codec = defaultCodec;
maxPointsInLeafNode = BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE;
} }
} }
@ -80,4 +95,177 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
super.testMergeStability(); super.testMergeStability();
} }
public void testEstimatePointCount() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
byte[] pointValue = new byte[3];
byte[] uniquePointValue = new byte[3];
random().nextBytes(uniquePointValue);
final int numDocs = atLeast(10000); // make sure we have several leaves
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
if (i == numDocs / 2) {
doc.add(new BinaryPoint("f", uniquePointValue));
} else {
do {
random().nextBytes(pointValue);
} while (Arrays.equals(pointValue, uniquePointValue));
doc.add(new BinaryPoint("f", pointValue));
}
w.addDocument(doc);
}
w.forceMerge(1);
final IndexReader r = DirectoryReader.open(w);
w.close();
final LeafReader lr = getOnlyLeafReader(r);
PointValues points = lr.getPointValues("f");
// If all points match, then the point count is numLeaves * maxPointsInLeafNode
final int numLeaves = (int) Math.ceil((double) numDocs / maxPointsInLeafNode);
assertEquals(numLeaves * maxPointsInLeafNode,
points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_INSIDE_QUERY;
}
}));
// Return 0 if no points match
assertEquals(0,
points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_OUTSIDE_QUERY;
}
}));
// If only one point matches, then the point count is (maxPointsInLeafNode + 1) / 2
assertEquals((maxPointsInLeafNode + 1) / 2,
points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (StringHelper.compare(3, uniquePointValue, 0, maxPackedValue, 0) > 0 ||
StringHelper.compare(3, uniquePointValue, 0, minPackedValue, 0) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
}));
r.close();
dir.close();
}
// The tree is always balanced in the N dims case, and leaves are
// not all full so things are a bit different
public void testEstimatePointCount2Dims() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
byte[][] pointValue = new byte[2][];
pointValue[0] = new byte[3];
pointValue[1] = new byte[3];
byte[][] uniquePointValue = new byte[2][];
uniquePointValue[0] = new byte[3];
uniquePointValue[1] = new byte[3];
random().nextBytes(uniquePointValue[0]);
random().nextBytes(uniquePointValue[1]);
final int numDocs = atLeast(10000); // make sure we have several leaves
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
if (i == numDocs / 2) {
doc.add(new BinaryPoint("f", uniquePointValue));
} else {
do {
random().nextBytes(pointValue[0]);
random().nextBytes(pointValue[1]);
} while (Arrays.equals(pointValue[0], uniquePointValue[0]) || Arrays.equals(pointValue[1], uniquePointValue[1]));
doc.add(new BinaryPoint("f", pointValue));
}
w.addDocument(doc);
}
w.forceMerge(1);
final IndexReader r = DirectoryReader.open(w);
w.close();
final LeafReader lr = getOnlyLeafReader(r);
PointValues points = lr.getPointValues("f");
// With >1 dims, the tree is balanced
int actualMaxPointsInLeafNode = numDocs;
while (actualMaxPointsInLeafNode > maxPointsInLeafNode) {
actualMaxPointsInLeafNode = (actualMaxPointsInLeafNode + 1) / 2;
}
// If all points match, then the point count is numLeaves * actualMaxPointsInLeafNode
final int numLeaves = Integer.highestOneBit((numDocs - 1) / actualMaxPointsInLeafNode) << 1;
assertEquals(numLeaves * actualMaxPointsInLeafNode,
points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_INSIDE_QUERY;
}
}));
// Return 0 if no points match
assertEquals(0,
points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_OUTSIDE_QUERY;
}
}));
// If only one point matches, then the point count is (actualMaxPointsInLeafNode + 1) / 2
assertEquals((actualMaxPointsInLeafNode + 1) / 2,
points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
for (int dim = 0; dim < 2; ++dim) {
if (StringHelper.compare(3, uniquePointValue[0], 0, maxPackedValue, dim * 3) > 0 ||
StringHelper.compare(3, uniquePointValue[0], 0, minPackedValue, dim * 3) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
}
return Relation.CELL_CROSSES_QUERY;
}
}));
r.close();
dir.close();
}
} }

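Worked numbers (numDocs=10000 and maxPointsInLeafNode=512, both illustrative) for the balanced-tree computation used by the 2-dims test above:

final class BalancedLeafMathSketch {
  public static void main(String[] args) {
    int numDocs = 10000, maxPointsInLeafNode = 512;

    // Halve (rounding up) until the per-leaf count fits the configured maximum:
    // 10000 -> 5000 -> 2500 -> 1250 -> 625 -> 313.
    int actual = numDocs;
    while (actual > maxPointsInLeafNode) {
      actual = (actual + 1) / 2;
    }

    // The balanced tree then has a power-of-two number of leaves:
    // highestOneBit(9999 / 313) << 1 = 16 << 1 = 32.
    int numLeaves = Integer.highestOneBit((numDocs - 1) / actual) << 1;

    System.out.println(actual + " leaves=" + numLeaves); // prints "313 leaves=32"
  }
}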
View File

@ -18,15 +18,12 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import com.carrotsearch.randomizedtesting.generators.RandomStrings; import com.carrotsearch.randomizedtesting.generators.RandomStrings;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.Store;
@ -53,25 +50,25 @@ public class TermInSetQueryTest extends LuceneTestCase {
public void testDuel() throws IOException { public void testDuel() throws IOException {
final int iters = atLeast(2); final int iters = atLeast(2);
final String field = "f";
for (int iter = 0; iter < iters; ++iter) { for (int iter = 0; iter < iters; ++iter) {
final List<Term> allTerms = new ArrayList<>(); final List<BytesRef> allTerms = new ArrayList<>();
final int numTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10)); final int numTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
for (int i = 0; i < numTerms; ++i) { for (int i = 0; i < numTerms; ++i) {
final String field = usually() ? "f" : "g";
final String value = TestUtil.randomAnalysisString(random(), 10, true); final String value = TestUtil.randomAnalysisString(random(), 10, true);
allTerms.add(new Term(field, value)); allTerms.add(new BytesRef(value));
} }
Directory dir = newDirectory(); Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir); RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
final int numDocs = atLeast(100); final int numDocs = atLeast(100);
for (int i = 0; i < numDocs; ++i) { for (int i = 0; i < numDocs; ++i) {
Document doc = new Document(); Document doc = new Document();
final Term term = allTerms.get(random().nextInt(allTerms.size())); final BytesRef term = allTerms.get(random().nextInt(allTerms.size()));
doc.add(new StringField(term.field(), term.text(), Store.NO)); doc.add(new StringField(field, term, Store.NO));
iw.addDocument(doc); iw.addDocument(doc);
} }
if (numTerms > 1 && random().nextBoolean()) { if (numTerms > 1 && random().nextBoolean()) {
iw.deleteDocuments(new TermQuery(allTerms.get(0))); iw.deleteDocuments(new TermQuery(new Term(field, allTerms.get(0))));
} }
iw.commit(); iw.commit();
final IndexReader reader = iw.getReader(); final IndexReader reader = iw.getReader();
@ -87,16 +84,16 @@ public class TermInSetQueryTest extends LuceneTestCase {
for (int i = 0; i < 100; ++i) { for (int i = 0; i < 100; ++i) {
final float boost = random().nextFloat() * 10; final float boost = random().nextFloat() * 10;
final int numQueryTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8)); final int numQueryTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
List<Term> queryTerms = new ArrayList<>(); List<BytesRef> queryTerms = new ArrayList<>();
for (int j = 0; j < numQueryTerms; ++j) { for (int j = 0; j < numQueryTerms; ++j) {
queryTerms.add(allTerms.get(random().nextInt(allTerms.size()))); queryTerms.add(allTerms.get(random().nextInt(allTerms.size())));
} }
final BooleanQuery.Builder bq = new BooleanQuery.Builder(); final BooleanQuery.Builder bq = new BooleanQuery.Builder();
for (Term t : queryTerms) { for (BytesRef t : queryTerms) {
bq.add(new TermQuery(t), Occur.SHOULD); bq.add(new TermQuery(new Term(field, t)), Occur.SHOULD);
} }
final Query q1 = new ConstantScoreQuery(bq.build()); final Query q1 = new ConstantScoreQuery(bq.build());
final Query q2 = new TermInSetQuery(queryTerms); final Query q2 = new TermInSetQuery(field, queryTerms);
assertSameMatches(searcher, new BoostQuery(q1, boost), new BoostQuery(q2, boost), true); assertSameMatches(searcher, new BoostQuery(q1, boost), new BoostQuery(q2, boost), true);
} }
@ -118,103 +115,72 @@ public class TermInSetQueryTest extends LuceneTestCase {
} }
} }
private TermInSetQuery termsQuery(boolean singleField, Term...terms) {
return termsQuery(singleField, Arrays.asList(terms));
}
private TermInSetQuery termsQuery(boolean singleField, Collection<Term> termList) {
if (!singleField) {
return new TermInSetQuery(new ArrayList<>(termList));
}
final TermInSetQuery filter;
List<BytesRef> bytes = new ArrayList<>();
String field = null;
for (Term term : termList) {
bytes.add(term.bytes());
if (field != null) {
assertEquals(term.field(), field);
}
field = term.field();
}
assertNotNull(field);
filter = new TermInSetQuery(field, bytes);
return filter;
}
public void testHashCodeAndEquals() { public void testHashCodeAndEquals() {
int num = atLeast(100); int num = atLeast(100);
final boolean singleField = random().nextBoolean(); List<BytesRef> terms = new ArrayList<>();
List<Term> terms = new ArrayList<>(); Set<BytesRef> uniqueTerms = new HashSet<>();
Set<Term> uniqueTerms = new HashSet<>();
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
String field = "field" + (singleField ? "1" : random().nextInt(100));
String string = TestUtil.randomRealisticUnicodeString(random()); String string = TestUtil.randomRealisticUnicodeString(random());
terms.add(new Term(field, string)); terms.add(new BytesRef(string));
uniqueTerms.add(new Term(field, string)); uniqueTerms.add(new BytesRef(string));
TermInSetQuery left = termsQuery(singleField ? random().nextBoolean() : false, uniqueTerms); TermInSetQuery left = new TermInSetQuery("field", uniqueTerms);
Collections.shuffle(terms, random()); Collections.shuffle(terms, random());
TermInSetQuery right = termsQuery(singleField ? random().nextBoolean() : false, terms); TermInSetQuery right = new TermInSetQuery("field", terms);
assertEquals(right, left); assertEquals(right, left);
assertEquals(right.hashCode(), left.hashCode()); assertEquals(right.hashCode(), left.hashCode());
if (uniqueTerms.size() > 1) { if (uniqueTerms.size() > 1) {
List<Term> asList = new ArrayList<>(uniqueTerms); List<BytesRef> asList = new ArrayList<>(uniqueTerms);
asList.remove(0); asList.remove(0);
TermInSetQuery notEqual = termsQuery(singleField ? random().nextBoolean() : false, asList); TermInSetQuery notEqual = new TermInSetQuery("field", asList);
assertFalse(left.equals(notEqual)); assertFalse(left.equals(notEqual));
assertFalse(right.equals(notEqual)); assertFalse(right.equals(notEqual));
} }
} }
TermInSetQuery tq1 = new TermInSetQuery(new Term("thing", "apple")); TermInSetQuery tq1 = new TermInSetQuery("thing", new BytesRef("apple"));
TermInSetQuery tq2 = new TermInSetQuery(new Term("thing", "orange")); TermInSetQuery tq2 = new TermInSetQuery("thing", new BytesRef("orange"));
assertFalse(tq1.hashCode() == tq2.hashCode()); assertFalse(tq1.hashCode() == tq2.hashCode());
// different fields with the same term should have differing hashcodes // different fields with the same term should have differing hashcodes
tq1 = new TermInSetQuery(new Term("thing1", "apple")); tq1 = new TermInSetQuery("thing", new BytesRef("apple"));
tq2 = new TermInSetQuery(new Term("thing2", "apple")); tq2 = new TermInSetQuery("thing2", new BytesRef("apple"));
assertFalse(tq1.hashCode() == tq2.hashCode()); assertFalse(tq1.hashCode() == tq2.hashCode());
} }
public void testSingleFieldEquals() { public void testSimpleEquals() {
// Two terms with the same hash code // Two terms with the same hash code
assertEquals("AaAaBB".hashCode(), "BBBBBB".hashCode()); assertEquals("AaAaBB".hashCode(), "BBBBBB".hashCode());
TermInSetQuery left = termsQuery(true, new Term("id", "AaAaAa"), new Term("id", "AaAaBB")); TermInSetQuery left = new TermInSetQuery("id", new BytesRef("AaAaAa"), new BytesRef("AaAaBB"));
TermInSetQuery right = termsQuery(true, new Term("id", "AaAaAa"), new Term("id", "BBBBBB")); TermInSetQuery right = new TermInSetQuery("id", new BytesRef("AaAaAa"), new BytesRef("BBBBBB"));
assertFalse(left.equals(right)); assertFalse(left.equals(right));
} }
public void testToString() { public void testToString() {
TermInSetQuery termsQuery = new TermInSetQuery(new Term("field1", "a"), TermInSetQuery termsQuery = new TermInSetQuery("field1",
new Term("field1", "b"), new BytesRef("a"), new BytesRef("b"), new BytesRef("c"));
new Term("field1", "c"));
assertEquals("field1:a field1:b field1:c", termsQuery.toString()); assertEquals("field1:a field1:b field1:c", termsQuery.toString());
} }
public void testDedup() { public void testDedup() {
Query query1 = new TermInSetQuery(new Term("foo", "bar")); Query query1 = new TermInSetQuery("foo", new BytesRef("bar"));
Query query2 = new TermInSetQuery(new Term("foo", "bar"), new Term("foo", "bar")); Query query2 = new TermInSetQuery("foo", new BytesRef("bar"), new BytesRef("bar"));
QueryUtils.checkEqual(query1, query2); QueryUtils.checkEqual(query1, query2);
} }
public void testOrderDoesNotMatter() { public void testOrderDoesNotMatter() {
// order of terms if different // order of terms if different
Query query1 = new TermInSetQuery(new Term("foo", "bar"), new Term("foo", "baz")); Query query1 = new TermInSetQuery("foo", new BytesRef("bar"), new BytesRef("baz"));
Query query2 = new TermInSetQuery(new Term("foo", "baz"), new Term("foo", "bar")); Query query2 = new TermInSetQuery("foo", new BytesRef("baz"), new BytesRef("bar"));
QueryUtils.checkEqual(query1, query2);
// order of fields is different
query1 = new TermInSetQuery(new Term("foo", "bar"), new Term("bar", "bar"));
query2 = new TermInSetQuery(new Term("bar", "bar"), new Term("foo", "bar"));
QueryUtils.checkEqual(query1, query2); QueryUtils.checkEqual(query1, query2);
} }
public void testRamBytesUsed() { public void testRamBytesUsed() {
List<Term> terms = new ArrayList<>(); List<BytesRef> terms = new ArrayList<>();
final int numTerms = 1000 + random().nextInt(1000); final int numTerms = 1000 + random().nextInt(1000);
for (int i = 0; i < numTerms; ++i) { for (int i = 0; i < numTerms; ++i) {
terms.add(new Term("f", RandomStrings.randomUnicodeOfLength(random(), 10))); terms.add(new BytesRef(RandomStrings.randomUnicodeOfLength(random(), 10)));
} }
TermInSetQuery query = new TermInSetQuery(terms); TermInSetQuery query = new TermInSetQuery("f", terms);
final long actualRamBytesUsed = RamUsageTester.sizeOf(query); final long actualRamBytesUsed = RamUsageTester.sizeOf(query);
final long expectedRamBytesUsed = query.ramBytesUsed(); final long expectedRamBytesUsed = query.ramBytesUsed();
// error margin within 5% // error margin within 5%
@ -281,43 +247,40 @@ public class TermInSetQueryTest extends LuceneTestCase {
} }
public void testPullOneTermsEnumPerField() throws Exception { public void testPullOneTermsEnum() throws Exception {
Directory dir = newDirectory(); Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir); RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document(); Document doc = new Document();
doc.add(new StringField("foo", "1", Store.NO)); doc.add(new StringField("foo", "1", Store.NO));
doc.add(new StringField("bar", "2", Store.NO));
doc.add(new StringField("baz", "3", Store.NO));
w.addDocument(doc); w.addDocument(doc);
DirectoryReader reader = w.getReader(); DirectoryReader reader = w.getReader();
w.close(); w.close();
final AtomicInteger counter = new AtomicInteger(); final AtomicInteger counter = new AtomicInteger();
DirectoryReader wrapped = new TermsCountingDirectoryReaderWrapper(reader, counter); DirectoryReader wrapped = new TermsCountingDirectoryReaderWrapper(reader, counter);
final List<Term> terms = new ArrayList<>(); final List<BytesRef> terms = new ArrayList<>();
final Set<String> fields = new HashSet<>();
// enough terms to avoid the rewrite // enough terms to avoid the rewrite
final int numTerms = TestUtil.nextInt(random(), TermInSetQuery.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD + 1, 100); final int numTerms = TestUtil.nextInt(random(), TermInSetQuery.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD + 1, 100);
for (int i = 0; i < numTerms; ++i) { for (int i = 0; i < numTerms; ++i) {
final String field = RandomPicks.randomFrom(random(), new String[] {"foo", "bar", "baz"});
final BytesRef term = new BytesRef(RandomStrings.randomUnicodeOfCodepointLength(random(), 10)); final BytesRef term = new BytesRef(RandomStrings.randomUnicodeOfCodepointLength(random(), 10));
fields.add(field); terms.add(term);
terms.add(new Term(field, term));
} }
new IndexSearcher(wrapped).count(new TermInSetQuery(terms)); assertEquals(0, new IndexSearcher(wrapped).count(new TermInSetQuery("bar", terms)));
assertEquals(fields.size(), counter.get()); assertEquals(0, counter.get()); // missing field
new IndexSearcher(wrapped).count(new TermInSetQuery("foo", terms));
assertEquals(1, counter.get());
wrapped.close(); wrapped.close();
dir.close(); dir.close();
} }
public void testBinaryToString() { public void testBinaryToString() {
TermInSetQuery query = new TermInSetQuery(new Term("field", new BytesRef(new byte[] { (byte) 0xff, (byte) 0xfe }))); TermInSetQuery query = new TermInSetQuery("field", new BytesRef(new byte[] { (byte) 0xff, (byte) 0xfe }));
assertEquals("field:[ff fe]", query.toString()); assertEquals("field:[ff fe]", query.toString());
} }
public void testIsConsideredCostlyByQueryCache() throws IOException { public void testIsConsideredCostlyByQueryCache() throws IOException {
TermInSetQuery query = new TermInSetQuery(new Term("foo", "bar"), new Term("foo", "baz")); TermInSetQuery query = new TermInSetQuery("foo", new BytesRef("bar"), new BytesRef("baz"));
UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy(); UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy();
assertFalse(policy.shouldCache(query)); assertFalse(policy.shouldCache(query));
policy.onUse(query); policy.onUse(query);

View File

@ -0,0 +1,332 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumMap;
import java.util.Map;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
public class TestBoolean2ScorerSupplier extends LuceneTestCase {
private static class FakeScorer extends Scorer {
private final DocIdSetIterator it;
FakeScorer(long cost) {
super(null);
this.it = DocIdSetIterator.all(Math.toIntExact(cost));
}
@Override
public int docID() {
return it.docID();
}
@Override
public float score() throws IOException {
return 1;
}
@Override
public int freq() throws IOException {
return 1;
}
@Override
public DocIdSetIterator iterator() {
return it;
}
@Override
public String toString() {
return "FakeScorer(cost=" + it.cost() + ")";
}
}
private static class FakeScorerSupplier extends ScorerSupplier {
private final long cost;
private final Boolean randomAccess;
FakeScorerSupplier(long cost) {
this.cost = cost;
this.randomAccess = null;
}
FakeScorerSupplier(long cost, boolean randomAccess) {
this.cost = cost;
this.randomAccess = randomAccess;
}
@Override
public Scorer get(boolean randomAccess) throws IOException {
if (this.randomAccess != null) {
assertEquals(this.toString(), this.randomAccess, randomAccess);
}
return new FakeScorer(cost);
}
@Override
public long cost() {
return cost;
}
@Override
public String toString() {
return "FakeLazyScorer(cost=" + cost + ",randomAccess=" + randomAccess + ")";
}
}
public void testConjunctionCost() {
Map<Occur, Collection<ScorerSupplier>> subs = new EnumMap<>(Occur.class);
for (Occur occur : Occur.values()) {
subs.put(occur, new ArrayList<>());
}
subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(42));
assertEquals(42, new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).cost());
subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(12));
assertEquals(12, new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).cost());
subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(20));
assertEquals(12, new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).cost());
}
public void testDisjunctionCost() throws IOException {
Map<Occur, Collection<ScorerSupplier>> subs = new EnumMap<>(Occur.class);
for (Occur occur : Occur.values()) {
subs.put(occur, new ArrayList<>());
}
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42));
ScorerSupplier s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0);
assertEquals(42, s.cost());
assertEquals(42, s.get(random().nextBoolean()).iterator().cost());
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12));
s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0);
assertEquals(42 + 12, s.cost());
assertEquals(42 + 12, s.get(random().nextBoolean()).iterator().cost());
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(20));
s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0);
assertEquals(42 + 12 + 20, s.cost());
assertEquals(42 + 12 + 20, s.get(random().nextBoolean()).iterator().cost());
}
public void testDisjunctionWithMinShouldMatchCost() throws IOException {
Map<Occur, Collection<ScorerSupplier>> subs = new EnumMap<>(Occur.class);
for (Occur occur : Occur.values()) {
subs.put(occur, new ArrayList<>());
}
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42));
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12));
ScorerSupplier s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 1);
assertEquals(42 + 12, s.cost());
assertEquals(42 + 12, s.get(random().nextBoolean()).iterator().cost());
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(20));
s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 1);
assertEquals(42 + 12 + 20, s.cost());
assertEquals(42 + 12 + 20, s.get(random().nextBoolean()).iterator().cost());
s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 2);
assertEquals(12 + 20, s.cost());
assertEquals(12 + 20, s.get(random().nextBoolean()).iterator().cost());
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30));
s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 1);
assertEquals(42 + 12 + 20 + 30, s.cost());
assertEquals(42 + 12 + 20 + 30, s.get(random().nextBoolean()).iterator().cost());
s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 2);
assertEquals(12 + 20 + 30, s.cost());
assertEquals(12 + 20 + 30, s.get(random().nextBoolean()).iterator().cost());
s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 3);
assertEquals(12 + 20, s.cost());
assertEquals(12 + 20, s.get(random().nextBoolean()).iterator().cost());
}
public void testDuelCost() throws Exception {
final int iters = atLeast(1000);
for (int iter = 0; iter < iters; ++iter) {
Map<Occur, Collection<ScorerSupplier>> subs = new EnumMap<>(Occur.class);
for (Occur occur : Occur.values()) {
subs.put(occur, new ArrayList<>());
}
int numClauses = TestUtil.nextInt(random(), 1, 10);
int numShoulds = 0;
int numRequired = 0;
for (int j = 0; j < numClauses; ++j) {
Occur occur = RandomPicks.randomFrom(random(), Occur.values());
subs.get(occur).add(new FakeScorerSupplier(random().nextInt(100)));
if (occur == Occur.SHOULD) {
++numShoulds;
} else if (occur == Occur.FILTER || occur == Occur.MUST) {
numRequired++;
}
}
boolean needsScores = random().nextBoolean();
if (needsScores == false && numRequired > 0) {
numClauses -= numShoulds;
numShoulds = 0;
subs.get(Occur.SHOULD).clear();
}
if (numShoulds + numRequired == 0) {
// only negative clauses, invalid
continue;
}
int minShouldMatch = numShoulds == 0 ? 0 : TestUtil.nextInt(random(), 0, numShoulds - 1);
Boolean2ScorerSupplier supplier = new Boolean2ScorerSupplier(null,
subs, needsScores, minShouldMatch);
long cost1 = supplier.cost();
long cost2 = supplier.get(false).iterator().cost();
assertEquals("clauses=" + subs + ", minShouldMatch=" + minShouldMatch, cost1, cost2);
}
}
// test the tester...
public void testFakeScorerSupplier() {
FakeScorerSupplier randomAccessSupplier = new FakeScorerSupplier(random().nextInt(100), true);
expectThrows(AssertionError.class, () -> randomAccessSupplier.get(false));
FakeScorerSupplier sequentialSupplier = new FakeScorerSupplier(random().nextInt(100), false);
expectThrows(AssertionError.class, () -> sequentialSupplier.get(true));
}
public void testConjunctionRandomAccess() throws IOException {
Map<Occur, Collection<ScorerSupplier>> subs = new EnumMap<>(Occur.class);
for (Occur occur : Occur.values()) {
subs.put(occur, new ArrayList<>());
}
// If sequential access is required, only the least costly clause does not use random-access
subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(42, true));
subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(12, false));
new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(false); // triggers assertions as a side-effect
subs = new EnumMap<>(Occur.class);
for (Occur occur : Occur.values()) {
subs.put(occur, new ArrayList<>());
}
// If random access is required, then we propagate to sub clauses
subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(42, true));
subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(12, true));
new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(true); // triggers assertions as a side-effect
}
public void testDisjunctionRandomAccess() throws IOException {
// disjunctions propagate
for (boolean randomAccess : new boolean[] {false, true}) {
Map<Occur, Collection<ScorerSupplier>> subs = new EnumMap<>(Occur.class);
for (Occur occur : Occur.values()) {
subs.put(occur, new ArrayList<>());
}
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, randomAccess));
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, randomAccess));
new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(randomAccess); // triggers assertions as a side-effect
}
}
public void testDisjunctionWithMinShouldMatchRandomAccess() throws IOException {
Map<Occur, Collection<ScorerSupplier>> subs = new EnumMap<>(Occur.class);
for (Occur occur : Occur.values()) {
subs.put(occur, new ArrayList<>());
}
// Only the most costly clause uses random-access in that case:
// most of the time, we will find agreement between the 2 least costly
// clauses and only then check whether the 3rd one matches too
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, true));
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, false));
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, false));
new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 2).get(false); // triggers assertions as a side-effect
subs = new EnumMap<>(Occur.class);
for (Occur occur : Occur.values()) {
subs.put(occur, new ArrayList<>());
}
// When random-access is true, just propagate
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, true));
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, true));
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, true));
new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 2).get(true); // triggers assertions as a side-effect
subs = new EnumMap<>(Occur.class);
for (Occur occur : Occur.values()) {
subs.put(occur, new ArrayList<>());
}
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, true));
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, false));
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, false));
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(20, false));
new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 2).get(false); // triggers assertions as a side-effect
subs = new EnumMap<>(Occur.class);
for (Occur occur : Occur.values()) {
subs.put(occur, new ArrayList<>());
}
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, true));
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, false));
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, true));
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(20, false));
new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 3).get(false); // triggers assertions as a side-effect
}
public void testProhibitedRandomAccess() throws IOException {
for (boolean randomAccess : new boolean[] {false, true}) {
Map<Occur, Collection<ScorerSupplier>> subs = new EnumMap<>(Occur.class);
for (Occur occur : Occur.values()) {
subs.put(occur, new ArrayList<>());
}
// The MUST_NOT clause always uses random-access
subs.get(Occur.MUST).add(new FakeScorerSupplier(42, randomAccess));
subs.get(Occur.MUST_NOT).add(new FakeScorerSupplier(TestUtil.nextInt(random(), 1, 100), true));
new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(randomAccess); // triggers assertions as a side-effect
}
}
public void testMixedRandomAccess() throws IOException {
for (boolean randomAccess : new boolean[] {false, true}) {
Map<Occur, Collection<ScorerSupplier>> subs = new EnumMap<>(Occur.class);
for (Occur occur : Occur.values()) {
subs.put(occur, new ArrayList<>());
}
// The SHOULD clause always uses random-access if there is a MUST clause
subs.get(Occur.MUST).add(new FakeScorerSupplier(42, randomAccess));
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(TestUtil.nextInt(random(), 1, 100), true));
new Boolean2ScorerSupplier(null, subs, true, 0).get(randomAccess); // triggers assertions as a side-effect
}
}
}
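The assertions above pin down the contract of the new Weight#scorerSupplier API: cost() may be consulted before any Scorer exists, and get(randomAccess) is called at most once. A rough caller-side sketch, assuming leadCost is the estimated cost of the other clauses; the helper class and parameter names are hypothetical:

import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;

final class ScorerSupplierUsage {
  // Builds a Scorer for one segment, choosing random access from the cost estimate.
  static Scorer buildScorer(Weight weight, LeafReaderContext context, long leadCost) throws IOException {
    ScorerSupplier supplier = weight.scorerSupplier(context);
    if (supplier == null) {
      return null; // no matching documents in this segment
    }
    // cost() is only an estimate and does not build the Scorer yet.
    boolean randomAccess = supplier.cost() > leadCost;
    return supplier.get(randomAccess);
  }
}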

View File

@ -206,8 +206,8 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
" MUST ConstantScoreScorer\n" + " MUST ConstantScoreScorer\n" +
" MUST MinShouldMatchSumScorer\n" + " MUST MinShouldMatchSumScorer\n" +
" SHOULD TermScorer body:nutch\n" + " SHOULD TermScorer body:nutch\n" +
" SHOULD TermScorer body:web\n" + " SHOULD TermScorer body:crawler\n" +
" SHOULD TermScorer body:crawler", " SHOULD TermScorer body:web",
summary); summary);
} }
} }

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search;
import java.lang.reflect.Method; import java.lang.reflect.Method;
import java.lang.reflect.Modifier; import java.lang.reflect.Modifier;
import java.util.Arrays;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test; import org.junit.Test;
@ -35,7 +36,7 @@ public class TestFilterWeight extends LuceneTestCase {
final int modifiers = superClassMethod.getModifiers(); final int modifiers = superClassMethod.getModifiers();
if (Modifier.isFinal(modifiers)) continue; if (Modifier.isFinal(modifiers)) continue;
if (Modifier.isStatic(modifiers)) continue; if (Modifier.isStatic(modifiers)) continue;
if (superClassMethod.getName().equals("bulkScorer")) { if (Arrays.asList("bulkScorer", "scorerSupplier").contains(superClassMethod.getName())) {
try { try {
final Method subClassMethod = subClass.getDeclaredMethod( final Method subClassMethod = subClass.getDeclaredMethod(
superClassMethod.getName(), superClassMethod.getName(),

View File

@ -69,6 +69,7 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.bkd.BKDWriter;
import org.junit.BeforeClass; import org.junit.BeforeClass;
public class TestPointQueries extends LuceneTestCase { public class TestPointQueries extends LuceneTestCase {
@ -2080,4 +2081,38 @@ public class TestPointQueries extends LuceneTestCase {
assertTrue(Float.compare(Float.NEGATIVE_INFINITY, FloatPoint.nextDown(Float.NEGATIVE_INFINITY)) == 0); assertTrue(Float.compare(Float.NEGATIVE_INFINITY, FloatPoint.nextDown(Float.NEGATIVE_INFINITY)) == 0);
assertTrue(Float.compare(Float.MAX_VALUE, FloatPoint.nextDown(Float.POSITIVE_INFINITY)) == 0); assertTrue(Float.compare(Float.MAX_VALUE, FloatPoint.nextDown(Float.POSITIVE_INFINITY)) == 0);
} }
public void testInversePointRange() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
final int numDims = TestUtil.nextInt(random(), 1, 3);
final int numDocs = atLeast(10 * BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE); // we need multiple leaves to enable this optimization
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
int[] values = new int[numDims];
Arrays.fill(values, i);
doc.add(new IntPoint("f", values));
w.addDocument(doc);
}
w.forceMerge(1);
IndexReader r = DirectoryReader.open(w);
w.close();
IndexSearcher searcher = newSearcher(r);
int[] low = new int[numDims];
int[] high = new int[numDims];
Arrays.fill(high, numDocs - 2);
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
Arrays.fill(low, 1);
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
Arrays.fill(high, numDocs - 1);
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
Arrays.fill(low, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE + 1);
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
Arrays.fill(high, numDocs - BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
r.close();
dir.close();
}
} }
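The test above drives ranges that cover most of the index in order to exercise the new inverse execution path; a rough sketch of the kind of decision involved, using only the PointValues#estimatePointCount method added elsewhere in this commit (the helper class and threshold are hypothetical, not the actual PointRangeQuery code):

import java.io.IOException;

import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;

final class InverseRangeHeuristic {
  // Worthwhile only on single-valued fields when most documents match the range.
  static boolean buildByExclusion(PointValues values, IntersectVisitor visitor, int maxDoc) throws IOException {
    long estimate = values.estimatePointCount(visitor); // walks only the BKD index nodes, not the leaves
    return estimate > maxDoc / 2;
  }
}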

View File

@ -311,6 +311,11 @@ public class TestDocIdSetBuilder extends LuceneTestCase {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
@Override
public long estimatePointCount(IntersectVisitor visitor) {
throw new UnsupportedOperationException();
}
@Override @Override
public byte[] getMinPackedValue() throws IOException { public byte[] getMinPackedValue() throws IOException {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();

View File

@ -1104,4 +1104,94 @@ public class TestBKD extends LuceneTestCase {
in.close(); in.close();
dir.close(); dir.close();
} }
public void testEstimatePointCount() throws IOException {
Directory dir = newDirectory();
final int numValues = atLeast(10000); // make sure to have multiple leaves
final int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
final byte[] pointValue = new byte[numBytesPerDim];
final byte[] uniquePointValue = new byte[numBytesPerDim];
random().nextBytes(uniquePointValue);
BKDWriter w = new BKDWriter(numValues, dir, "_temp", 1, numBytesPerDim, maxPointsInLeafNode,
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, numValues, true);
for (int i = 0; i < numValues; ++i) {
if (i == numValues / 2) {
w.add(uniquePointValue, i);
} else {
do {
random().nextBytes(pointValue);
} while (Arrays.equals(pointValue, uniquePointValue));
w.add(pointValue, i);
}
}
final long indexFP;
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
indexFP = w.finish(out);
w.close();
}
IndexInput pointsIn = dir.openInput("bkd", IOContext.DEFAULT);
pointsIn.seek(indexFP);
BKDReader points = new BKDReader(pointsIn);
int actualMaxPointsInLeafNode = numValues;
while (actualMaxPointsInLeafNode > maxPointsInLeafNode) {
actualMaxPointsInLeafNode = (actualMaxPointsInLeafNode + 1) / 2;
}
// If all points match, then the point count is numLeaves * maxPointsInLeafNode
final int numLeaves = Integer.highestOneBit((numValues - 1) / actualMaxPointsInLeafNode) << 1;
assertEquals(numLeaves * actualMaxPointsInLeafNode,
points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_INSIDE_QUERY;
}
}));
// Return 0 if no points match
assertEquals(0,
points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_OUTSIDE_QUERY;
}
}));
// If only one point matches, then the point count is (actualMaxPointsInLeafNode + 1) / 2
assertEquals((actualMaxPointsInLeafNode + 1) / 2,
points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (StringHelper.compare(numBytesPerDim, uniquePointValue, 0, maxPackedValue, 0) > 0 ||
StringHelper.compare(numBytesPerDim, uniquePointValue, 0, minPackedValue, 0) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
}));
pointsIn.close();
dir.close();
}
} }

View File

@ -220,6 +220,11 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
@Override
public long estimatePointCount(IntersectVisitor visitor) {
throw new UnsupportedOperationException();
}
@Override @Override
public byte[] getMinPackedValue() throws IOException { public byte[] getMinPackedValue() throws IOException {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();

View File

@ -19,9 +19,9 @@ package org.apache.lucene.facet;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.util.BytesRef;
/** /**
* A multi-terms {@link Query} over a {@link FacetField}. * A multi-terms {@link Query} over a {@link FacetField}.
@ -38,7 +38,7 @@ public class MultiFacetQuery extends TermInSetQuery {
* Creates a new {@code MultiFacetQuery} filtering the query on the given dimension. * Creates a new {@code MultiFacetQuery} filtering the query on the given dimension.
*/ */
public MultiFacetQuery(final FacetsConfig facetsConfig, final String dimension, final String[]... paths) { public MultiFacetQuery(final FacetsConfig facetsConfig, final String dimension, final String[]... paths) {
super(toTerms(facetsConfig.getDimConfig(dimension), dimension, paths)); super(facetsConfig.getDimConfig(dimension).indexFieldName, toTerms(dimension, paths));
} }
/** /**
@ -47,14 +47,13 @@ public class MultiFacetQuery extends TermInSetQuery {
* <b>NOTE:</b>Uses FacetsConfig.DEFAULT_DIM_CONFIG. * <b>NOTE:</b>Uses FacetsConfig.DEFAULT_DIM_CONFIG.
*/ */
public MultiFacetQuery(final String dimension, final String[]... paths) { public MultiFacetQuery(final String dimension, final String[]... paths) {
super(toTerms(FacetsConfig.DEFAULT_DIM_CONFIG, dimension, paths)); super(FacetsConfig.DEFAULT_DIM_CONFIG.indexFieldName, toTerms(dimension, paths));
} }
static Collection<Term> toTerms(final FacetsConfig.DimConfig dimConfig, final String dimension, static Collection<BytesRef> toTerms(final String dimension, final String[]... paths) {
final String[]... paths) { final Collection<BytesRef> terms = new ArrayList<>(paths.length);
final Collection<Term> terms = new ArrayList<>(paths.length);
for (String[] path : paths) for (String[] path : paths)
terms.add(FacetQuery.toTerm(dimConfig, dimension, path)); terms.add(new BytesRef(FacetsConfig.pathToString(dimension, path)));
return terms; return terms;
} }

View File

@ -1521,6 +1521,11 @@ public class MemoryIndex {
} }
} }
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return 1L;
}
@Override @Override
public byte[] getMinPackedValue() throws IOException { public byte[] getMinPackedValue() throws IOException {
BytesRef[] values = info.pointValues; BytesRef[] values = info.pointValues;

View File

@ -23,8 +23,10 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
/** /**
@ -33,10 +35,11 @@ import org.apache.lucene.util.BytesRef;
* dense case where most documents match this query, it <b>might</b> be as * dense case where most documents match this query, it <b>might</b> be as
* fast or faster than a regular {@link PointRangeQuery}. * fast or faster than a regular {@link PointRangeQuery}.
* *
* <p> * <b>NOTE:</b> This query is typically best used within a
* <b>NOTE</b>: be very careful using this query: it is * {@link IndexOrDocValuesQuery} alongside a query that uses an indexed
* typically much slower than using {@code TermsQuery}, * structure such as {@link PointValues points} or {@link Terms terms},
* but in certain specialized cases may be faster. * which allows running the query on doc values when that would be more
* efficient, and on an index otherwise.
* *
* @lucene.experimental * @lucene.experimental
*/ */
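To make the NOTE above concrete, a hedged sketch that pairs a doc-values query with an equivalent points query; the field name and value are made up, and DocValuesNumbersQuery is the sandbox query also used by the new TestIndexOrDocValuesQuery later in this commit:

import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.DocValuesNumbersQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;

final class IndexOrDocValuesExample {
  // Lets the engine run on points when the query is selective and on doc values otherwise.
  static Query priceEquals(long value) {
    Query onIndex = LongPoint.newExactQuery("price", value);
    Query onDocValues = new DocValuesNumbersQuery("price", value);
    return new IndexOrDocValuesQuery(onIndex, onDocValues);
  }
}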

View File

@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
/**
* A query that uses either an index (points or terms) or doc values in order
* to run a range query, depending on which one is more efficient.
*/
public final class IndexOrDocValuesQuery extends Query {
private final Query indexQuery, dvQuery;
/**
* Constructor that takes both a query that executes on an index structure
* like the inverted index or the points tree, and another query that
* executes on doc values. Both queries must match the same documents and
* attribute constant scores.
*/
public IndexOrDocValuesQuery(Query indexQuery, Query dvQuery) {
this.indexQuery = indexQuery;
this.dvQuery = dvQuery;
}
@Override
public String toString(String field) {
return indexQuery.toString(field);
}
@Override
public boolean equals(Object obj) {
if (sameClassAs(obj) == false) {
return false;
}
IndexOrDocValuesQuery that = (IndexOrDocValuesQuery) obj;
return indexQuery.equals(that.indexQuery) && dvQuery.equals(that.dvQuery);
}
@Override
public int hashCode() {
int h = classHash();
h = 31 * h + indexQuery.hashCode();
h = 31 * h + dvQuery.hashCode();
return h;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query indexRewrite = indexQuery.rewrite(reader);
Query dvRewrite = dvQuery.rewrite(reader);
if (indexQuery != indexRewrite || dvQuery != dvRewrite) {
return new IndexOrDocValuesQuery(indexRewrite, dvRewrite);
}
return this;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
final Weight indexWeight = indexQuery.createWeight(searcher, needsScores, boost);
final Weight dvWeight = dvQuery.createWeight(searcher, needsScores, boost);
return new ConstantScoreWeight(this, boost) {
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
return indexWeight.bulkScorer(context);
}
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final ScorerSupplier indexScorerSupplier = indexWeight.scorerSupplier(context);
final ScorerSupplier dvScorerSupplier = dvWeight.scorerSupplier(context);
if (indexScorerSupplier == null || dvScorerSupplier == null) {
return null;
}
return new ScorerSupplier() {
@Override
public Scorer get(boolean randomAccess) throws IOException {
return (randomAccess ? dvScorerSupplier : indexScorerSupplier).get(randomAccess);
}
@Override
public long cost() {
return Math.min(indexScorerSupplier.cost(), dvScorerSupplier.cost());
}
};
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(false);
}
};
}
}

View File

@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestIndexOrDocValuesQuery extends LuceneTestCase {
public void testUseIndexForSelectiveQueries() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()
// relies on costs and PointValues.estimatePointCount so we need the default codec
.setCodec(TestUtil.getDefaultCodec()));
for (int i = 0; i < 2000; ++i) {
Document doc = new Document();
if (i == 42) {
doc.add(new StringField("f1", "bar", Store.NO));
doc.add(new LongPoint("f2", 42L));
doc.add(new NumericDocValuesField("f2", 42L));
} else if (i == 100) {
doc.add(new StringField("f1", "foo", Store.NO));
doc.add(new LongPoint("f2", 2L));
doc.add(new NumericDocValuesField("f2", 2L));
} else {
doc.add(new StringField("f1", "bar", Store.NO));
doc.add(new LongPoint("f2", 2L));
doc.add(new NumericDocValuesField("f2", 2L));
}
w.addDocument(doc);
}
w.forceMerge(1);
IndexReader reader = DirectoryReader.open(w);
IndexSearcher searcher = newSearcher(reader);
searcher.setQueryCache(null);
// The term query is more selective, so the IndexOrDocValuesQuery should use doc values
final Query q1 = new BooleanQuery.Builder()
.add(new TermQuery(new Term("f1", "foo")), Occur.MUST)
.add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 2), new DocValuesNumbersQuery("f2", 2L)), Occur.MUST)
.build();
final Weight w1 = searcher.createNormalizedWeight(q1, random().nextBoolean());
final Scorer s1 = w1.scorer(reader.leaves().get(0));
assertNotNull(s1.twoPhaseIterator()); // means we use doc values
// The term query is less selective, so the IndexOrDocValuesQuery should use points
final Query q2 = new BooleanQuery.Builder()
.add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
.add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42), new DocValuesNumbersQuery("f2", 42L)), Occur.MUST)
.build();
final Weight w2 = searcher.createNormalizedWeight(q2, random().nextBoolean());
final Scorer s2 = w2.scorer(reader.leaves().get(0));
assertNull(s2.twoPhaseIterator()); // means we use points
reader.close();
w.close();
dir.close();
}
}

View File

@ -24,7 +24,6 @@ import java.util.List;
import com.carrotsearch.randomizedtesting.annotations.Repeat; import com.carrotsearch.randomizedtesting.annotations.Repeat;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermInSetQuery;
@ -36,6 +35,7 @@ import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree; import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape; import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
@ -127,12 +127,12 @@ public class NumberRangeFacetsTest extends StrategyTestCase {
Collections.shuffle(acceptFieldIds, random()); Collections.shuffle(acceptFieldIds, random());
acceptFieldIds = acceptFieldIds.subList(0, randomInt(acceptFieldIds.size())); acceptFieldIds = acceptFieldIds.subList(0, randomInt(acceptFieldIds.size()));
if (!acceptFieldIds.isEmpty()) { if (!acceptFieldIds.isEmpty()) {
List<Term> terms = new ArrayList<>(); List<BytesRef> terms = new ArrayList<>();
for (Integer acceptDocId : acceptFieldIds) { for (Integer acceptDocId : acceptFieldIds) {
terms.add(new Term("id", acceptDocId.toString())); terms.add(new BytesRef(acceptDocId.toString()));
} }
topAcceptDocs = searchForDocBits(new TermInSetQuery(terms)); topAcceptDocs = searchForDocBits(new TermInSetQuery("id", terms));
} }
} }

View File

@ -332,6 +332,7 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
TokenStreamToAutomaton getTokenStreamToAutomaton() { TokenStreamToAutomaton getTokenStreamToAutomaton() {
final TokenStreamToAutomaton tsta = new TokenStreamToAutomaton(); final TokenStreamToAutomaton tsta = new TokenStreamToAutomaton();
tsta.setPreservePositionIncrements(preservePositionIncrements); tsta.setPreservePositionIncrements(preservePositionIncrements);
tsta.setFinalOffsetGapAsHole(true);
return tsta; return tsta;
} }

View File

@ -41,11 +41,16 @@ import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector; import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Rethrow; import org.apache.lucene.util.Rethrow;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.fst.Util;
/** /**
* Base class for all Lucene unit tests that use TokenStreams. * Base class for all Lucene unit tests that use TokenStreams.
@ -166,6 +171,8 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
final Map<Integer,Integer> posToStartOffset = new HashMap<>(); final Map<Integer,Integer> posToStartOffset = new HashMap<>();
final Map<Integer,Integer> posToEndOffset = new HashMap<>(); final Map<Integer,Integer> posToEndOffset = new HashMap<>();
// TODO: would be nice to be able to assert silly duplicated tokens are not created, but a number of cases do this "legitimately": LUCENE-7622
ts.reset(); ts.reset();
int pos = -1; int pos = -1;
int lastStartOffset = 0; int lastStartOffset = 0;
@ -261,12 +268,12 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
} }
} }
if (posLengthAtt != null) { if (posLengthAtt != null) {
assertTrue("posLength must be >= 1", posLengthAtt.getPositionLength() >= 1); assertTrue("posLength must be >= 1; got: " + posLengthAtt.getPositionLength(), posLengthAtt.getPositionLength() >= 1);
} }
} }
if (ts.incrementToken()) { if (ts.incrementToken()) {
fail("TokenStream has more tokens than expected (expected count=" + output.length + "); extra token=" + termAtt); fail("TokenStream has more tokens than expected (expected count=" + output.length + "); extra token=" + ts.getAttribute(CharTermAttribute.class));
} }
// repeat our extra safety checks for end() // repeat our extra safety checks for end()
@ -977,4 +984,105 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
public static AttributeFactory newAttributeFactory() { public static AttributeFactory newAttributeFactory() {
return newAttributeFactory(random()); return newAttributeFactory(random());
} }
private static String toString(Set<String> strings) {
List<String> stringsList = new ArrayList<>(strings);
Collections.sort(stringsList);
StringBuilder b = new StringBuilder();
for(String s : stringsList) {
b.append(" ");
b.append(s);
b.append('\n');
}
return b.toString();
}
/**
* Enumerates all accepted strings in the token graph created by the analyzer on the provided text, and then
* asserts that they are equal to the expected strings.
* Uses {@link TokenStreamToAutomaton} to create an automaton. Asserts the finite strings of the automaton are all
* and only the given valid strings.
* @param analyzer analyzer producing the token graph under test.
* @param text text to be analyzed.
* @param expectedStrings all expected finite strings.
*/
public static void assertGraphStrings(Analyzer analyzer, String text, String... expectedStrings) throws IOException {
checkAnalysisConsistency(random(), analyzer, true, text, true);
try (TokenStream tokenStream = analyzer.tokenStream("dummy", text)) {
assertGraphStrings(tokenStream, expectedStrings);
}
}
/**
* Enumerates all accepted strings in the token graph created by the already initialized {@link TokenStream}.
*/
public static void assertGraphStrings(TokenStream tokenStream, String... expectedStrings) throws IOException {
Automaton automaton = new TokenStreamToAutomaton().toAutomaton(tokenStream);
Set<IntsRef> actualStringPaths = AutomatonTestUtil.getFiniteStringsRecursive(automaton, -1);
Set<String> expectedStringsSet = new HashSet<>(Arrays.asList(expectedStrings));
BytesRefBuilder scratchBytesRefBuilder = new BytesRefBuilder();
Set<String> actualStrings = new HashSet<>();
for (IntsRef ir: actualStringPaths) {
actualStrings.add(Util.toBytesRef(ir, scratchBytesRefBuilder).utf8ToString().replace((char) TokenStreamToAutomaton.POS_SEP, ' '));
}
for (String s : actualStrings) {
assertTrue("Analyzer created unexpected string path: " + s + "\nexpected:\n" + toString(expectedStringsSet) + "\nactual:\n" + toString(actualStrings), expectedStringsSet.contains(s));
}
for (String s : expectedStrings) {
assertTrue("Analyzer created unexpected string path: " + s + "\nexpected:\n" + toString(expectedStringsSet) + "\nactual:\n" + toString(actualStrings), actualStrings.contains(s));
}
}
/** Returns all paths accepted by the token stream graph produced by analyzing text with the provided analyzer. The tokens' {@link
* CharTermAttribute} values are concatenated, and separated with a space. */
public static Set<String> getGraphStrings(Analyzer analyzer, String text) throws IOException {
try(TokenStream tokenStream = analyzer.tokenStream("dummy", text)) {
return getGraphStrings(tokenStream);
}
}
/** Returns all paths accepted by the token stream graph produced by the already initialized {@link TokenStream}. */
public static Set<String> getGraphStrings(TokenStream tokenStream) throws IOException {
Automaton automaton = new TokenStreamToAutomaton().toAutomaton(tokenStream);
Set<IntsRef> actualStringPaths = AutomatonTestUtil.getFiniteStringsRecursive(automaton, -1);
BytesRefBuilder scratchBytesRefBuilder = new BytesRefBuilder();
Set<String> paths = new HashSet<>();
for (IntsRef ir: actualStringPaths) {
paths.add(Util.toBytesRef(ir, scratchBytesRefBuilder).utf8ToString().replace((char) TokenStreamToAutomaton.POS_SEP, ' '));
}
return paths;
}
/** Returns a {@code String} summary of the tokens this analyzer produces on this text */
public static String toString(Analyzer analyzer, String text) throws IOException {
try(TokenStream ts = analyzer.tokenStream("field", text)) {
StringBuilder b = new StringBuilder();
CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncAtt = ts.getAttribute(PositionIncrementAttribute.class);
PositionLengthAttribute posLengthAtt = ts.getAttribute(PositionLengthAttribute.class);
OffsetAttribute offsetAtt = ts.getAttribute(OffsetAttribute.class);
assertNotNull(offsetAtt);
ts.reset();
int pos = -1;
while (ts.incrementToken()) {
pos += posIncAtt.getPositionIncrement();
b.append(termAtt);
b.append(" at pos=");
b.append(pos);
if (posLengthAtt != null) {
b.append(" to pos=");
b.append(pos + posLengthAtt.getPositionLength());
}
b.append(" offsets=");
b.append(offsetAtt.startOffset());
b.append('-');
b.append(offsetAtt.endOffset());
b.append('\n');
}
ts.end();
return b.toString();
}
}
} }
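A hypothetical test, not part of this patch, showing how the new graph helpers might be used; it assumes the analysis module's WhitespaceAnalyzer, which produces a single path through the token graph:

import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;

public class TestGraphStringHelpers extends BaseTokenStreamTestCase {
  public void testSinglePathGraph() throws Exception {
    Analyzer a = new WhitespaceAnalyzer();
    // No synonyms and no token splitting, so there is exactly one accepted path.
    assertGraphStrings(a, "fast wifi network", "fast wifi network");
    Set<String> paths = getGraphStrings(a, "fast wifi network");
    assertEquals(1, paths.size());
    a.close();
  }
}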

View File

@ -93,7 +93,10 @@ public class TokenStreamToDot {
final int endOffset = offsetAtt.endOffset(); final int endOffset = offsetAtt.endOffset();
//System.out.println("start=" + startOffset + " end=" + endOffset + " len=" + inputText.length()); //System.out.println("start=" + startOffset + " end=" + endOffset + " len=" + inputText.length());
if (inputText != null) { if (inputText != null) {
arcLabel += " / " + inputText.substring(startOffset, endOffset); String fragment = inputText.substring(startOffset, endOffset);
if (fragment.equals(termAtt.toString()) == false) {
arcLabel += " / " + fragment;
}
} else { } else {
arcLabel += " / " + startOffset + "-" + endOffset; arcLabel += " / " + startOffset + "-" + endOffset;
} }

View File

@ -68,13 +68,8 @@ public class AssertingLiveDocsFormat extends LiveDocsFormat {
@Override @Override
public void writeLiveDocs(MutableBits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) throws IOException { public void writeLiveDocs(MutableBits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) throws IOException {
MutableBits raw = bits; assert bits instanceof AssertingMutableBits;
/** MutableBits raw = (MutableBits) ((AssertingMutableBits)bits).in;
* bits is not necessarily an AssertingMutableBits because index sorting needs to wrap it in a sorted view.
*/
if (bits instanceof AssertingMutableBits) {
raw = (MutableBits) ((AssertingMutableBits) bits).in;
}
check(raw, info.info.maxDoc(), info.getDelCount() + newDelCount); check(raw, info.info.maxDoc(), info.getDelCount() + newDelCount);
in.writeLiveDocs(raw, dir, info, newDelCount, context); in.writeLiveDocs(raw, dir, info, newDelCount, context);
} }

View File

@ -133,6 +133,11 @@ class CrankyPointsFormat extends PointsFormat {
} }
} }
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return delegate.estimatePointCount(visitor);
}
@Override @Override
public byte[] getMinPackedValue() throws IOException { public byte[] getMinPackedValue() throws IOException {
if (random.nextInt(100) == 0) { if (random.nextInt(100) == 0) {

View File

@ -883,6 +883,13 @@ public class AssertingLeafReader extends FilterLeafReader {
in.intersect(new AssertingIntersectVisitor(in.getNumDimensions(), in.getBytesPerDimension(), visitor)); in.intersect(new AssertingIntersectVisitor(in.getNumDimensions(), in.getBytesPerDimension(), visitor));
} }
@Override
public long estimatePointCount(IntersectVisitor visitor) {
long cost = in.estimatePointCount(visitor);
assert cost >= 0;
return cost;
}
@Override @Override
public byte[] getMinPackedValue() throws IOException { public byte[] getMinPackedValue() throws IOException {
return Objects.requireNonNull(in.getMinPackedValue()); return Objects.requireNonNull(in.getMinPackedValue());

View File

@ -33,9 +33,45 @@ class AssertingWeight extends FilterWeight {
@Override @Override
public Scorer scorer(LeafReaderContext context) throws IOException { public Scorer scorer(LeafReaderContext context) throws IOException {
if (random.nextBoolean()) {
final Scorer inScorer = in.scorer(context); final Scorer inScorer = in.scorer(context);
assert inScorer == null || inScorer.docID() == -1; assert inScorer == null || inScorer.docID() == -1;
return AssertingScorer.wrap(new Random(random.nextLong()), inScorer, needsScores); return AssertingScorer.wrap(new Random(random.nextLong()), inScorer, needsScores);
} else {
final ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
if (random.nextBoolean()) {
// Evil: make sure computing the cost has no side effects
scorerSupplier.cost();
}
return scorerSupplier.get(false);
}
}
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final ScorerSupplier inScorerSupplier = in.scorerSupplier(context);
if (inScorerSupplier == null) {
return null;
}
return new ScorerSupplier() {
private boolean getCalled = false;
@Override
public Scorer get(boolean randomAccess) throws IOException {
assert getCalled == false;
getCalled = true;
return AssertingScorer.wrap(new Random(random.nextLong()), inScorerSupplier.get(randomAccess), needsScores);
}
@Override
public long cost() {
final long cost = inScorerSupplier.cost();
assert cost >= 0;
return cost;
}
};
} }
@Override @Override

View File

@ -74,9 +74,6 @@ Optimizations
* SOLR-9584: Support Solr being proxied with another endpoint than default /solr, by using relative links * SOLR-9584: Support Solr being proxied with another endpoint than default /solr, by using relative links
in AdminUI javascripts (Yun Jie Zhou via janhoy) in AdminUI javascripts (Yun Jie Zhou via janhoy)
* SOLR-9941: Clear the deletes lists at UpdateLog before replaying from log. This prevents redundantly pre-applying
DBQs, during the log replay, to every update in the log as if the DBQs were out of order. (hossman, Ishan Chattopadhyaya)
================== 6.5.0 ================== ================== 6.5.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
@ -90,9 +87,22 @@ Apache UIMA 2.3.1
Apache ZooKeeper 3.4.6 Apache ZooKeeper 3.4.6
Jetty 9.3.14.v20161028 Jetty 9.3.14.v20161028
Detailed Change List
----------------------
(No Changes) Bug Fixes
----------------------
* SOLR-9976: Fix init bug in SegmentsInfoRequestHandlerTest (hossman)
* SOLR-9977: Fix config bug in DistribDocExpirationUpdateProcessorTest that allowed false assumptions
about when index version changes (hossman)
Optimizations
----------------------
* SOLR-9941: Clear the deletes lists at UpdateLog before replaying from log. This prevents redundantly pre-applying
DBQs, during the log replay, to every update in the log as if the DBQs were out of order. (hossman, Ishan Chattopadhyaya)
================== 6.4.0 ================== ================== 6.4.0 ==================

View File

@ -295,6 +295,13 @@ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInf
@Override @Override
protected BreakIterator getBreakIterator(String field) { protected BreakIterator getBreakIterator(String field) {
// Use a default fragsize the same as the regex Fragmenter (original Highlighter) since we're
// both likely shooting for sentence-like patterns.
int fragsize = params.getFieldInt(field, HighlightParams.FRAGSIZE, LuceneRegexFragmenter.DEFAULT_FRAGMENT_SIZE);
if (fragsize == 0) { // special value; no fragmenting
return new WholeBreakIterator();
}
String language = params.getFieldParam(field, HighlightParams.BS_LANGUAGE); String language = params.getFieldParam(field, HighlightParams.BS_LANGUAGE);
String country = params.getFieldParam(field, HighlightParams.BS_COUNTRY); String country = params.getFieldParam(field, HighlightParams.BS_COUNTRY);
String variant = params.getFieldParam(field, HighlightParams.BS_VARIANT); String variant = params.getFieldParam(field, HighlightParams.BS_VARIANT);
@ -302,9 +309,6 @@ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInf
String type = params.getFieldParam(field, HighlightParams.BS_TYPE); String type = params.getFieldParam(field, HighlightParams.BS_TYPE);
BreakIterator baseBI = parseBreakIterator(type, locale); BreakIterator baseBI = parseBreakIterator(type, locale);
// Use a default fragsize the same as the regex Fragmenter (original Highlighter) since we're
// both likely shooting for sentence-like patterns.
int fragsize = params.getFieldInt(field, HighlightParams.FRAGSIZE, LuceneRegexFragmenter.DEFAULT_FRAGMENT_SIZE);
if (fragsize <= 1 || baseBI instanceof WholeBreakIterator) { // no real minimum size if (fragsize <= 1 || baseBI instanceof WholeBreakIterator) { // no real minimum size
return baseBI; return baseBI;
} }
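For context, a hypothetical SolrJ request (not part of this patch) that relies on the new hl.fragsize=0 handling; the query string and the use of the plain "hl.method" parameter name are assumptions:

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.common.params.HighlightParams;

final class WholeFieldHighlightRequest {
  static SolrQuery build() {
    SolrQuery q = new SolrQuery("text:document");
    q.setHighlight(true);
    q.set("hl.method", "unified");       // select the unified highlighter
    q.set(HighlightParams.FRAGSIZE, 0);  // 0 now means: no fragmenting, highlight the whole value
    return q;
  }
}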

View File

@ -1,47 +0,0 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
A solrconfig.xml snippet containing indexConfig settings for randomized testing.
-->
<indexConfig>
<!-- this sys property is not set by SolrTestCaseJ4 because we ideally want to use
the RandomMergePolicy in all tests - but some tests expect very specific
Merge behavior, so those tests can set it as needed.
-->
<mergePolicy enable="${solr.tests.useMergePolicy:true}" class="${solr.tests.mergePolicy:org.apache.solr.util.RandomMergePolicy}" />
<mergePolicyFactory enable="${solr.tests.useMergePolicyFactory:true}" class="${solr.tests.mergePolicyFactory:org.apache.solr.util.RandomMergePolicyFactory}" />
<useCompoundFile>${useCompoundFile:false}</useCompoundFile>
<maxBufferedDocs>${solr.tests.maxBufferedDocs}</maxBufferedDocs>
<ramBufferSizeMB>${solr.tests.ramBufferSizeMB}</ramBufferSizeMB>
<mergeScheduler class="${solr.tests.mergeScheduler}" />
<writeLockTimeout>1000</writeLockTimeout>
<commitLockTimeout>10000</commitLockTimeout>
<!-- this sys property is not set by SolrTestCaseJ4 because almost all tests should
use the single process lockType for speed - but tests that explicitly need
to vary the lockType canset it as needed.
-->
<lockType>${solr.tests.lockType:single}</lockType>
</indexConfig>

View File

@ -25,14 +25,25 @@
<dataDir>${solr.data.dir:}</dataDir> <dataDir>${solr.data.dir:}</dataDir>
<indexConfig>
<!-- NOTE: we do *NOT* want Randomized Merging for these tests,
because we need to be able to assert that index changes are only happening
on the shards we expected them to as a result of our deletes.
(the random/mock merge classes can cause new readers to be opened after a commit
even if the index itself hasn't changed - ex: new segments file listing same exact segments)
Instead use Solr defaults for almost everything
-->
<lockType>${solr.tests.lockType:single}</lockType>
</indexConfig>
<directoryFactory name="DirectoryFactory" <directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
<schemaFactory class="ClassicIndexSchemaFactory"/> <schemaFactory class="ClassicIndexSchemaFactory"/>
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion> <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
<xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<updateHandler class="solr.DirectUpdateHandler2"> <updateHandler class="solr.DirectUpdateHandler2">
<updateLog> <updateLog>
<str name="dir">${solr.ulog.dir:}</str> <str name="dir">${solr.ulog.dir:}</str>

View File

@ -16,9 +16,11 @@
*/ */
package org.apache.solr.handler.admin; package org.apache.solr.handler.admin;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.apache.solr.index.LogDocMergePolicyFactory;
import org.apache.solr.util.AbstractSolrTestCase; import org.apache.solr.util.AbstractSolrTestCase;
import org.junit.Before;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ -32,13 +34,17 @@ public class SegmentsInfoRequestHandlerTest extends AbstractSolrTestCase {
@BeforeClass @BeforeClass
public static void beforeClass() throws Exception { public static void beforeClass() throws Exception {
System.setProperty("enable.update.log", "false");
System.setProperty("solr.tests.useMergePolicy", "false");
initCore("solrconfig.xml", "schema12.xml");
}
@Before // we need a consistent segmentation to ensure we don't get a random
public void before() throws Exception { // merge that reduces the total num docs in all segments, or the number of deletes
//
systemSetPropertySolrTestsMergePolicy(LogDocMergePolicy.class.getName());
systemSetPropertySolrTestsMergePolicyFactory(LogDocMergePolicyFactory.class.getName());
System.setProperty("enable.update.log", "false"); // no _version_ in our schema
initCore("solrconfig.xml", "schema12.xml"); // segments API shouldn't depend on _version_ or ulog
// build up an index with at least 2 segments and some deletes
for (int i = 0; i < DOC_COUNT; i++) { for (int i = 0; i < DOC_COUNT; i++) {
assertU(adoc("id","SOLR100" + i, "name","Apache Solr:" + i)); assertU(adoc("id","SOLR100" + i, "name","Apache Solr:" + i));
} }

View File

@ -183,6 +183,42 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
} }
@Test
public void testCollateExtendedResultsWithJsonNl() throws Exception {
final String q = "documemtsss broens";
final String jsonNl = "map";
final boolean collateExtendedResults = random().nextBoolean();
final List<String> testsList = new ArrayList<String>();
if (collateExtendedResults) {
testsList.add("/spellcheck/collations/collation/collationQuery=='document brown'");
testsList.add("/spellcheck/collations/collation/hits==0");
switch (jsonNl) {
case "map":
testsList.add("/spellcheck/collations/collation/misspellingsAndCorrections/documemtsss=='document'");
testsList.add("/spellcheck/collations/collation/misspellingsAndCorrections/broens=='brown'");
break;
default:
fail("unexpected json.nl choice: "+jsonNl);
break;
}
} else {
testsList.add("/spellcheck/collations/collation=='document brown'");
}
final String[] testsArray = new String[testsList.size()];
implTestCollateExtendedResultsWithJsonNl(q, jsonNl, collateExtendedResults, testsList.toArray(testsArray));
}
private void implTestCollateExtendedResultsWithJsonNl(String q, String jsonNl, boolean collateExtendedResults, String ... tests) throws Exception {
final SolrQueryRequest solrQueryRequest = req(
CommonParams.QT, rh,
CommonParams.Q, q,
"json.nl", jsonNl,
SpellCheckComponent.COMPONENT_NAME, "true",
SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, Boolean.toString(collateExtendedResults),
SpellingParams.SPELLCHECK_COLLATE, "true");
assertJQ(solrQueryRequest, tests);
}
@Test @Test
public void testCorrectSpelling() throws Exception { public void testCorrectSpelling() throws Exception {
// Make sure correct spellings are signaled in the response // Make sure correct spellings are signaled in the response

View File

@ -79,7 +79,7 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
assertU(commit()); assertU(commit());
assertQ("multiple snippets test", assertQ("multiple snippets test",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.snippets", "2", "hl.bs.type", "SENTENCE", req("q", "text:document", "sort", "id asc", "hl", "true", "hl.snippets", "2", "hl.bs.type", "SENTENCE",
"hl.fragsize", "0"), "hl.fragsize", "-1"),
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=2", "count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Document</em> snippet one. '", "//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Document</em> snippet one. '",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[2]='<em>Document</em> snippet two.'"); "//lst[@name='highlighting']/lst[@name='101']/arr/str[2]='<em>Document</em> snippet two.'");
@ -214,9 +214,12 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
public void testBreakIteratorWhole() { public void testBreakIteratorWhole() {
assertU(adoc("text", "Document one has a first sentence. Document two has a second sentence.", "id", "103")); assertU(adoc("text", "Document one has a first sentence. Document two has a second sentence.", "id", "103"));
assertU(commit()); assertU(commit());
assertQ("different breakiterator", assertQ("WHOLE breakiterator",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WHOLE", "hl.fragsize", "-1"), req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WHOLE", "hl.fragsize", "-1"),
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first sentence. <em>Document</em> two has a second sentence.'"); "//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first sentence. <em>Document</em> two has a second sentence.'");
assertQ("hl.fragsize 0 is equivalent to WHOLE",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.fragsize", "0"),
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first sentence. <em>Document</em> two has a second sentence.'");
} }
public void testFragsize() { public void testFragsize() {

View File

@ -259,6 +259,21 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
} }
assertEquals(26, ((TermInSetQuery)qq).getTermData().size()); assertEquals(26, ((TermInSetQuery)qq).getTermData().size());
// test terms queries of two different fields (LUCENE-7637 changed to require all terms be in the same field)
StringBuilder sb = new StringBuilder();
for (int i=0; i<17; i++) {
char letter = (char)('a'+i);
sb.append("foo_s:" + letter + " bar_s:" + letter + " ");
}
qParser = QParser.getParser(sb.toString(), req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(2, ((BooleanQuery)q).clauses().size());
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
qq = clause.getQuery();
assertEquals(17, ((TermInSetQuery)qq).getTermData().size());
}
req.close(); req.close();
} }