mirror of https://github.com/apache/lucene.git
commit 750cf6d7a5: Merge branch 'apache-https-master' into jira/solr-8593
@ -431,6 +431,7 @@ reChangesSectionHREF = re.compile('<a id="(.*?)".*?>(.*?)</a>', re.IGNORECASE)
|
|||
reUnderbarNotDashHTML = re.compile(r'<li>(\s*(LUCENE|SOLR)_\d\d\d\d+)')
|
||||
reUnderbarNotDashTXT = re.compile(r'\s+((LUCENE|SOLR)_\d\d\d\d+)', re.MULTILINE)
|
||||
def checkChangesContent(s, version, name, project, isHTML):
|
||||
currentVersionTuple = versionToTuple(version, name)
|
||||
|
||||
if isHTML and s.find('Release %s' % version) == -1:
|
||||
raise RuntimeError('did not see "Release %s" in %s' % (version, name))
|
||||
|
@ -459,7 +460,8 @@ def checkChangesContent(s, version, name, project, isHTML):
|
|||
raise RuntimeError('did not see "%s" in %s' % (sub, name))
|
||||
|
||||
if isHTML:
|
||||
# Make sure a section only appears once under each release:
|
||||
# Make sure that a section only appears once under each release,
|
||||
# and that each release is not greater than the current version
|
||||
seenIDs = set()
|
||||
seenText = set()
|
||||
|
||||
|
@ -468,6 +470,9 @@ def checkChangesContent(s, version, name, project, isHTML):
|
|||
if text.lower().startswith('release '):
|
||||
release = text[8:].strip()
|
||||
seenText.clear()
|
||||
releaseTuple = versionToTuple(release, name)
|
||||
if releaseTuple > currentVersionTuple:
|
||||
raise RuntimeError('Future release %s is greater than %s in %s' % (release, version, name))
|
||||
if id in seenIDs:
|
||||
raise RuntimeError('%s has duplicate section "%s" under release "%s"' % (name, text, release))
|
||||
seenIDs.add(id)
|
||||
|
@ -475,6 +480,27 @@ def checkChangesContent(s, version, name, project, isHTML):
|
|||
raise RuntimeError('%s has duplicate section "%s" under release "%s"' % (name, text, release))
|
||||
seenText.add(text)
|
||||
|
||||
|
||||
reVersion = re.compile(r'(\d+)\.(\d+)(?:\.(\d+))?\s*(-alpha|-beta|final|RC\d+)?\s*(?:\[.*\])?', re.IGNORECASE)
|
||||
def versionToTuple(version, name):
|
||||
versionMatch = reVersion.match(version)
|
||||
if versionMatch is None:
|
||||
raise RuntimeError('Version %s in %s cannot be parsed' % (version, name))
|
||||
versionTuple = versionMatch.groups()
|
||||
while versionTuple[-1] is None or versionTuple[-1] == '':
|
||||
versionTuple = versionTuple[:-1]
|
||||
if versionTuple[-1].lower() == '-alpha':
|
||||
versionTuple = versionTuple[:-1] + ('0',)
|
||||
elif versionTuple[-1].lower() == '-beta':
|
||||
versionTuple = versionTuple[:-1] + ('1',)
|
||||
elif versionTuple[-1].lower() == 'final':
|
||||
versionTuple = versionTuple[:-2] + ('100',)
|
||||
elif versionTuple[-1].lower()[:2] == 'rc':
|
||||
versionTuple = versionTuple[:-2] + (versionTuple[-1][2:],)
|
||||
print('%s: %s' % (version, versionTuple))
|
||||
return versionTuple
|
||||
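For example, given the regex and the suffix branches above, '6.4.0' parses to the tuple ('6', '4', '0') and '6.4.0-alpha' to ('6', '4', '0', '0'); checkChangesContent then compares these tuples to reject release sections that are newer than the version being smoke-tested.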
|
||||
|
||||
reUnixPath = re.compile(r'\b[a-zA-Z_]+=(?:"(?:\\"|[^"])*"' + '|(?:\\\\.|[^"\'\\s])*' + r"|'(?:\\'|[^'])*')" \
|
||||
+ r'|(/(?:\\.|[^"\'\s])*)' \
|
||||
+ r'|("/(?:\\.|[^"])*")' \
|
||||
|
|
|
@ -56,6 +56,11 @@ Other
|
|||
|
||||
======================= Lucene 6.4.0 =======================
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-7533: Classic query parser no longer allows autoGeneratePhraseQueries
|
||||
to be set to true when splitOnWhitespace is false (and vice-versa).
|
||||
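A hedged sketch (not part of this commit) of what the LUCENE-7533 entry above means in practice; the field name and analyzer are placeholders:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.QueryParser;

QueryParser qp = new QueryParser("body", new StandardAnalyzer());
qp.setSplitOnWhitespace(false);
qp.setAutoGeneratePhraseQueries(true); // now throws IllegalArgumentException: the combination is disallowed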
|
||||
New features
|
||||
|
||||
* LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand)
|
||||
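A hedged usage sketch for the LUCENE-5867 entry above (the IndexReader is assumed to be open already):

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.BooleanSimilarity;

IndexSearcher searcher = new IndexSearcher(reader); // reader: an already-open IndexReader
searcher.setSimilarity(new BooleanSimilarity()); // scores ignore term/document statistics; each match contributes its query boost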
|
@ -65,6 +70,15 @@ Bug Fixes
|
|||
* LUCENE-7547: JapaneseTokenizerFactory was failing to close the
|
||||
dictionary file it opened (Markus via Mike McCandless)
|
||||
|
||||
* LUCENE-7562: CompletionFieldsConsumer sometimes throws
|
||||
NullPointerException on ghost fields (Oliver Eilhard via Mike McCandless)
|
||||
|
||||
* LUCENE-7533: Classic query parser: disallow autoGeneratePhraseQueries=true
|
||||
when splitOnWhitespace=false (and vice-versa). (Steve Rowe)
|
||||
|
||||
* LUCENE-7536: ASCIIFoldingFilterFactory used to return an illegal multi-term
|
||||
component when preserveOriginal was set to true. (Adrien Grand)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,
|
||||
|
@ -84,6 +98,9 @@ Improvements
|
|||
|
||||
* LUCENE-7524: Added more detailed explanation of how IDF is computed in
|
||||
ClassicSimilarity and BM25Similarity. (Adrien Grand)
|
||||
|
||||
* LUCENE-7564: AnalyzingInfixSuggester should close its IndexWriter by default
|
||||
at the end of build(). (Steve Rowe)
|
||||
|
||||
* LUCENE-7526: Enhanced UnifiedHighlighter's passage relevancy for queries with
|
||||
wildcards and sometimes just terms. Added shouldPreferPassageRelevancyOverSpeed()
|
||||
|
@ -93,6 +110,11 @@ Improvements
|
|||
* LUCENE-7537: Index time sorting now supports multi-valued sorts
|
||||
using selectors (MIN, MAX, etc.) (Jim Ferenczi via Mike McCandless)
|
||||
|
||||
* LUCENE-7560: QueryBuilder.createFieldQuery is no longer final,
|
||||
giving custom query parsers subclassing QueryBuilder more freedom to
|
||||
control how text is analyzed and converted into a query (Matt Weber
|
||||
via Mike McCandless)
|
||||
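A hedged sketch (not from this commit) of configuring the multi-valued index-time sort described in the LUCENE-7537 entry above; the field name, analyzer, and selector are illustrative:

import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedNumericSortField;

IndexWriterConfig iwc = new IndexWriterConfig(analyzer); // analyzer: any Analyzer instance
// Sort segments by the smallest value of the multi-valued "prices" field, ascending:
iwc.setIndexSort(new Sort(new SortedNumericSortField("prices", SortField.Type.LONG, false, SortedNumericSelector.Type.MIN)));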
|
||||
Other
|
||||
|
||||
* LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file
|
||||
|
@ -100,6 +122,9 @@ Other
|
|||
|
||||
* LUCENE-7534: fix smokeTestRelease.py to run on Cygwin (Mikhail Khludnev)
|
||||
|
||||
* LUCENE-7559: UnifiedHighlighter: Make Passage more exposed to allow passage creation to
|
||||
be customized. (David Smiley)
|
||||
|
||||
Build
|
||||
|
||||
* LUCENE-7387: fix defaultCodec in build.xml to account for the line ending (hossman)
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
|
@ -36,12 +37,14 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
* </fieldType></pre>
|
||||
*/
|
||||
public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
private static final String PRESERVE_ORIGINAL = "preserveOriginal";
|
||||
|
||||
private final boolean preserveOriginal;
|
||||
|
||||
/** Creates a new ASCIIFoldingFilterFactory */
|
||||
public ASCIIFoldingFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
preserveOriginal = getBoolean(args, "preserveOriginal", false);
|
||||
preserveOriginal = getBoolean(args, PRESERVE_ORIGINAL, false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
@ -54,7 +57,17 @@ public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements Mul
|
|||
|
||||
@Override
|
||||
public AbstractAnalysisFactory getMultiTermComponent() {
|
||||
return this;
|
||||
if (preserveOriginal) {
|
||||
// The main use-case for using preserveOriginal is to match regardless of
|
||||
// case but to give better scores to exact matches. Since most multi-term
|
||||
// queries return constant scores anyway, the multi-term component only
|
||||
// emits the folded token
|
||||
Map<String, String> args = new HashMap<>(getOriginalArgs());
|
||||
args.remove(PRESERVE_ORIGINAL);
|
||||
return new ASCIIFoldingFilterFactory(args);
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.apache.lucene.analysis.CannedTokenStream;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
public class TestAsciiFoldingFilterFactory extends BaseTokenStreamFactoryTestCase {
|
||||
|
||||
public void testMultiTermAnalysis() throws IOException {
|
||||
TokenFilterFactory factory = new ASCIIFoldingFilterFactory(Collections.emptyMap());
|
||||
TokenStream stream = new CannedTokenStream(new Token("Été", 0, 3));
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] { "Ete" });
|
||||
|
||||
factory = (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
|
||||
stream = new CannedTokenStream(new Token("Été", 0, 3));
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] { "Ete" });
|
||||
|
||||
factory = new ASCIIFoldingFilterFactory(new HashMap<>(Collections.singletonMap("preserveOriginal", "true")));
|
||||
stream = new CannedTokenStream(new Token("Été", 0, 3));
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] { "Ete", "Été" });
|
||||
|
||||
factory = (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
|
||||
stream = new CannedTokenStream(new Token("Été", 0, 3));
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] { "Ete" });
|
||||
}
|
||||
|
||||
}
|
|
@ -25,13 +25,18 @@ import java.lang.reflect.Modifier;
|
|||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.text.ParsePosition;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.TimeZone;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
@ -62,6 +67,8 @@ import org.apache.lucene.legacy.LegacyNumericUtils;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -165,6 +172,57 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
// a test option to not remove temp dir...):
|
||||
Thread.sleep(100000);
|
||||
}
|
||||
|
||||
// ant test -Dtestcase=TestBackwardsCompatibility -Dtestmethod=testCreateSortedIndex -Dtests.codec=default -Dtests.useSecurityManager=false -Dtests.bwcdir=/tmp/sorted
|
||||
public void testCreateSortedIndex() throws Exception {
|
||||
|
||||
Path indexDir = getIndexDir().resolve("sorted");
|
||||
Files.deleteIfExists(indexDir);
|
||||
Directory dir = newFSDirectory(indexDir);
|
||||
|
||||
LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
|
||||
mp.setNoCFSRatio(1.0);
|
||||
mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
|
||||
|
||||
// TODO: remove randomness
|
||||
IndexWriterConfig conf = new IndexWriterConfig(analyzer);
|
||||
conf.setMergePolicy(mp);
|
||||
conf.setUseCompoundFile(false);
|
||||
conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
|
||||
IndexWriter writer = new IndexWriter(dir, conf);
|
||||
LineFileDocs docs = new LineFileDocs(random());
|
||||
SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
|
||||
parser.setTimeZone(TimeZone.getTimeZone("UTC"));
|
||||
ParsePosition position = new ParsePosition(0);
|
||||
Field dateDVField = null;
|
||||
for(int i=0;i<50;i++) {
|
||||
Document doc = docs.nextDoc();
|
||||
String dateString = doc.get("date");
|
||||
|
||||
position.setIndex(0);
|
||||
Date date = parser.parse(dateString, position);
|
||||
if (position.getErrorIndex() != -1) {
|
||||
throw new AssertionError("failed to parse \"" + dateString + "\" as date");
|
||||
}
|
||||
if (position.getIndex() != dateString.length()) {
|
||||
throw new AssertionError("failed to parse \"" + dateString + "\" as date");
|
||||
}
|
||||
if (dateDVField == null) {
|
||||
dateDVField = new NumericDocValuesField("dateDV", 0L);
|
||||
doc.add(dateDVField);
|
||||
}
|
||||
dateDVField.setLongValue(date.getTime());
|
||||
if (i == 250) {
|
||||
writer.commit();
|
||||
}
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
writer.forceMerge(1);
|
||||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private void updateNumeric(IndexWriter writer, String id, String f, String cf, long value) throws IOException {
|
||||
writer.updateNumericDocValue(new Term("id", id), f, value);
|
||||
|
@ -1483,6 +1541,30 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void testSortedIndex() throws Exception {
|
||||
String[] versions = new String[] {"6.2.0", "6.2.1", "6.3.0"};
|
||||
for(String version : versions) {
|
||||
Path path = createTempDir("sorted");
|
||||
InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream("sorted." + version + ".zip");
|
||||
assertNotNull("Sorted index index " + version + " not found", resource);
|
||||
TestUtil.unzip(resource, path);
|
||||
|
||||
// TODO: more tests
|
||||
Directory dir = newFSDirectory(path);
|
||||
|
||||
DirectoryReader reader = DirectoryReader.open(dir);
|
||||
assertEquals(1, reader.leaves().size());
|
||||
Sort sort = reader.leaves().get(0).reader().getIndexSort();
|
||||
assertNotNull(sort);
|
||||
assertEquals("<long: \"dateDV\">!", sort.toString());
|
||||
reader.close();
|
||||
|
||||
// this will confirm the docs really are sorted:
|
||||
TestUtil.checkIndex(dir);
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
static long getValue(BinaryDocValues bdv) throws IOException {
|
||||
BytesRef term = bdv.binaryValue();
|
||||
|
|
3 binary files not shown.
|
@ -196,7 +196,7 @@ public class QueryBuilder {
|
|||
* @param quoted true if phrases should be generated when terms occur at more than one position
|
||||
* @param phraseSlop slop factor for phrase/multiphrase queries
|
||||
*/
|
||||
protected final Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText, boolean quoted, int phraseSlop) {
|
||||
protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText, boolean quoted, int phraseSlop) {
|
||||
assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
|
||||
|
||||
// Use the analyzer to get all the tokens, and then build an appropriate
|
||||
|
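Because createFieldQuery is no longer final (the LUCENE-7560 change shown above), query parsers built on QueryBuilder can now intercept it; a minimal hypothetical subclass:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.QueryBuilder;

public class SloppyQueryBuilder extends QueryBuilder {
  public SloppyQueryBuilder(Analyzer analyzer) {
    super(analyzer);
  }

  @Override
  protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator,
                                   String field, String queryText, boolean quoted, int phraseSlop) {
    // Delegate to the default analysis chain, but never build phrase queries with zero slop.
    return super.createFieldQuery(analyzer, operator, field, queryText, quoted, Math.max(phraseSlop, 1));
  }
}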
|
|
@ -75,6 +75,9 @@ public abstract class AnalysisOffsetStrategy extends FieldOffsetStrategy {
|
|||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
// TODO we could make this go away. MemoryIndexOffsetStrategy could simply split and analyze each value into the
|
||||
// MemoryIndex. TokenStreamOffsetStrategy's hack TokenStreamPostingsEnum could incorporate this logic,
|
||||
// albeit with less code, less hack.
|
||||
private static final class MultiValueTokenStream extends TokenFilter {
|
||||
|
||||
private final String fieldName;
|
||||
|
|
|
@ -24,115 +24,117 @@ package org.apache.lucene.search.uhighlight;
|
|||
* ellipses between unconnected passages.
|
||||
*/
|
||||
public class DefaultPassageFormatter extends PassageFormatter {
|
||||
/** text that will appear before highlighted terms */
|
||||
protected final String preTag;
|
||||
/** text that will appear after highlighted terms */
|
||||
protected final String postTag;
|
||||
/** text that will appear between two unconnected passages */
|
||||
protected final String ellipsis;
|
||||
/** true if we should escape for html */
|
||||
protected final boolean escape;
|
||||
/** text that will appear before highlighted terms */
|
||||
protected final String preTag;
|
||||
/** text that will appear after highlighted terms */
|
||||
protected final String postTag;
|
||||
/** text that will appear between two unconnected passages */
|
||||
protected final String ellipsis;
|
||||
/** true if we should escape for html */
|
||||
protected final boolean escape;
|
||||
|
||||
/**
|
||||
* Creates a new DefaultPassageFormatter with the default tags.
|
||||
*/
|
||||
public DefaultPassageFormatter() {
|
||||
this("<b>", "</b>", "... ", false);
|
||||
/**
|
||||
* Creates a new DefaultPassageFormatter with the default tags.
|
||||
*/
|
||||
public DefaultPassageFormatter() {
|
||||
this("<b>", "</b>", "... ", false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new DefaultPassageFormatter with custom tags.
|
||||
*
|
||||
* @param preTag text which should appear before a highlighted term.
|
||||
* @param postTag text which should appear after a highlighted term.
|
||||
* @param ellipsis text which should be used to connect two unconnected passages.
|
||||
* @param escape true if text should be html-escaped
|
||||
*/
|
||||
public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
|
||||
if (preTag == null || postTag == null || ellipsis == null) {
|
||||
throw new NullPointerException();
|
||||
}
|
||||
this.preTag = preTag;
|
||||
this.postTag = postTag;
|
||||
this.ellipsis = ellipsis;
|
||||
this.escape = escape;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new DefaultPassageFormatter with custom tags.
|
||||
* @param preTag text which should appear before a highlighted term.
|
||||
* @param postTag text which should appear after a highlighted term.
|
||||
* @param ellipsis text which should be used to connect two unconnected passages.
|
||||
* @param escape true if text should be html-escaped
|
||||
*/
|
||||
public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
|
||||
if (preTag == null || postTag == null || ellipsis == null) {
|
||||
throw new NullPointerException();
|
||||
@Override
|
||||
public String format(Passage passages[], String content) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int pos = 0;
|
||||
for (Passage passage : passages) {
|
||||
// don't add ellipsis if its the first one, or if its connected.
|
||||
if (passage.getStartOffset() > pos && pos > 0) {
|
||||
sb.append(ellipsis);
|
||||
}
|
||||
pos = passage.getStartOffset();
|
||||
for (int i = 0; i < passage.getNumMatches(); i++) {
|
||||
int start = passage.getMatchStarts()[i];
|
||||
int end = passage.getMatchEnds()[i];
|
||||
// its possible to have overlapping terms
|
||||
if (start > pos) {
|
||||
append(sb, content, pos, start);
|
||||
}
|
||||
this.preTag = preTag;
|
||||
this.postTag = postTag;
|
||||
this.ellipsis = ellipsis;
|
||||
this.escape = escape;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String format(Passage passages[], String content) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int pos = 0;
|
||||
for (Passage passage : passages) {
|
||||
// don't add ellipsis if its the first one, or if its connected.
|
||||
if (passage.startOffset > pos && pos > 0) {
|
||||
sb.append(ellipsis);
|
||||
}
|
||||
pos = passage.startOffset;
|
||||
for (int i = 0; i < passage.numMatches; i++) {
|
||||
int start = passage.matchStarts[i];
|
||||
int end = passage.matchEnds[i];
|
||||
// its possible to have overlapping terms
|
||||
if (start > pos) {
|
||||
append(sb, content, pos, start);
|
||||
}
|
||||
if (end > pos) {
|
||||
sb.append(preTag);
|
||||
append(sb, content, Math.max(pos, start), end);
|
||||
sb.append(postTag);
|
||||
pos = end;
|
||||
}
|
||||
}
|
||||
// its possible a "term" from the analyzer could span a sentence boundary.
|
||||
append(sb, content, pos, Math.max(pos, passage.endOffset));
|
||||
pos = passage.endOffset;
|
||||
if (end > pos) {
|
||||
sb.append(preTag);
|
||||
append(sb, content, Math.max(pos, start), end);
|
||||
sb.append(postTag);
|
||||
pos = end;
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
// its possible a "term" from the analyzer could span a sentence boundary.
|
||||
append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
|
||||
pos = passage.getEndOffset();
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends original text to the response.
|
||||
* @param dest resulting text, possibly transformed or encoded
|
||||
* @param content original text content
|
||||
* @param start index of the first character in content
|
||||
* @param end index of the character following the last character in content
|
||||
*/
|
||||
protected void append(StringBuilder dest, String content, int start, int end) {
|
||||
if (escape) {
|
||||
// note: these are the rules from owasp.org
|
||||
for (int i = start; i < end; i++) {
|
||||
char ch = content.charAt(i);
|
||||
switch(ch) {
|
||||
case '&':
|
||||
dest.append("&");
|
||||
break;
|
||||
case '<':
|
||||
dest.append("<");
|
||||
break;
|
||||
case '>':
|
||||
dest.append(">");
|
||||
break;
|
||||
case '"':
|
||||
dest.append(""");
|
||||
break;
|
||||
case '\'':
|
||||
dest.append("'");
|
||||
break;
|
||||
case '/':
|
||||
dest.append("/");
|
||||
break;
|
||||
default:
|
||||
if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
|
||||
dest.append(ch);
|
||||
} else if (ch < 0xff) {
|
||||
dest.append("&#");
|
||||
dest.append((int)ch);
|
||||
dest.append(";");
|
||||
} else {
|
||||
dest.append(ch);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Appends original text to the response.
|
||||
*
|
||||
* @param dest resulting text, possibly transformed or encoded
|
||||
* @param content original text content
|
||||
* @param start index of the first character in content
|
||||
* @param end index of the character following the last character in content
|
||||
*/
|
||||
protected void append(StringBuilder dest, String content, int start, int end) {
|
||||
if (escape) {
|
||||
// note: these are the rules from owasp.org
|
||||
for (int i = start; i < end; i++) {
|
||||
char ch = content.charAt(i);
|
||||
switch (ch) {
|
||||
case '&':
|
||||
dest.append("&");
|
||||
break;
|
||||
case '<':
|
||||
dest.append("<");
|
||||
break;
|
||||
case '>':
|
||||
dest.append(">");
|
||||
break;
|
||||
case '"':
|
||||
dest.append(""");
|
||||
break;
|
||||
case '\'':
|
||||
dest.append("'");
|
||||
break;
|
||||
case '/':
|
||||
dest.append("/");
|
||||
break;
|
||||
default:
|
||||
if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
|
||||
dest.append(ch);
|
||||
} else if (ch < 0xff) {
|
||||
dest.append("&#");
|
||||
dest.append((int) ch);
|
||||
dest.append(";");
|
||||
} else {
|
||||
dest.append(ch);
|
||||
}
|
||||
} else {
|
||||
dest.append(content, start, end);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
dest.append(content, start, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
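A usage sketch for the constructor documented above (tag strings are arbitrary): wrap matches in <em> tags, join disconnected passages with an ellipsis, and HTML-escape the snippet text.

import org.apache.lucene.search.uhighlight.DefaultPassageFormatter;
import org.apache.lucene.search.uhighlight.PassageFormatter;

PassageFormatter formatter = new DefaultPassageFormatter("<em>", "</em>", "... ", true);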
|
|
@ -117,9 +117,9 @@ public class FieldHighlighter {
|
|||
break;
|
||||
}
|
||||
Passage passage = new Passage();
|
||||
passage.score = Float.NaN;
|
||||
passage.startOffset = pos;
|
||||
passage.endOffset = next;
|
||||
passage.setScore(Float.NaN);
|
||||
passage.setStartOffset(pos);
|
||||
passage.setEndOffset(next);
|
||||
passages.add(passage);
|
||||
pos = next;
|
||||
}
|
||||
|
@ -145,12 +145,12 @@ public class FieldHighlighter {
|
|||
offsetsEnumQueue.add(new OffsetsEnum(null, EMPTY)); // a sentinel for termination
|
||||
|
||||
PriorityQueue<Passage> passageQueue = new PriorityQueue<>(Math.min(64, maxPassages + 1), (left, right) -> {
|
||||
if (left.score < right.score) {
|
||||
if (left.getScore() < right.getScore()) {
|
||||
return -1;
|
||||
} else if (left.score > right.score) {
|
||||
} else if (left.getScore() > right.getScore()) {
|
||||
return 1;
|
||||
} else {
|
||||
return left.startOffset - right.startOffset;
|
||||
return left.getStartOffset() - right.getStartOffset();
|
||||
}
|
||||
});
|
||||
Passage passage = new Passage(); // the current passage in-progress. Will either get reset or added to queue.
|
||||
|
@ -170,12 +170,12 @@ public class FieldHighlighter {
|
|||
continue;
|
||||
}
|
||||
// See if this term should be part of a new passage.
|
||||
if (start >= passage.endOffset) {
|
||||
if (passage.startOffset >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
|
||||
if (start >= passage.getEndOffset()) {
|
||||
if (passage.getStartOffset() >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
|
||||
// finalize passage
|
||||
passage.score *= scorer.norm(passage.startOffset);
|
||||
passage.setScore(passage.getScore() * scorer.norm(passage.getStartOffset()));
|
||||
// new sentence: first add 'passage' to queue
|
||||
if (passageQueue.size() == maxPassages && passage.score < passageQueue.peek().score) {
|
||||
if (passageQueue.size() == maxPassages && passage.getScore() < passageQueue.peek().getScore()) {
|
||||
passage.reset(); // can't compete, just reset it
|
||||
} else {
|
||||
passageQueue.offer(passage);
|
||||
|
@ -192,8 +192,8 @@ public class FieldHighlighter {
|
|||
break;
|
||||
}
|
||||
// advance breakIterator
|
||||
passage.startOffset = Math.max(breakIterator.preceding(start + 1), 0);
|
||||
passage.endOffset = Math.min(breakIterator.following(start), contentLength);
|
||||
passage.setStartOffset(Math.max(breakIterator.preceding(start + 1), 0));
|
||||
passage.setEndOffset(Math.min(breakIterator.following(start), contentLength));
|
||||
}
|
||||
// Add this term to the passage.
|
||||
int tf = 0;
|
||||
|
@ -209,12 +209,12 @@ public class FieldHighlighter {
|
|||
off.nextPosition();
|
||||
start = off.startOffset();
|
||||
end = off.endOffset();
|
||||
if (start >= passage.endOffset || end > contentLength) { // it's beyond this passage
|
||||
if (start >= passage.getEndOffset() || end > contentLength) { // it's beyond this passage
|
||||
offsetsEnumQueue.offer(off);
|
||||
break;
|
||||
}
|
||||
}
|
||||
passage.score += off.weight * scorer.tf(tf, passage.endOffset - passage.startOffset);
|
||||
passage.setScore(passage.getScore() + off.weight * scorer.tf(tf, passage.getEndOffset() - passage.getStartOffset()));
|
||||
}
|
||||
|
||||
Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
|
||||
|
@ -222,7 +222,7 @@ public class FieldHighlighter {
|
|||
p.sort();
|
||||
}
|
||||
// sort in ascending order
|
||||
Arrays.sort(passages, (left, right) -> left.startOffset - right.startOffset);
|
||||
Arrays.sort(passages, (left, right) -> left.getStartOffset() - right.getStartOffset());
|
||||
return passages;
|
||||
}
|
||||
|
||||
|
|
|
@ -66,9 +66,8 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
|
|||
}
|
||||
|
||||
BytesRef getTerm() throws IOException {
|
||||
// the dp.getPayload thing is a hack -- see MultiTermHighlighting
|
||||
return term != null ? term : postingsEnum.getPayload();
|
||||
// We don't deepcopy() because in this hack we know we don't have to.
|
||||
// TODO TokenStreamOffsetStrategy could override OffsetsEnum; then remove this hack here
|
||||
return term != null ? term : postingsEnum.getPayload(); // abusing payload like this is a total hack!
|
||||
}
|
||||
|
||||
boolean hasMorePositions() throws IOException {
|
||||
|
@ -91,7 +90,8 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
|
|||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (postingsEnum instanceof Closeable) { // the one in MultiTermHighlighting is.
|
||||
// TODO TokenStreamOffsetStrategy could override OffsetsEnum; then this base impl would be no-op.
|
||||
if (postingsEnum instanceof Closeable) {
|
||||
((Closeable) postingsEnum).close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,139 +23,159 @@ import org.apache.lucene.util.InPlaceMergeSorter;
|
|||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Represents a passage (typically a sentence of the document).
|
||||
* Represents a passage (typically a sentence of the document).
|
||||
* <p>
|
||||
* A passage contains {@link #getNumMatches} highlights from the query,
|
||||
* and the offsets and query terms that correspond with each match.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class Passage {
|
||||
int startOffset = -1;
|
||||
int endOffset = -1;
|
||||
float score = 0.0f;
|
||||
public class Passage {
|
||||
private int startOffset = -1;
|
||||
private int endOffset = -1;
|
||||
private float score = 0.0f;
|
||||
|
||||
int matchStarts[] = new int[8];
|
||||
int matchEnds[] = new int[8];
|
||||
BytesRef matchTerms[] = new BytesRef[8];
|
||||
int numMatches = 0;
|
||||
private int[] matchStarts = new int[8];
|
||||
private int[] matchEnds = new int[8];
|
||||
private BytesRef[] matchTerms = new BytesRef[8];
|
||||
private int numMatches = 0;
|
||||
|
||||
public void addMatch(int startOffset, int endOffset, BytesRef term) {
|
||||
assert startOffset >= this.startOffset && startOffset <= this.endOffset;
|
||||
if (numMatches == matchStarts.length) {
|
||||
int newLength = ArrayUtil.oversize(numMatches+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
|
||||
int newMatchStarts[] = new int[newLength];
|
||||
int newMatchEnds[] = new int[newLength];
|
||||
BytesRef newMatchTerms[] = new BytesRef[newLength];
|
||||
System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
|
||||
System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
|
||||
System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
|
||||
matchStarts = newMatchStarts;
|
||||
matchEnds = newMatchEnds;
|
||||
matchTerms = newMatchTerms;
|
||||
}
|
||||
assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
|
||||
matchStarts[numMatches] = startOffset;
|
||||
matchEnds[numMatches] = endOffset;
|
||||
matchTerms[numMatches] = term;
|
||||
numMatches++;
|
||||
/** @lucene.internal */
|
||||
public void addMatch(int startOffset, int endOffset, BytesRef term) {
|
||||
assert startOffset >= this.startOffset && startOffset <= this.endOffset;
|
||||
if (numMatches == matchStarts.length) {
|
||||
int newLength = ArrayUtil.oversize(numMatches + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
|
||||
int newMatchStarts[] = new int[newLength];
|
||||
int newMatchEnds[] = new int[newLength];
|
||||
BytesRef newMatchTerms[] = new BytesRef[newLength];
|
||||
System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
|
||||
System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
|
||||
System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
|
||||
matchStarts = newMatchStarts;
|
||||
matchEnds = newMatchEnds;
|
||||
matchTerms = newMatchTerms;
|
||||
}
|
||||
assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
|
||||
matchStarts[numMatches] = startOffset;
|
||||
matchEnds[numMatches] = endOffset;
|
||||
matchTerms[numMatches] = term;
|
||||
numMatches++;
|
||||
}
|
||||
|
||||
void sort() {
|
||||
final int starts[] = matchStarts;
|
||||
final int ends[] = matchEnds;
|
||||
final BytesRef terms[] = matchTerms;
|
||||
new InPlaceMergeSorter() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
int temp = starts[i];
|
||||
starts[i] = starts[j];
|
||||
starts[j] = temp;
|
||||
/** @lucene.internal */
|
||||
public void sort() {
|
||||
final int starts[] = matchStarts;
|
||||
final int ends[] = matchEnds;
|
||||
final BytesRef terms[] = matchTerms;
|
||||
new InPlaceMergeSorter() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
int temp = starts[i];
|
||||
starts[i] = starts[j];
|
||||
starts[j] = temp;
|
||||
|
||||
temp = ends[i];
|
||||
ends[i] = ends[j];
|
||||
ends[j] = temp;
|
||||
temp = ends[i];
|
||||
ends[i] = ends[j];
|
||||
ends[j] = temp;
|
||||
|
||||
BytesRef tempTerm = terms[i];
|
||||
terms[i] = terms[j];
|
||||
terms[j] = tempTerm;
|
||||
}
|
||||
BytesRef tempTerm = terms[i];
|
||||
terms[i] = terms[j];
|
||||
terms[j] = tempTerm;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return Integer.compare(starts[i], starts[j]);
|
||||
}
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return Integer.compare(starts[i], starts[j]);
|
||||
}
|
||||
|
||||
}.sort(0, numMatches);
|
||||
}
|
||||
}.sort(0, numMatches);
|
||||
}
|
||||
|
||||
void reset() {
|
||||
startOffset = endOffset = -1;
|
||||
score = 0.0f;
|
||||
numMatches = 0;
|
||||
}
|
||||
/** @lucene.internal */
|
||||
public void reset() {
|
||||
startOffset = endOffset = -1;
|
||||
score = 0.0f;
|
||||
numMatches = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start offset of this passage.
|
||||
* @return start index (inclusive) of the passage in the
|
||||
* original content: always >= 0.
|
||||
*/
|
||||
public int getStartOffset() {
|
||||
return startOffset;
|
||||
}
|
||||
/**
|
||||
* Start offset of this passage.
|
||||
*
|
||||
* @return start index (inclusive) of the passage in the
|
||||
* original content: always >= 0.
|
||||
*/
|
||||
public int getStartOffset() {
|
||||
return startOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* End offset of this passage.
|
||||
* @return end index (exclusive) of the passage in the
|
||||
* original content: always >= {@link #getStartOffset()}
|
||||
*/
|
||||
public int getEndOffset() {
|
||||
return endOffset;
|
||||
}
|
||||
/**
|
||||
* End offset of this passage.
|
||||
*
|
||||
* @return end index (exclusive) of the passage in the
|
||||
* original content: always >= {@link #getStartOffset()}
|
||||
*/
|
||||
public int getEndOffset() {
|
||||
return endOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Passage's score.
|
||||
*/
|
||||
public float getScore() {
|
||||
return score;
|
||||
}
|
||||
/**
|
||||
* Passage's score.
|
||||
*/
|
||||
public float getScore() {
|
||||
return score;
|
||||
}
|
||||
|
||||
/**
|
||||
* Number of term matches available in
|
||||
* {@link #getMatchStarts}, {@link #getMatchEnds},
|
||||
* {@link #getMatchTerms}
|
||||
*/
|
||||
public int getNumMatches() {
|
||||
return numMatches;
|
||||
}
|
||||
/**
|
||||
* Number of term matches available in
|
||||
* {@link #getMatchStarts}, {@link #getMatchEnds},
|
||||
* {@link #getMatchTerms}
|
||||
*/
|
||||
public int getNumMatches() {
|
||||
return numMatches;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start offsets of the term matches, in increasing order.
|
||||
* <p>
|
||||
* Only {@link #getNumMatches} are valid. Note that these
|
||||
* offsets are absolute (not relative to {@link #getStartOffset()}).
|
||||
*/
|
||||
public int[] getMatchStarts() {
|
||||
return matchStarts;
|
||||
}
|
||||
/**
|
||||
* Start offsets of the term matches, in increasing order.
|
||||
* <p>
|
||||
* Only {@link #getNumMatches} are valid. Note that these
|
||||
* offsets are absolute (not relative to {@link #getStartOffset()}).
|
||||
*/
|
||||
public int[] getMatchStarts() {
|
||||
return matchStarts;
|
||||
}
|
||||
|
||||
/**
|
||||
* End offsets of the term matches, corresponding with {@link #getMatchStarts}.
|
||||
* <p>
|
||||
* Only {@link #getNumMatches} are valid. Note that its possible that an end offset
|
||||
* could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
|
||||
* Analyzer produced a term which spans a passage boundary.
|
||||
*/
|
||||
public int[] getMatchEnds() {
|
||||
return matchEnds;
|
||||
}
|
||||
/**
|
||||
* End offsets of the term matches, corresponding with {@link #getMatchStarts}.
|
||||
* <p>
|
||||
* Only {@link #getNumMatches} are valid. Note that its possible that an end offset
|
||||
* could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
|
||||
* Analyzer produced a term which spans a passage boundary.
|
||||
*/
|
||||
public int[] getMatchEnds() {
|
||||
return matchEnds;
|
||||
}
|
||||
|
||||
/**
|
||||
* BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
|
||||
* <p>
|
||||
* Only {@link #getNumMatches()} are valid.
|
||||
*/
|
||||
public BytesRef[] getMatchTerms() {
|
||||
return matchTerms;
|
||||
}
|
||||
/**
|
||||
* BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
|
||||
* <p>
|
||||
* Only {@link #getNumMatches()} are valid.
|
||||
*/
|
||||
public BytesRef[] getMatchTerms() {
|
||||
return matchTerms;
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public void setStartOffset(int startOffset) {
|
||||
this.startOffset = startOffset;
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public void setEndOffset(int endOffset) {
|
||||
this.endOffset = endOffset;
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public void setScore(float score) {
|
||||
this.score = score;
|
||||
}
|
||||
}
|
||||
|
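Since Passage's accessors are now public (the LUCENE-7559 change above), a custom formatter can live outside the org.apache.lucene.search.uhighlight package; a minimal hypothetical example that emits untagged passage text:

import org.apache.lucene.search.uhighlight.Passage;
import org.apache.lucene.search.uhighlight.PassageFormatter;

public class PlainTextPassageFormatter extends PassageFormatter {
  @Override
  public String format(Passage[] passages, String content) {
    StringBuilder sb = new StringBuilder();
    for (Passage passage : passages) {
      if (sb.length() > 0) {
        sb.append(" ... "); // separate disconnected passages
      }
      sb.append(content, passage.getStartOffset(), passage.getEndOffset());
    }
    return sb.toString();
  }
}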
|
|
@ -69,10 +69,8 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
|
|||
return Collections.singletonList(new OffsetsEnum(null, mtqPostingsEnum));
|
||||
}
|
||||
|
||||
// but this would have a performance cost for likely little gain in the user experience, it
|
||||
// would only serve to make this method less bogus.
|
||||
// instead, we always return freq() = Integer.MAX_VALUE and let the highlighter terminate based on offset...
|
||||
// TODO: DWS perhaps instead OffsetsEnum could become abstract and this would be an impl?
|
||||
// See class javadocs.
|
||||
// TODO: DWS perhaps instead OffsetsEnum could become abstract and this would be an impl? See TODOs in OffsetsEnum.
|
||||
private static class TokenStreamPostingsEnum extends PostingsEnum implements Closeable {
|
||||
TokenStream stream; // becomes null when closed
|
||||
final CharacterRunAutomaton[] matchers;
|
||||
|
@ -134,6 +132,7 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
|
|||
return currentEndOffset;
|
||||
}
|
||||
|
||||
// TOTAL HACK; used in OffsetsEnum.getTerm()
|
||||
@Override
|
||||
public BytesRef getPayload() throws IOException {
|
||||
if (matchDescriptions[currentMatch] == null) {
|
||||
|
|
|
@ -697,13 +697,13 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
int pos = 0;
|
||||
for (Passage passage : passages) {
|
||||
// don't add ellipsis if its the first one, or if its connected.
|
||||
if (passage.startOffset > pos && pos > 0) {
|
||||
if (passage.getStartOffset() > pos && pos > 0) {
|
||||
sb.append("... ");
|
||||
}
|
||||
pos = passage.startOffset;
|
||||
for (int i = 0; i < passage.numMatches; i++) {
|
||||
int start = passage.matchStarts[i];
|
||||
int end = passage.matchEnds[i];
|
||||
pos = passage.getStartOffset();
|
||||
for (int i = 0; i < passage.getNumMatches(); i++) {
|
||||
int start = passage.getMatchStarts()[i];
|
||||
int end = passage.getMatchEnds()[i];
|
||||
// its possible to have overlapping terms
|
||||
if (start > pos) {
|
||||
sb.append(content, pos, start);
|
||||
|
@ -719,8 +719,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
// its possible a "term" from the analyzer could span a sentence boundary.
|
||||
sb.append(content, pos, Math.max(pos, passage.endOffset));
|
||||
pos = passage.endOffset;
|
||||
sb.append(content, pos, Math.max(pos, passage.getEndOffset()));
|
||||
pos = passage.getEndOffset();
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
|
|
@ -96,6 +96,27 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
|||
init(f, a);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set to true if phrase queries will be automatically generated
|
||||
* when the analyzer returns more than one term from whitespace
|
||||
* delimited text.
|
||||
* NOTE: this behavior may not be suitable for all languages.
|
||||
* <p>
|
||||
* Set to false if phrase queries should only be generated when
|
||||
* surrounded by double quotes.
|
||||
* <p>
|
||||
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||
*/
|
||||
@Override
|
||||
public void setAutoGeneratePhraseQueries(boolean value) {
|
||||
if (splitOnWhitespace == false && value == true) {
|
||||
throw new IllegalArgumentException
|
||||
("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false");
|
||||
}
|
||||
this.autoGeneratePhraseQueries = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #setSplitOnWhitespace(boolean)
|
||||
*/
|
||||
|
@ -106,8 +127,15 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
|||
/**
|
||||
* Whether query text should be split on whitespace prior to analysis.
|
||||
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
|
||||
* <p>
|
||||
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||
*/
|
||||
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
|
||||
if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) {
|
||||
throw new IllegalArgumentException
|
||||
("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true");
|
||||
}
|
||||
this.splitOnWhitespace = splitOnWhitespace;
|
||||
}
|
||||
|
||||
|
@ -635,6 +663,31 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
|||
finally { jj_save(2, xla); }
|
||||
}
|
||||
|
||||
private boolean jj_3R_3() {
|
||||
if (jj_scan_token(TERM)) return true;
|
||||
jj_lookingAhead = true;
|
||||
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
|
||||
jj_lookingAhead = false;
|
||||
if (!jj_semLA || jj_3R_6()) return true;
|
||||
Token xsp;
|
||||
if (jj_3R_7()) return true;
|
||||
while (true) {
|
||||
xsp = jj_scanpos;
|
||||
if (jj_3R_7()) { jj_scanpos = xsp; break; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean jj_3R_6() {
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean jj_3R_5() {
|
||||
if (jj_scan_token(STAR)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean jj_3R_4() {
|
||||
if (jj_scan_token(TERM)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
|
@ -666,31 +719,6 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
|||
return false;
|
||||
}
|
||||
|
||||
private boolean jj_3R_3() {
|
||||
if (jj_scan_token(TERM)) return true;
|
||||
jj_lookingAhead = true;
|
||||
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
|
||||
jj_lookingAhead = false;
|
||||
if (!jj_semLA || jj_3R_6()) return true;
|
||||
Token xsp;
|
||||
if (jj_3R_7()) return true;
|
||||
while (true) {
|
||||
xsp = jj_scanpos;
|
||||
if (jj_3R_7()) { jj_scanpos = xsp; break; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean jj_3R_6() {
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean jj_3R_5() {
|
||||
if (jj_scan_token(STAR)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Generated Token Manager. */
|
||||
public QueryParserTokenManager token_source;
|
||||
/** Current token. */
|
||||
|
|
|
@ -120,6 +120,27 @@ public class QueryParser extends QueryParserBase {
|
|||
init(f, a);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set to true if phrase queries will be automatically generated
|
||||
* when the analyzer returns more than one term from whitespace
|
||||
* delimited text.
|
||||
* NOTE: this behavior may not be suitable for all languages.
|
||||
* <p>
|
||||
* Set to false if phrase queries should only be generated when
|
||||
* surrounded by double quotes.
|
||||
* <p>
|
||||
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||
*/
|
||||
@Override
|
||||
public void setAutoGeneratePhraseQueries(boolean value) {
|
||||
if (splitOnWhitespace == false && value == true) {
|
||||
throw new IllegalArgumentException
|
||||
("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false");
|
||||
}
|
||||
this.autoGeneratePhraseQueries = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #setSplitOnWhitespace(boolean)
|
||||
*/
|
||||
|
@ -130,8 +151,15 @@ public class QueryParser extends QueryParserBase {
|
|||
/**
|
||||
* Whether query text should be split on whitespace prior to analysis.
|
||||
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
|
||||
* <p>
|
||||
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||
*/
|
||||
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
|
||||
if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) {
|
||||
throw new IllegalArgumentException
|
||||
("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true");
|
||||
}
|
||||
this.splitOnWhitespace = splitOnWhitespace;
|
||||
}
|
||||
|
||||
|
|
|
@ -144,7 +144,7 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
|
|||
* Set to false if phrase queries should only be generated when
|
||||
* surrounded by double quotes.
|
||||
*/
|
||||
public final void setAutoGeneratePhraseQueries(boolean value) {
|
||||
public void setAutoGeneratePhraseQueries(boolean value) {
|
||||
this.autoGeneratePhraseQueries = value;
|
||||
}
|
||||
|
||||
|
|
|
@ -840,6 +840,20 @@ public class TestQueryParser extends QueryParserTestBase {
|
|||
assertTrue(isAHit(qp.parse("เ??"), s, analyzer));
|
||||
}
|
||||
|
||||
// LUCENE-7533
|
||||
public void test_splitOnWhitespace_with_autoGeneratePhraseQueries() {
|
||||
final QueryParser qp = new QueryParser(FIELD, new MockAnalyzer(random()));
|
||||
expectThrows(IllegalArgumentException.class, () -> {
|
||||
qp.setSplitOnWhitespace(false);
|
||||
qp.setAutoGeneratePhraseQueries(true);
|
||||
});
|
||||
final QueryParser qp2 = new QueryParser(FIELD, new MockAnalyzer(random()));
|
||||
expectThrows(IllegalArgumentException.class, () -> {
|
||||
qp2.setSplitOnWhitespace(true);
|
||||
qp2.setAutoGeneratePhraseQueries(true);
|
||||
qp2.setSplitOnWhitespace(false);
|
||||
});
|
||||
}
|
||||
|
||||
private boolean isAHit(Query q, String content, Analyzer analyzer) throws IOException{
|
||||
Directory ramDir = newDirectory();
|
||||
|
|
|
@ -38,6 +38,7 @@ import org.apache.lucene.index.Term;
|
|||
//import org.apache.lucene.queryparser.classic.ParseException;
|
||||
//import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
//import org.apache.lucene.queryparser.classic.QueryParserBase;
|
||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
import org.apache.lucene.queryparser.classic.QueryParserBase;
|
||||
//import org.apache.lucene.queryparser.classic.QueryParserTokenManager;
|
||||
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
|
||||
|
@ -328,6 +329,9 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
|||
|
||||
PhraseQuery expected = new PhraseQuery("field", "中", "国");
|
||||
CommonQueryParserConfiguration qp = getParserConfig(analyzer);
|
||||
if (qp instanceof QueryParser) { // Always true, since TestStandardQP overrides this method
|
||||
((QueryParser)qp).setSplitOnWhitespace(true); // LUCENE-7533
|
||||
}
|
||||
setAutoGeneratePhraseQueries(qp, true);
|
||||
assertEquals(expected, getQuery("中国",qp));
|
||||
}
|
||||
|
|
|
@ -56,7 +56,7 @@ public abstract class PrimaryNode extends Node {
|
|||
// Current NRT segment infos, incRef'd with IndexWriter.deleter:
|
||||
private SegmentInfos curInfos;
|
||||
|
||||
final IndexWriter writer;
|
||||
protected final IndexWriter writer;
|
||||
|
||||
// IncRef'd state of the last published NRT point; when a replica comes asking, we give it this as the current NRT point:
|
||||
private CopyState copyState;
|
||||
|
|
|
@ -129,9 +129,10 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
private final boolean highlight;
|
||||
|
||||
private final boolean commitOnBuild;
|
||||
private final boolean closeIndexWriterOnBuild;
|
||||
|
||||
/** Used for ongoing NRT additions/updates. */
|
||||
private IndexWriter writer;
|
||||
protected IndexWriter writer;
|
||||
|
||||
/** {@link IndexSearcher} used for lookups. */
|
||||
protected SearcherManager searcherMgr;
|
||||
|
@ -146,6 +147,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
/** Default highlighting option. */
|
||||
public static final boolean DEFAULT_HIGHLIGHT = true;
|
||||
|
||||
/** Default option to close the IndexWriter once the index has been built. */
|
||||
protected final static boolean DEFAULT_CLOSE_INDEXWRITER_ON_BUILD = true;
|
||||
|
||||
/** How we sort the postings and search results. */
|
||||
private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));
|
||||
|
||||
|
@ -198,8 +202,34 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
*
|
||||
*/
|
||||
public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars,
|
||||
boolean commitOnBuild,
|
||||
boolean commitOnBuild,
|
||||
boolean allTermsRequired, boolean highlight) throws IOException {
|
||||
this(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild, allTermsRequired, highlight,
|
||||
DEFAULT_CLOSE_INDEXWRITER_ON_BUILD);
|
||||
}
|
||||
|
||||
/** Create a new instance, loading from a previously built
|
||||
* AnalyzingInfixSuggester directory, if it exists. This directory must be
|
||||
* private to the infix suggester (i.e., not an external
|
||||
* Lucene index). Note that {@link #close}
|
||||
* will also close the provided directory.
|
||||
*
|
||||
* @param minPrefixChars Minimum number of leading characters
|
||||
* before PrefixQuery is used (default 4).
|
||||
* Prefixes shorter than this are indexed as character
|
||||
* ngrams (increasing index size but making lookups
|
||||
* faster).
|
||||
*
|
||||
* @param commitOnBuild Call commit after the index has finished building. This would persist the
|
||||
* suggester index to disk and future instances of this suggester can use this pre-built dictionary.
|
||||
*
|
||||
* @param allTermsRequired All terms in the suggest query must be matched.
|
||||
* @param highlight Highlight suggest query in suggestions.
|
||||
* @param closeIndexWriterOnBuild If true, the IndexWriter will be closed after the index has finished building.
|
||||
*/
|
||||
public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars,
|
||||
boolean commitOnBuild, boolean allTermsRequired,
|
||||
boolean highlight, boolean closeIndexWriterOnBuild) throws IOException {
|
||||
|
||||
if (minPrefixChars < 0) {
|
||||
throw new IllegalArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars);
|
||||
|
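A hedged usage sketch of the new constructor documented above (the directory path, analyzer, and input iterator are placeholders):

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

Directory dir = FSDirectory.open(Paths.get("/path/to/suggest-index")); // placeholder path
StandardAnalyzer analyzer = new StandardAnalyzer();
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(
    dir, analyzer, analyzer, 4 /* minPrefixChars */, false /* commitOnBuild */,
    true /* allTermsRequired */, true /* highlight */, true /* closeIndexWriterOnBuild */);
suggester.build(inputs); // inputs: an InputIterator; afterwards the internal IndexWriter is committed and closed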
@ -212,6 +242,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
this.commitOnBuild = commitOnBuild;
|
||||
this.allTermsRequired = allTermsRequired;
|
||||
this.highlight = highlight;
|
||||
this.closeIndexWriterOnBuild = closeIndexWriterOnBuild;
|
||||
|
||||
if (DirectoryReader.indexExists(dir)) {
|
||||
// Already built; open it:
|
||||
|
@ -276,15 +307,22 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
}
|
||||
|
||||
//System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
|
||||
if (commitOnBuild) {
|
||||
if (commitOnBuild || closeIndexWriterOnBuild) {
|
||||
commit();
|
||||
}
|
||||
searcherMgr = new SearcherManager(writer, null);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success == false && writer != null) {
|
||||
writer.rollback();
|
||||
writer = null;
|
||||
if (success) {
|
||||
if (closeIndexWriterOnBuild) {
|
||||
writer.close();
|
||||
writer = null;
|
||||
}
|
||||
} else { // failure
|
||||
if (writer != null) {
|
||||
writer.rollback();
|
||||
writer = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -294,9 +332,13 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
* @see IndexWriter#commit */
|
||||
public void commit() throws IOException {
|
||||
if (writer == null) {
|
||||
throw new IllegalStateException("Cannot commit on an closed writer. Add documents first");
|
||||
if (searcherMgr == null || closeIndexWriterOnBuild == false) {
|
||||
throw new IllegalStateException("Cannot commit on an closed writer. Add documents first");
|
||||
}
|
||||
// else no-op: writer was committed and closed after the index was built, so commit is unnecessary
|
||||
} else {
|
||||
writer.commit();
|
||||
}
|
||||
writer.commit();
|
||||
}
|
||||
|
||||
private Analyzer getGramAnalyzer() {
|
||||
|
@ -321,13 +363,17 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
|
||||
private synchronized void ensureOpen() throws IOException {
|
||||
if (writer == null) {
|
||||
if (searcherMgr != null) {
|
||||
searcherMgr.close();
|
||||
searcherMgr = null;
|
||||
if (DirectoryReader.indexExists(dir)) {
|
||||
// Already built; open it:
|
||||
writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.APPEND));
|
||||
} else {
|
||||
writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
|
||||
}
|
||||
writer = new IndexWriter(dir,
|
||||
getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
|
||||
SearcherManager oldSearcherMgr = searcherMgr;
|
||||
searcherMgr = new SearcherManager(writer, null);
|
||||
if (oldSearcherMgr != null) {
|
||||
oldSearcherMgr.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -382,7 +428,11 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
if (searcherMgr == null) {
|
||||
throw new IllegalStateException("suggester was not built");
|
||||
}
|
||||
searcherMgr.maybeRefreshBlocking();
|
||||
if (writer != null) {
|
||||
searcherMgr.maybeRefreshBlocking();
|
||||
}
|
||||
// else no-op: writer was committed and closed after the index was built
|
||||
// and before searchMgr was constructed, so refresh is unnecessary
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -791,9 +841,11 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
}
|
||||
if (writer != null) {
|
||||
writer.close();
|
||||
dir.close();
|
||||
writer = null;
|
||||
}
|
||||
if (dir != null) {
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -86,6 +86,10 @@ final class CompletionFieldsConsumer extends FieldsConsumer {
|
|||
for (String field : fields) {
|
||||
CompletionTermWriter termWriter = new CompletionTermWriter();
|
||||
Terms terms = fields.terms(field);
|
||||
if (terms == null) {
|
||||
// this can happen from ghost fields, where the incoming Fields iterator claims a field exists but it does not
|
||||
continue;
|
||||
}
|
||||
TermsEnum termsEnum = terms.iterator();
|
||||
|
||||
// write terms
|
||||
|
|
|
@@ -35,11 +35,14 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

@@ -1334,4 +1337,112 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {

suggester.close();
}

public void testCloseIndexWriterOnBuild() throws Exception {
class MyAnalyzingInfixSuggester extends AnalyzingInfixSuggester {
public MyAnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
int minPrefixChars, boolean commitOnBuild, boolean allTermsRequired,
boolean highlight, boolean closeIndexWriterOnBuild) throws IOException {
super(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild,
allTermsRequired, highlight, closeIndexWriterOnBuild);
}
public IndexWriter getIndexWriter() {
return writer;
}
public SearcherManager getSearcherManager() {
return searcherMgr;
}
}

// After build(), when closeIndexWriterOnBuild = true:
// * The IndexWriter should be null
// * The SearcherManager should be non-null
// * SearcherManager's IndexWriter reference should be closed
//   (as evidenced by maybeRefreshBlocking() throwing AlreadyClosedException)
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
MyAnalyzingInfixSuggester suggester = new MyAnalyzingInfixSuggester(newDirectory(), a, a, 3, false,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
suggester.build(new InputArrayIterator(sharedInputs));
assertNull(suggester.getIndexWriter());
assertNotNull(suggester.getSearcherManager());
expectThrows(AlreadyClosedException.class, () -> suggester.getSearcherManager().maybeRefreshBlocking());

suggester.close();
a.close();
}

public void testCommitAfterBuild() throws Exception {
performOperationWithAllOptionCombinations(suggester -> {
suggester.build(new InputArrayIterator(sharedInputs));
suggester.commit();
});
}

public void testRefreshAfterBuild() throws Exception {
performOperationWithAllOptionCombinations(suggester -> {
suggester.build(new InputArrayIterator(sharedInputs));
suggester.refresh();
});
}

public void testDisallowCommitBeforeBuild() throws Exception {
performOperationWithAllOptionCombinations
(suggester -> expectThrows(IllegalStateException.class, suggester::commit));
}

public void testDisallowRefreshBeforeBuild() throws Exception {
performOperationWithAllOptionCombinations
(suggester -> expectThrows(IllegalStateException.class, suggester::refresh));
}

private Input sharedInputs[] = new Input[] {
new Input("lend me your ear", 8, new BytesRef("foobar")),
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};

private interface SuggesterOperation {
void operate(AnalyzingInfixSuggester suggester) throws Exception;
}

/**
 * Perform the given operation on suggesters constructed with all combinations of options
 * commitOnBuild and closeIndexWriterOnBuild, including defaults.
 */
private void performOperationWithAllOptionCombinations(SuggesterOperation operation) throws Exception {
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);

AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a);
operation.operate(suggester);
suggester.close();

suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false);
operation.operate(suggester);
suggester.close();

suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, true);
operation.operate(suggester);
suggester.close();

suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, true,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
operation.operate(suggester);
suggester.close();

suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, true,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, false);
operation.operate(suggester);
suggester.close();

suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
operation.operate(suggester);
suggester.close();

suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, false);
operation.operate(suggester);
suggester.close();

a.close();
}
}
@@ -24,9 +24,12 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedNumericDocValues;

@@ -38,7 +41,6 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.apache.lucene.search.suggest.document.TestSuggestField.Entry;

@@ -112,7 +114,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
dir.close();
}

@Test
public void testSimple() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));

@@ -141,7 +142,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}

@Test
public void testMostlyFilteredOutDocuments() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));

@@ -188,7 +188,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}

@Test
public void testDocFiltering() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));

@@ -230,7 +229,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}

@Test
public void testAnalyzerWithoutPreservePosAndSep() throws Exception {
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, false, false);

@@ -254,7 +252,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}

@Test
public void testAnalyzerWithSepAndNoPreservePos() throws Exception {
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, true, false);

@@ -278,7 +275,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}

@Test
public void testAnalyzerWithPreservePosAndNoSep() throws Exception {
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, false, true);

@@ -302,4 +298,43 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}

public void testGhostField() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field", "suggest_field2", "suggest_field3"));

Document document = new Document();
document.add(new StringField("id", "0", Field.Store.NO));
document.add(new SuggestField("suggest_field", "apples", 3));
iw.addDocument(document);
// need another document so whole segment isn't deleted
iw.addDocument(new Document());
iw.commit();

document = new Document();
document.add(new StringField("id", "1", Field.Store.NO));
document.add(new SuggestField("suggest_field2", "apples", 3));
iw.addDocument(document);
iw.commit();

iw.deleteDocuments(new Term("id", "0"));
// first force merge is OK
iw.forceMerge(1);

// second force merge causes MultiFields to include "suggest_field" in its iteration, yet a null Terms is returned (no documents have
// this field anymore)
iw.addDocument(new Document());
iw.forceMerge(1);

DirectoryReader reader = DirectoryReader.open(iw);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);

PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
assertEquals(0, indexSearcher.suggest(query, 3).totalHits);

query = new PrefixCompletionQuery(analyzer, new Term("suggest_field2", "app"));
assertSuggestions(indexSearcher.suggest(query, 3), new Entry("apples", 3));

reader.close();
iw.close();
}
}
@@ -40,6 +40,8 @@ import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef;

@@ -312,6 +314,49 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
dir.close();
}

// tests that level 2 ghost fields still work
public void testLevel2Ghosts() throws Exception {
Directory dir = newDirectory();

Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwc = newIndexWriterConfig(null);
iwc.setCodec(getCodec());
iwc.setMergePolicy(newLogMergePolicy());
IndexWriter iw = new IndexWriter(dir, iwc);

Document document = new Document();
document.add(new StringField("id", "0", Field.Store.NO));
document.add(new StringField("suggest_field", "apples", Field.Store.NO));
iw.addDocument(document);
// need another document so whole segment isn't deleted
iw.addDocument(new Document());
iw.commit();

document = new Document();
document.add(new StringField("id", "1", Field.Store.NO));
document.add(new StringField("suggest_field2", "apples", Field.Store.NO));
iw.addDocument(document);
iw.commit();

iw.deleteDocuments(new Term("id", "0"));
// first force merge creates a level 1 ghost field
iw.forceMerge(1);

// second force merge creates a level 2 ghost field, causing MultiFields to include "suggest_field" in its iteration, yet a null Terms is returned (no documents have
// this field anymore)
iw.addDocument(new Document());
iw.forceMerge(1);

DirectoryReader reader = DirectoryReader.open(iw);
IndexSearcher indexSearcher = new IndexSearcher(reader);

assertEquals(1, indexSearcher.count(new TermQuery(new Term("id", "1"))));

reader.close();
iw.close();
dir.close();
}

private static class TermFreqs {
long totalTermFreq;
int docFreq;
@@ -120,6 +120,9 @@ New Features

* SOLR-9077: Streaming expressions should support collection alias (Kevin Risden)

* SOLR-9324: Support Secure Impersonation / Proxy User for solr authentication
  (Gregory Chanan, Hrishikesh Gadre via yonik)

Optimizations
----------------------
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have

@@ -128,6 +131,9 @@ Optimizations
* SOLR-9726: Reduce number of lookupOrd calls made by the DocValuesFacets.getCounts method.
  (Jonny Marks via Christine Poerschke)

* SOLR-9772: Deriving distributed sort values (fieldSortValues) should reuse
  comparator and only invalidate leafComparator. (John Call via yonik)

Bug Fixes
----------------------
* SOLR-9701: NPE in export handler when "fl" parameter is omitted.

@@ -183,6 +189,10 @@ Other Changes
* SOLR-8332: Factor HttpShardHandler[Factory]'s url shuffling out into a ReplicaListTransformer class.
  (Christine Poerschke, Noble Paul)

* SOLR-9597: Add setReadOnly(String ...) to ConnectionImpl (Kevin Risden)

* SOLR-9609: Change hard-coded keysize from 512 to 1024 (Jeremy Martini via Erick Erickson)

================== 6.3.0 ==================

Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

@@ -615,9 +625,6 @@ New Features
* SOLR-9279: New boolean comparison function queries comparing numeric arguments: gt, gte, lt, lte, eq
  (Doug Turnbull, David Smiley)

* SOLR-9324: Support Secure Impersonation / Proxy User for solr authentication
  (Gregory Chanan)

* SOLR-9252: Feature selection and logistic regression on text (Cao Manh Dat, Joel Bernstein)

* SOLR-6465: CDCR: fall back to whole-index replication when tlogs are insufficient.
@@ -2645,16 +2645,14 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
try {
FileUtils.deleteDirectory(dataDir);
} catch (IOException e) {
SolrException.log(log, "Failed to delete data dir for unloaded core:" + cd.getName()
+ " dir:" + dataDir.getAbsolutePath());
log.error("Failed to delete data dir for unloaded core: {} dir: {}", cd.getName(), dataDir.getAbsolutePath(), e);
}
}
if (deleteInstanceDir) {
try {
FileUtils.deleteDirectory(cd.getInstanceDir().toFile());
} catch (IOException e) {
SolrException.log(log, "Failed to delete instance dir for unloaded core:" + cd.getName()
+ " dir:" + cd.getInstanceDir());
log.error("Failed to delete instance dir for unloaded core: {} dir: {}", cd.getName(), cd.getInstanceDir(), e);
}
}
}
@@ -616,7 +616,7 @@ public class QueryComponent extends SearchComponent
// :TODO: would be simpler to always serialize every position of SortField[]
if (type==SortField.Type.SCORE || type==SortField.Type.DOC) continue;

FieldComparator<?> comparator = null;
FieldComparator<?> comparator = sortField.getComparator(1,0);
LeafFieldComparator leafComparator = null;
Object[] vals = new Object[nDocs];

@@ -633,13 +633,13 @@ public class QueryComponent extends SearchComponent
idx = ReaderUtil.subIndex(doc, leaves);
currentLeaf = leaves.get(idx);
if (idx != lastIdx) {
// we switched segments. invalidate comparator.
comparator = null;
// we switched segments. invalidate leafComparator.
lastIdx = idx;
leafComparator = null;
}
}

if (comparator == null) {
comparator = sortField.getComparator(1,0);
if (leafComparator == null) {
leafComparator = comparator.getLeafComparator(currentLeaf);
}
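The SOLR-9772 change keeps one FieldComparator per SortField for the whole loop and only re-derives the per-segment LeafFieldComparator when the document moves to a different leaf, instead of rebuilding the comparator itself on every segment switch. A condensed, self-contained sketch of that reuse pattern follows; the class and method names are illustrative only, while the comparator calls mirror the Lucene 6.x API used in the hunk above.

import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.SortField;

final class SortValueSketch {                       // hypothetical helper, not part of the patch
  /** Collects the sort value of each (globally numbered, ascending) doc id for one SortField. */
  static Object[] sortValues(SortField sortField, int[] docs, List<LeafReaderContext> leaves)
      throws IOException {
    FieldComparator<?> comparator = sortField.getComparator(1, 0); // created once, reused
    LeafFieldComparator leafComparator = null;
    Object[] vals = new Object[docs.length];
    LeafReaderContext currentLeaf = null;
    int lastIdx = -1;
    for (int i = 0; i < docs.length; i++) {
      int doc = docs[i];
      int idx = ReaderUtil.subIndex(doc, leaves);
      if (idx != lastIdx) {
        currentLeaf = leaves.get(idx);
        lastIdx = idx;
        leafComparator = null;                      // segment changed: only the leaf view is stale
      }
      if (leafComparator == null) {
        leafComparator = comparator.getLeafComparator(currentLeaf);
      }
      leafComparator.copy(0, doc - currentLeaf.docBase);  // load the value into slot 0
      vals[i] = comparator.value(0);
    }
    return vals;
  }
}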
@@ -285,6 +285,10 @@ public final class CryptoKeys {
private final PrivateKey privateKey;
private final SecureRandom random = new SecureRandom();

// If this ever comes back to haunt us see the discussion at
// SOLR-9609 for background and code allowing this to go
// into security.json
private static final int DEFAULT_KEYPAIR_LENGTH = 1024;

public RSAKeyPair() {
KeyPairGenerator keyGen = null;

@@ -293,7 +297,7 @@ public final class CryptoKeys {
} catch (NoSuchAlgorithmException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
keyGen.initialize(512);
keyGen.initialize(DEFAULT_KEYPAIR_LENGTH);
java.security.KeyPair keyPair = keyGen.genKeyPair();
privateKey = keyPair.getPrivate();
publicKey = keyPair.getPublic();
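The key-size bump rides on plain java.security; nothing else about the call sequence changes. A standalone sketch of the same sequence at 1024 bits (the class name and the exception wrapping are ours; CryptoKeys.RSAKeyPair is the real code and wraps the failure in a SolrException):

import java.security.KeyPair;
import java.security.KeyPairGenerator;
import java.security.NoSuchAlgorithmException;
import java.security.PrivateKey;
import java.security.PublicKey;

final class RsaKeyPairSketch {                      // illustrative only
  private static final int DEFAULT_KEYPAIR_LENGTH = 1024;  // was hard-coded to 512 before SOLR-9609

  static KeyPair generate() {
    try {
      KeyPairGenerator keyGen = KeyPairGenerator.getInstance("RSA");
      keyGen.initialize(DEFAULT_KEYPAIR_LENGTH);
      return keyGen.genKeyPair();
    } catch (NoSuchAlgorithmException e) {
      throw new IllegalStateException(e);           // real code: SolrException(SERVER_ERROR, e)
    }
  }

  public static void main(String[] args) {
    KeyPair pair = generate();
    PrivateKey priv = pair.getPrivate();
    PublicKey pub = pair.getPublic();
    System.out.println(pub.getAlgorithm() + " key pair generated, public key format=" + pub.getFormat());
  }
}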
@@ -114,7 +114,7 @@ public class BlockDirectoryTest extends SolrTestCaseJ4 {
if (random().nextBoolean()) {
Metrics metrics = new Metrics();
int blockSize = 8192;
int slabSize = blockSize * 32768;
int slabSize = blockSize * 16384;
long totalMemory = 1 * slabSize;
BlockCache blockCache = new BlockCache(metrics, true, totalMemory, slabSize, blockSize);
BlockDirectoryCache cache = new BlockDirectoryCache(blockCache, "/collection1", metrics, true);
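The only functional effect of this test tweak is halving the memory the randomized branch allocates: since totalMemory is exactly one slab, the block cache drops from a 256 MiB slab to a 128 MiB one. A tiny arithmetic sketch (not part of the patch):

public class SlabSizeMath {                          // illustrative only
  public static void main(String[] args) {
    int blockSize = 8192;
    long oldSlab = (long) blockSize * 32768;         // 268_435_456 bytes = 256 MiB
    long newSlab = (long) blockSize * 16384;         // 134_217_728 bytes = 128 MiB
    System.out.printf("old slab = %d MiB, new slab = %d MiB%n",
        oldSlab >> 20, newSlab >> 20);
  }
}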
@@ -155,6 +155,15 @@ class ConnectionImpl implements Connection {

}

/*
 * When using OpenLink ODBC-JDBC bridge on Windows, it runs the method ConnectionImpl.setReadOnly(String ...).
 * The spec says that setReadOnly(boolean ...) is required. This causes the ODBC-JDBC bridge to fail on Windows.
 * OpenLink case: http://support.openlinksw.com/support/techupdate.vsp?c=21881
 */
public void setReadOnly(String readOnly) throws SQLException {

}

@Override
public boolean isReadOnly() throws SQLException {
return true;
@@ -886,7 +886,7 @@ public class JavaBinCodec implements PushWriter {
daos.writeByte(NULL);
return true;
} else if (val instanceof CharSequence) {
writeStr((String) val);
writeStr((CharSequence) val);
return true;
} else if (val instanceof Number) {
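The cast change matters because the instanceof check accepts any CharSequence, while the old cast assumed the value was specifically a String; a non-String CharSequence such as a StringBuilder would have failed with a ClassCastException before ever reaching writeStr. A self-contained sketch of just that cast behaviour (the writeStr call itself belongs to JavaBinCodec and is not reproduced here):

public class CharSequenceCastDemo {                  // illustrative only
  public static void main(String[] args) {
    Object val = new StringBuilder("hello");         // a CharSequence that is not a String

    if (val instanceof CharSequence) {
      // Old behaviour: writeStr((String) val) -- this cast throws ClassCastException.
      try {
        String s = (String) val;
        System.out.println("cast to String ok: " + s);
      } catch (ClassCastException e) {
        System.out.println("cast to String failed: " + e);
      }

      // New behaviour: writeStr((CharSequence) val) -- always safe after the instanceof check.
      CharSequence cs = (CharSequence) val;
      System.out.println("as CharSequence: " + cs);
    }
  }
}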