LUCENE-5879: add auto-prefix terms to block tree, and experimental AutoPrefixTermsPostingsFormat

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1670918 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2015-04-02 15:05:48 +00:00
parent 658a131002
commit 30807709e6
45 changed files with 4867 additions and 1772 deletions

View File

@ -19,6 +19,10 @@ New Features
for counting ranges that align with the underlying terms as defined by the
NumberRangePrefixTree (e.g. familiar date units like days). (David Smiley)
* LUCENE-5879: Added experimental auto-prefix terms to BlockTree terms
dictionary, exposed as AutoPrefixPostingsFormat (Adrien Grand,
Uwe Schindler, Robert Muir, Mike McCandless)
API Changes
* LUCENE-3312: The API of oal.document was restructured to

View File

@ -0,0 +1,125 @@
package org.apache.lucene.codecs.autoprefix;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
/**
* Just like {@link Lucene50PostingsFormat} except this format
* exposes the experimental auto-prefix terms.
*
* @lucene.experimental
*/
public final class AutoPrefixPostingsFormat extends PostingsFormat {
private final int minItemsInBlock;
private final int maxItemsInBlock;
private final int minItemsInAutoPrefix;
private final int maxItemsInAutoPrefix;
/** Creates {@code AutoPrefixPostingsFormat} with default settings. */
public AutoPrefixPostingsFormat() {
this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE,
25, 48);
}
/** Creates {@code AutoPrefixPostingsFormat} with default block sizes and
* custom values for {@code minItemsInAutoPrefix} and {@code
* maxItemsInAutoPrefix}, passed to the block tree terms dictionary.
* @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int,int,int) */
public AutoPrefixPostingsFormat(int minItemsInAutoPrefix, int maxItemsInAutoPrefix) {
this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE,
minItemsInAutoPrefix,
maxItemsInAutoPrefix);
}
/** Creates {@code AutoPrefixPostingsFormat} with custom
* values for {@code minItemsInBlock}, {@code
* maxItemsInBlock}, {@code minItemsInAutoPrefix} and {@code maxItemsInAutoPrefix}, passed
* to the block tree terms dictionary.
* @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int,int,int) */
public AutoPrefixPostingsFormat(int minItemsInBlock, int maxItemsInBlock, int minItemsInAutoPrefix, int maxItemsInAutoPrefix) {
super("AutoPrefix");
BlockTreeTermsWriter.validateSettings(minItemsInBlock,
maxItemsInBlock);
BlockTreeTermsWriter.validateAutoPrefixSettings(minItemsInAutoPrefix,
maxItemsInAutoPrefix);
this.minItemsInBlock = minItemsInBlock;
this.maxItemsInBlock = maxItemsInBlock;
this.minItemsInAutoPrefix = minItemsInAutoPrefix;
this.maxItemsInAutoPrefix = maxItemsInAutoPrefix;
}
@Override
public String toString() {
return getName();
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(state);
boolean success = false;
try {
FieldsConsumer ret = new BlockTreeTermsWriter(state,
postingsWriter,
minItemsInBlock,
maxItemsInBlock,
minItemsInAutoPrefix,
maxItemsInAutoPrefix);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(postingsWriter);
}
}
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase postingsReader = new Lucene50PostingsReader(state);
boolean success = false;
try {
FieldsProducer ret = new BlockTreeTermsReader(postingsReader, state);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(postingsReader);
}
}
}
}
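For orientation, a minimal sketch (not part of this commit) of wiring the format into an IndexWriter by overriding the default codec's per-field postings format; Lucene50Codec is the default codec on this branch, and dir/analyzer are assumed to exist:
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
iwc.setCodec(new Lucene50Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
// Auto-prefix terms require IndexOptions.DOCS fields:
return new AutoPrefixPostingsFormat();
}
});
IndexWriter w = new IndexWriter(dir, iwc);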

View File

@ -0,0 +1,22 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An experimental postings format that automatically indexes appropriate
* prefix terms for fast range and prefix queries.
*/
package org.apache.lucene.codecs.autoprefix;

View File

@ -20,3 +20,4 @@ org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
org.apache.lucene.codecs.memory.FSTPostingsFormat
org.apache.lucene.codecs.memory.MemoryPostingsFormat
org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat
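Registering the class in this META-INF/services file lets SPI resolve the format by the name passed to super("AutoPrefix") in its constructor; a small sketch of that lookup:
PostingsFormat pf = PostingsFormat.forName("AutoPrefix");
assert pf instanceof AutoPrefixPostingsFormat;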

View File

@ -0,0 +1,38 @@
package org.apache.lucene.codecs.autoprefix;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.RandomPostingsTester;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Tests AutoPrefix's postings
*/
// NOTE: we don't extend BasePostingsFormatTestCase because we can only handle DOCS_ONLY fields:
public class TestAutoPrefixPostingsFormat extends LuceneTestCase {
public void test() throws Exception {
new RandomPostingsTester(random()).testFull(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat()),
createTempDir("autoprefix"),
IndexOptions.DOCS,
false);
}
}

View File

@ -0,0 +1,738 @@
package org.apache.lucene.codecs.autoprefix;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CompiledAutomaton;
public class TestAutoPrefixTerms extends LuceneTestCase {
private int minItemsPerBlock = TestUtil.nextInt(random(), 2, 100);
private int maxItemsPerBlock = 2*(Math.max(2, minItemsPerBlock-1)) + random().nextInt(100);
private int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, 100);
private int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
private final Codec codec = TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minItemsPerBlock, maxItemsPerBlock,
minTermsAutoPrefix, maxTermsAutoPrefix));
// Numbers in a restricted range, encoded in decimal, left-0-padded:
public void testBasicNumericRanges() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setCodec(codec);
IndexWriter w = new IndexWriter(dir, iwc);
int numTerms = TestUtil.nextInt(random(), 3000, 50000);
Set<String> terms = new HashSet<>();
int digits = TestUtil.nextInt(random(), 5, 10);
int maxValue = 1;
for(int i=0;i<digits;i++) {
maxValue *= 10;
}
String format = "%0" + digits + "d";
while (terms.size() < numTerms) {
terms.add(String.format(Locale.ROOT, format, random().nextInt(maxValue)));
}
for(String term : terms) {
Document doc = new Document();
doc.add(new StringField("field", term, Field.Store.NO));
doc.add(new NumericDocValuesField("field", Long.parseLong(term)));
w.addDocument(doc);
}
if (random().nextBoolean()) {
if (VERBOSE) System.out.println("\nTEST: now force merge");
w.forceMerge(1);
}
if (VERBOSE) System.out.println("\nTEST: now done");
IndexReader r = DirectoryReader.open(w, true);
List<String> sortedTerms = new ArrayList<>(terms);
Collections.sort(sortedTerms);
if (VERBOSE) {
System.out.println("TEST: sorted terms:");
int idx = 0;
for(String term : sortedTerms) {
System.out.println(idx + ": " + term);
idx++;
}
}
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
int min, max;
while (true) {
min = random().nextInt(maxValue);
max = random().nextInt(maxValue);
if (min == max) {
continue;
} else if (min > max) {
int x = min;
min = max;
max = x;
}
break;
}
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " min=" + min + " max=" + max);
}
boolean minInclusive = random().nextBoolean();
boolean maxInclusive = random().nextBoolean();
BytesRef minTerm = new BytesRef(String.format(Locale.ROOT, format, min));
BytesRef maxTerm = new BytesRef(String.format(Locale.ROOT, format, max));
CompiledAutomaton ca = new CompiledAutomaton(Automata.makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive),
true, false, Integer.MAX_VALUE, true);
TermsEnum te = ca.getTermsEnum(MultiFields.getTerms(r, "field"));
NumericDocValues docValues = MultiDocValues.getNumericValues(r, "field");
PostingsEnum postingsEnum = null;
VerifyAutoPrefixTerms verifier = new VerifyAutoPrefixTerms(r.maxDoc(), minTerm, maxTerm);
while (te.next() != null) {
if (VERBOSE) {
System.out.println(" got term=" + te.term().utf8ToString());
}
verifier.sawTerm(te.term());
postingsEnum = te.postings(null, postingsEnum);
int docID;
while ((docID = postingsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
long v = docValues.get(docID);
assert v >= min && v <= max: "docID=" + docID + " v=" + v;
// The auto-prefix terms should never "overlap" one another, so we should only ever see a given docID one time:
if (VERBOSE) {
System.out.println(" got docID=" + docID + " v=" + v);
}
verifier.sawDoc(docID);
}
}
int startLoc = Collections.binarySearch(sortedTerms, String.format(Locale.ROOT, format, min));
if (startLoc < 0) {
startLoc = -startLoc-1;
} else if (minInclusive == false) {
startLoc++;
}
int endLoc = Collections.binarySearch(sortedTerms, String.format(Locale.ROOT, format, max));
if (endLoc < 0) {
endLoc = -endLoc-2;
} else if (maxInclusive == false) {
endLoc--;
}
verifier.finish(endLoc-startLoc+1, maxTermsAutoPrefix);
}
r.close();
w.close();
dir.close();
}
private static BytesRef intToBytes(int v) {
int sortableBits = v ^ 0x80000000;
BytesRef token = new BytesRef(4);
token.length = 4;
int index = 3;
while (index >= 0) {
token.bytes[index] = (byte) (sortableBits & 0xff);
index--;
sortableBits >>>= 8;
}
return token;
}
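// Illustrative mapping (assumed example, not in the original test):
// XOR-ing the sign bit makes signed ints sort correctly as unsigned
// big-endian bytes, so term order matches numeric order:
// Integer.MIN_VALUE -> 00 00 00 00
// -1 -> 7f ff ff ff
// 0 -> 80 00 00 00
// Integer.MAX_VALUE -> ff ff ff ff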
// Numbers are encoded in full binary (4 byte ints):
public void testBinaryNumericRanges() throws Exception {
if (VERBOSE) {
System.out.println("TEST: minItemsPerBlock=" + minItemsPerBlock);
System.out.println("TEST: maxItemsPerBlock=" + maxItemsPerBlock);
System.out.println("TEST: minTermsAutoPrefix=" + minTermsAutoPrefix);
System.out.println("TEST: maxTermsAutoPrefix=" + maxTermsAutoPrefix);
}
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setCodec(codec);
IndexWriter w = new IndexWriter(dir, iwc);
int numTerms = TestUtil.nextInt(random(), 3000, 50000);
Set<Integer> terms = new HashSet<>();
while (terms.size() < numTerms) {
terms.add(random().nextInt());
}
for(Integer term : terms) {
Document doc = new Document();
doc.add(new BinaryField("field", intToBytes(term)));
doc.add(new NumericDocValuesField("field", term));
w.addDocument(doc);
}
if (random().nextBoolean()) {
if (VERBOSE) System.out.println("TEST: now force merge");
w.forceMerge(1);
}
IndexReader r = DirectoryReader.open(w, true);
List<Integer> sortedTerms = new ArrayList<>(terms);
Collections.sort(sortedTerms);
if (VERBOSE) {
System.out.println("TEST: sorted terms:");
int idx = 0;
for(Integer term : sortedTerms) {
System.out.println(idx + ": " + term);
idx++;
}
}
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
int min, max;
while (true) {
min = random().nextInt();
max = random().nextInt();
if (min == max) {
continue;
} else if (min > max) {
int x = min;
min = max;
max = x;
}
break;
}
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " min=" + min + " (" + intToBytes(min) + ") max=" + max + " (" + intToBytes(max) + ")");
}
boolean minInclusive = random().nextBoolean();
BytesRef minTerm = intToBytes(min);
boolean maxInclusive = random().nextBoolean();
BytesRef maxTerm = intToBytes(max);
CompiledAutomaton ca = new CompiledAutomaton(Automata.makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive),
true, false, Integer.MAX_VALUE, true);
TermsEnum te = ca.getTermsEnum(MultiFields.getTerms(r, "field"));
NumericDocValues docValues = MultiDocValues.getNumericValues(r, "field");
PostingsEnum postingsEnum = null;
VerifyAutoPrefixTerms verifier = new VerifyAutoPrefixTerms(r.maxDoc(), minTerm, maxTerm);
while (te.next() != null) {
if (VERBOSE) {
System.out.println(" got term=" + te.term() + " docFreq=" + te.docFreq());
}
verifier.sawTerm(te.term());
postingsEnum = te.postings(null, postingsEnum);
int docID;
while ((docID = postingsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
long v = docValues.get(docID);
assert v >= min && v <= max: "docID=" + docID + " v=" + v;
verifier.sawDoc(docID);
}
}
int startLoc = Collections.binarySearch(sortedTerms, min);
if (startLoc < 0) {
startLoc = -startLoc-1;
} else if (minInclusive == false) {
startLoc++;
}
int endLoc = Collections.binarySearch(sortedTerms, max);
if (endLoc < 0) {
endLoc = -endLoc-2;
} else if (maxInclusive == false) {
endLoc--;
}
int expectedHits = endLoc-startLoc+1;
try {
verifier.finish(expectedHits, maxTermsAutoPrefix);
} catch (AssertionError ae) {
for(int i=0;i<numTerms;i++) {
if (verifier.allHits.get(i) == false) {
int v = (int) docValues.get(i);
boolean accept = (v > min || (v == min && minInclusive)) &&
(v < max || (v == max && maxInclusive));
if (accept) {
System.out.println("MISSING: docID=" + i + " v=" + v + " term=" + intToBytes(v));
}
}
}
throw ae;
}
}
r.close();
w.close();
dir.close();
}
// Non-numeric, simple prefix query
public void testBasicPrefixTerms() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setCodec(codec);
iwc.setMergeScheduler(new SerialMergeScheduler());
IndexWriter w = new IndexWriter(dir, iwc);
int numTerms = TestUtil.nextInt(random(), 3000, 50000);
Set<String> terms = new HashSet<>();
while (terms.size() < numTerms) {
terms.add(TestUtil.randomSimpleString(random()));
}
for(String term : terms) {
Document doc = new Document();
doc.add(new StringField("field", term, Field.Store.NO));
doc.add(new BinaryDocValuesField("field", new BytesRef(term)));
w.addDocument(doc);
}
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println("TEST: now force merge");
}
w.forceMerge(1);
}
IndexReader r = DirectoryReader.open(w, true);
List<String> sortedTerms = new ArrayList<>(terms);
Collections.sort(sortedTerms);
if (VERBOSE) {
System.out.println("TEST: sorted terms:");
int idx = 0;
for(String term : sortedTerms) {
System.out.println(idx + ": " + term);
idx++;
}
}
if (VERBOSE) {
System.out.println("TEST: r=" + r);
}
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter);
}
String prefix;
if (random().nextInt(100) == 42) {
prefix = "";
} else {
prefix = TestUtil.randomSimpleString(random(), 1, 4);
}
BytesRef prefixBR = new BytesRef(prefix);
if (VERBOSE) {
System.out.println(" prefix=" + prefix);
}
CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(prefixBR), true, false, Integer.MAX_VALUE, true);
TermsEnum te = ca.getTermsEnum(MultiFields.getTerms(r, "field"));
BinaryDocValues docValues = MultiDocValues.getBinaryValues(r, "field");
PostingsEnum postingsEnum = null;
VerifyAutoPrefixTerms verifier = new VerifyAutoPrefixTerms(r.maxDoc(), prefixBR);
while (te.next() != null) {
if (VERBOSE) {
System.out.println("TEST: got term=" + te.term().utf8ToString() + " docFreq=" + te.docFreq());
}
verifier.sawTerm(te.term());
postingsEnum = te.postings(null, postingsEnum);
int docID;
while ((docID = postingsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
assertTrue("prefixBR=" + prefixBR + " docBR=" + docValues.get(docID), StringHelper.startsWith(docValues.get(docID), prefixBR));
// The auto-prefix terms should never "overlap" one another, so we should only ever see a given docID one time:
verifier.sawDoc(docID);
}
}
int startLoc = Collections.binarySearch(sortedTerms, prefix);
if (startLoc < 0) {
startLoc = -startLoc-1;
}
int endLoc = Collections.binarySearch(sortedTerms, prefix + (char) ('z'+1));
if (endLoc < 0) {
endLoc = -endLoc-2;
}
int expectedHits = endLoc-startLoc+1;
try {
verifier.finish(expectedHits, maxTermsAutoPrefix);
} catch (AssertionError ae) {
for(int i=0;i<numTerms;i++) {
if (verifier.allHits.get(i) == false) {
String s = docValues.get(i).utf8ToString();
if (s.startsWith(prefix)) {
System.out.println("MISSING: docID=" + i + " term=" + s);
}
}
}
throw ae;
}
}
r.close();
w.close();
dir.close();
}
public void testDemoPrefixTerms() throws Exception {
if (VERBOSE) {
System.out.println("\nTEST: minTermsAutoPrefix=" + minTermsAutoPrefix + " maxTermsAutoPrefix=" + maxTermsAutoPrefix);
System.out.println("\nTEST: minItemsPerBlock=" + minItemsPerBlock + " maxItemsPerBlock=" + maxItemsPerBlock);
}
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setCodec(codec);
IndexWriter w = new IndexWriter(dir, iwc);
int numDocs = 30;
for(int i=0;i<numDocs;i++) {
Document doc = new Document();
doc.add(new StringField("field", "" + (char) (97+i), Field.Store.NO));
w.addDocument(doc);
doc = new Document();
doc.add(new StringField("field", "a" + (char) (97+i), Field.Store.NO));
w.addDocument(doc);
}
if (random().nextBoolean()) {
w.forceMerge(1);
}
IndexReader r = DirectoryReader.open(w, true);
Terms terms = MultiFields.getTerms(r, "field");
if (VERBOSE) {
System.out.println("\nTEST: now intersect");
}
CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(new BytesRef("a")), false, false, Integer.MAX_VALUE, true);
TermsEnum te = ca.getTermsEnum(terms);
PostingsEnum postingsEnum = null;
VerifyAutoPrefixTerms verifier = new VerifyAutoPrefixTerms(r.maxDoc(), new BytesRef("a"));
//TermsEnum te = terms.intersect(new CompiledAutomaton(a, true, false), null);
while (te.next() != null) {
verifier.sawTerm(te.term());
postingsEnum = te.postings(null, postingsEnum);
int docID;
while ((docID = postingsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
// The auto-prefix terms should never "overlap" one another, so we should only ever see a given docID one time:
verifier.sawDoc(docID);
}
}
// 1 document has exactly "a", and 30 documents have "a" plus one more character:
verifier.finish(31, maxTermsAutoPrefix);
PrefixQuery q = new PrefixQuery(new Term("field", "a"));
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
assertEquals(31, newSearcher(r).search(q, 1).totalHits);
r.close();
w.close();
dir.close();
}
static final class BinaryTokenStream extends TokenStream {
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
private boolean available = true;
public BinaryTokenStream(BytesRef bytes) {
bytesAtt.setBytesRef(bytes);
}
@Override
public boolean incrementToken() {
if (available) {
clearAttributes();
available = false;
return true;
}
return false;
}
@Override
public void reset() {
available = true;
}
public interface ByteTermAttribute extends TermToBytesRefAttribute {
void setBytesRef(BytesRef bytes);
}
public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
private BytesRef bytes;
@Override
public void fillBytesRef() {
// no-op: the bytes were already filled by our owner's incrementToken
}
@Override
public BytesRef getBytesRef() {
return bytes;
}
@Override
public void setBytesRef(BytesRef bytes) {
this.bytes = bytes;
}
@Override
public void clear() {}
@Override
public void copyTo(AttributeImpl target) {
ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
other.bytes = bytes;
}
}
}
/** Basically a StringField that accepts a binary term. */
private static class BinaryField extends Field {
final static FieldType TYPE;
static {
TYPE = new FieldType(StringField.TYPE_NOT_STORED);
// Necessary so our custom tokenStream is used by Field.tokenStream:
TYPE.setTokenized(true);
TYPE.freeze();
}
public BinaryField(String name, BytesRef value) {
super(name, new BinaryTokenStream(value), TYPE);
}
}
/** Helper class to ensure auto-prefix terms 1) never overlap one another, and 2) are used when they should be. */
private static class VerifyAutoPrefixTerms {
final FixedBitSet allHits;
private final Map<BytesRef,Integer> prefixCounts = new HashMap<>();
private int totPrefixCount;
private final BytesRef[] bounds;
private int totTermCount;
private BytesRef lastTerm;
public VerifyAutoPrefixTerms(int maxDoc, BytesRef... bounds) {
allHits = new FixedBitSet(maxDoc);
assert bounds.length > 0;
this.bounds = bounds;
}
public void sawTerm(BytesRef term) {
//System.out.println("saw term=" + term);
if (lastTerm != null) {
assertTrue(lastTerm.compareTo(term) < 0);
}
lastTerm = BytesRef.deepCopyOf(term);
totTermCount++;
totPrefixCount += term.length;
for(int i=1;i<=term.length;i++) {
BytesRef prefix = BytesRef.deepCopyOf(term);
prefix.length = i;
Integer count = prefixCounts.get(prefix);
if (count == null) {
count = 1;
} else {
count += 1;
}
prefixCounts.put(prefix, count);
}
}
public void sawDoc(int docID) {
// The auto-prefix terms should never "overlap" one another, so we should only ever see a given docID one time:
assertFalse(allHits.getAndSet(docID));
}
public void finish(int expectedNumHits, int maxPrefixCount) {
if (maxPrefixCount != -1) {
// Auto-terms were used in this test
long allowedMaxTerms;
if (bounds.length == 1) {
// Simple prefix query: we should never see more than maxPrefixCount terms:
allowedMaxTerms = maxPrefixCount;
} else {
// Trickier: we need to allow for maxPrefixTerms for each different leading byte in the min and max:
assert bounds.length == 2;
BytesRef minTerm = bounds[0];
BytesRef maxTerm = bounds[1];
int commonPrefix = 0;
for(int i=0;i<minTerm.length && i<maxTerm.length;i++) {
if (minTerm.bytes[minTerm.offset+i] != maxTerm.bytes[maxTerm.offset+i]) {
commonPrefix = i;
break;
}
}
allowedMaxTerms = maxPrefixCount * (long) ((minTerm.length-commonPrefix) + (maxTerm.length-commonPrefix));
}
assertTrue("totTermCount=" + totTermCount + " is > allowedMaxTerms=" + allowedMaxTerms, totTermCount <= allowedMaxTerms);
}
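// Worked example (illustrative values): for bounds minTerm="0002345" and
// maxTerm="0002789", the first differing byte is at index 4, so
// commonPrefix=4 and, with maxPrefixCount=48, allowedMaxTerms =
// 48 * ((7-4) + (7-4)) = 288.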
assertEquals(expectedNumHits, allHits.cardinality());
int sum = 0;
for(Map.Entry<BytesRef,Integer> ent : prefixCounts.entrySet()) {
BytesRef prefix = ent.getKey();
if (VERBOSE) {
System.out.println(" verify prefix=" + TestUtil.bytesRefToString(prefix) + " count=" + ent.getValue());
}
if (maxPrefixCount != -1) {
// Auto-terms were used in this test
int sumLeftoverSuffix = 0;
for(BytesRef bound : bounds) {
int minSharedLength = Math.min(bound.length, prefix.length);
int commonPrefix = minSharedLength;
for(int i=0;i<minSharedLength;i++) {
if (bound.bytes[bound.offset+i] != prefix.bytes[prefix.offset+i]) {
commonPrefix = i;
break;
}
}
sumLeftoverSuffix += bound.length - commonPrefix;
}
long limit = (1+sumLeftoverSuffix) * (long) maxPrefixCount;
assertTrue("maxPrefixCount=" + maxPrefixCount + " prefix=" + prefix + " sumLeftoverSuffix=" + sumLeftoverSuffix + " limit=" + limit + " vs actual=" +ent.getValue(),
ent.getValue() <= limit);
}
sum += ent.getValue();
}
// Make sure no test bug:
assertEquals(totPrefixCount, sum);
}
}
/** Make sure you get a clear exception if you try to use this with anything but IndexOptions.DOCS fields. */
public void testWithFreqs() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setCodec(codec);
IndexWriter w = new IndexWriter(dir, iwc);
FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Document doc = new Document();
doc.add(new Field("foo", "bar bar", ft));
w.addDocument(doc);
try {
w.commit();
fail("did not hit expected exception");
} catch (IllegalStateException ise) {
assertEquals("ranges can only be indexed with IndexOptions.DOCS (field: foo)", ise.getMessage());
}
w.close();
dir.close();
}
/** Make sure you get a clear exception if you try to use this with anything but IndexOptions.DOCS fields. */
public void testWithPositions() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setCodec(codec);
IndexWriter w = new IndexWriter(dir, iwc);
FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
Document doc = new Document();
doc.add(new Field("foo", "bar bar", ft));
w.addDocument(doc);
try {
w.commit();
fail("did not hit expected exception");
} catch (IllegalStateException ise) {
assertEquals("ranges can only be indexed with IndexOptions.DOCS (field: foo)", ise.getMessage());
}
w.close();
dir.close();
}
/** Make sure you get a clear exception if you try to use this with anything but IndexOptions.DOCS fields. */
public void testWithOffsets() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setCodec(codec);
IndexWriter w = new IndexWriter(dir, iwc);
FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
Document doc = new Document();
doc.add(new Field("foo", "bar bar", ft));
w.addDocument(doc);
try {
w.commit();
fail("did not hit expected exception");
} catch (IllegalStateException ise) {
assertEquals("ranges can only be indexed with IndexOptions.DOCS (field: foo)", ise.getMessage());
}
w.close();
dir.close();
}
}

View File

@ -16,6 +16,7 @@ package org.apache.lucene.codecs;
* limitations under the License.
*/
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader; // javadocs
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.TermState;
@ -23,6 +24,8 @@ import org.apache.lucene.index.TermState;
* Holds all state required for {@link PostingsReaderBase}
* to produce a {@link org.apache.lucene.index.PostingsEnum} without re-seeking the
* terms dict.
*
* @lucene.internal
*/
public class BlockTermState extends OrdTermState {
/** how many docs have this term */
@ -36,6 +39,11 @@ public class BlockTermState extends OrdTermState {
// TODO: update BTR to nuke this
public long blockFilePointer;
/** True if this term is "real" (e.g., not an auto-prefix term or
* some other "secret" term; currently only {@link BlockTreeTermsReader}
* sets this). */
public boolean isRealTerm;
/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
protected BlockTermState() {
@ -50,10 +58,11 @@ public class BlockTermState extends OrdTermState {
totalTermFreq = other.totalTermFreq;
termBlockOrd = other.termBlockOrd;
blockFilePointer = other.blockFilePointer;
isRealTerm = other.isRealTerm;
}
@Override
public String toString() {
return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd + " blockFP=" + blockFilePointer;
return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd + " blockFP=" + blockFilePointer + " isRealTerm=" + isRealTerm;
}
}

View File

@ -62,6 +62,7 @@ public abstract class PostingsFormat implements NamedSPILoader.NamedSPI {
* @param name must be all ascii alphanumeric, and less than 128 characters in length.
*/
protected PostingsFormat(String name) {
// TODO: can we somehow detect name conflicts here? Two different classes trying to claim the same name? Otherwise you see confusing errors...
NamedSPILoader.checkServiceName(name);
this.name = name;
}

View File

@ -0,0 +1,415 @@
package org.apache.lucene.codecs.blocktree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.StringHelper;
// TODO: instead of inlining auto-prefix terms with normal terms,
// we could write them into their own virtual/private field. This
// would make search time a bit more complex, since we'd need to
// merge sort between two TermEnums, but it would also make stats
// API (used by CheckIndex -verbose) easier to implement since we could
// just walk this virtual field and gather its stats.
/** Used in the first pass when writing a segment to locate
* "appropriate" auto-prefix terms to pre-compile into the index.
* This visits every term in the index to find prefixes that
* are shared by at least min and at most max terms. */
class AutoPrefixTermsWriter {
//static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
//static boolean DEBUG = false;
//static boolean DEBUG2 = BlockTreeTermsWriter.DEBUG2;
//static boolean DEBUG2 = true;
/** Describes a range of term-space to match, either a simple prefix
* (foo*) or a floor-block range of a prefix (e.g. foo[a-m]*,
* foo[n-z]*) when there are too many terms starting with foo*. */
public static final class PrefixTerm implements Comparable<PrefixTerm> {
/** Common prefix */
public final byte[] prefix;
/** If this is -2, this is a normal prefix (foo*); else it's the minimum lead byte of the suffix (e.g. 'd' in foo[d-m]*). */
public final int floorLeadStart;
/** The lead byte (inclusive) of the suffix for the term range we match (e.g. 'm' in foo[d-m]*); this is ignored when
* floorLeadStart is -2. */
public final int floorLeadEnd;
public final BytesRef term;
/** Sole constructor. */
public PrefixTerm(byte[] prefix, int floorLeadStart, int floorLeadEnd) {
this.prefix = prefix;
this.floorLeadStart = floorLeadStart;
this.floorLeadEnd = floorLeadEnd;
this.term = toBytesRef(prefix, floorLeadStart);
assert floorLeadEnd >= floorLeadStart;
assert floorLeadEnd >= 0;
assert floorLeadStart == -2 || floorLeadStart >= 0;
// We should never create an empty-string prefix term:
assert prefix.length > 0 || floorLeadStart != -2 || floorLeadEnd != 0xff;
}
@Override
public String toString() {
String s = brToString(new BytesRef(prefix));
if (floorLeadStart == -2) {
s += "[-" + Integer.toHexString(floorLeadEnd) + "]";
} else {
s += "[" + Integer.toHexString(floorLeadStart) + "-" + Integer.toHexString(floorLeadEnd) + "]";
}
return s;
}
@Override
public int compareTo(PrefixTerm other) {
int cmp = term.compareTo(other.term);
if (cmp == 0) {
if (prefix.length != other.prefix.length) {
return prefix.length - other.prefix.length;
}
// On tie, sort the bigger floorLeadEnd earlier, since it
// spans more terms; during intersect, we want to encounter that one
// first so we can use it if the automaton accepts the larger range:
cmp = other.floorLeadEnd - floorLeadEnd;
}
return cmp;
}
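// Illustrative tie-break (assumed example): foo[a-z]* sorts before
// foo[a-m]*, since both compare equal on term ("fooa") and prefix
// length, but [a-z] has the larger floorLeadEnd and spans more terms.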
/** Returns the leading term for this prefix term, e.g. "foo" (for
* the foo* prefix) or "foom" (for the foo[m-z]* case). */
private static BytesRef toBytesRef(byte[] prefix, int floorLeadStart) {
BytesRef br;
if (floorLeadStart != -2) {
assert floorLeadStart >= 0;
br = new BytesRef(prefix.length+1);
} else {
br = new BytesRef(prefix.length);
}
System.arraycopy(prefix, 0, br.bytes, 0, prefix.length);
br.length = prefix.length;
if (floorLeadStart != -2) {
assert floorLeadStart >= 0;
br.bytes[br.length++] = (byte) floorLeadStart;
}
return br;
}
public int compareTo(BytesRef term) {
return this.term.compareTo(term);
}
public TermsEnum getTermsEnum(TermsEnum in) {
final BytesRef prefixRef = new BytesRef(prefix);
return new FilteredTermsEnum(in) {
{
setInitialSeekTerm(term);
}
@Override
protected AcceptStatus accept(BytesRef term) {
if (StringHelper.startsWith(term, prefixRef) &&
(floorLeadEnd == -1 || term.length == prefixRef.length || (term.bytes[term.offset + prefixRef.length] & 0xff) <= floorLeadEnd)) {
return AcceptStatus.YES;
} else {
return AcceptStatus.END;
}
}
};
}
}
// for debugging
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
final List<PrefixTerm> prefixes = new ArrayList<>();
private final int minItemsInPrefix;
private final int maxItemsInPrefix;
private final BytesRefBuilder lastTerm = new BytesRefBuilder();
// Records the index into pending where the current prefix at that
// length "started"; for example, if the current term starts with 't',
// prefixStarts[0] is the index into pending of the first
// term/sub-block starting with 't'. We use this to figure out when
// to write a new block:
private int[] prefixStarts = new int[8];
private List<Object> pending = new ArrayList<>();
//private final String segment;
public AutoPrefixTermsWriter(Terms terms, int minItemsInPrefix, int maxItemsInPrefix) throws IOException {
this.minItemsInPrefix = minItemsInPrefix;
this.maxItemsInPrefix = maxItemsInPrefix;
//this.segment = segment;
TermsEnum termsEnum = terms.iterator(null);
while (true) {
BytesRef term = termsEnum.next();
if (term == null) {
break;
}
//if (DEBUG) System.out.println("pushTerm: " + brToString(term));
pushTerm(term);
}
if (pending.size() > 1) {
pushTerm(BlockTreeTermsWriter.EMPTY_BYTES_REF);
// Also maybe save floor prefixes in root block; this can be a biggish perf gain for large ranges:
/*
System.out.println("root block pending.size=" + pending.size());
for(Object o : pending) {
System.out.println(" " + o);
}
*/
while (pending.size() >= minItemsInPrefix) {
savePrefixes(0, pending.size());
}
}
Collections.sort(prefixes);
}
/** Pushes the new term to the top of the stack, and writes new blocks. */
private void pushTerm(BytesRef text) throws IOException {
int limit = Math.min(lastTerm.length(), text.length);
// Find common prefix between last term and current term:
int pos = 0;
while (pos < limit && lastTerm.byteAt(pos) == text.bytes[text.offset+pos]) {
pos++;
}
//if (DEBUG) System.out.println(" shared=" + pos + " lastTerm.length=" + lastTerm.length());
// Close the "abandoned" suffix now:
for(int i=lastTerm.length()-1;i>=pos;i--) {
// How many items on top of the stack share the current suffix
// we are closing:
int prefixTopSize = pending.size() - prefixStarts[i];
while (prefixTopSize >= minItemsInPrefix) {
//if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " minItemsInBlock=" + minItemsInPrefix);
savePrefixes(i+1, prefixTopSize);
//prefixStarts[i] -= prefixTopSize;
//System.out.println(" after savePrefixes: " + (pending.size() - prefixStarts[i]) + " pending.size()=" + pending.size() + " start=" + prefixStarts[i]);
// For large floor blocks, it's possible we should now re-run on the new prefix terms we just created:
prefixTopSize = pending.size() - prefixStarts[i];
}
}
if (prefixStarts.length < text.length) {
prefixStarts = ArrayUtil.grow(prefixStarts, text.length);
}
// Init new tail:
for(int i=pos;i<text.length;i++) {
prefixStarts[i] = pending.size();
}
lastTerm.copyBytes(text);
// Only append the first (optional) empty string, not the fake last one used to close all prefixes:
if (text.length > 0 || pending.isEmpty()) {
byte[] termBytes = new byte[text.length];
System.arraycopy(text.bytes, text.offset, termBytes, 0, text.length);
pending.add(termBytes);
}
}
void savePrefixes(int prefixLength, int count) throws IOException {
assert count > 0;
//if (DEBUG2) {
// BytesRef br = new BytesRef(lastTerm.bytes());
// br.length = prefixLength;
// System.out.println(" savePrefixes: seg=" + segment + " " + brToString(br) + " count=" + count + " pending.size()=" + pending.size());
//}
int lastSuffixLeadLabel = -2;
int start = pending.size()-count;
assert start >=0;
int end = pending.size();
int nextBlockStart = start;
int nextFloorLeadLabel = -1;
int prefixCount = 0;
int pendingCount = 0;
PrefixTerm lastPTEntry = null;
for (int i=start; i<end; i++) {
byte[] termBytes;
Object o = pending.get(i);
PrefixTerm ptEntry;
if (o instanceof byte[]) {
ptEntry = null;
termBytes = (byte[]) o;
} else {
ptEntry = (PrefixTerm) o;
termBytes = ptEntry.term.bytes;
if (ptEntry.prefix.length != prefixLength) {
assert ptEntry.prefix.length > prefixLength;
ptEntry = null;
}
}
pendingCount++;
//if (DEBUG) System.out.println(" check term=" + brToString(new BytesRef(termBytes)));
int suffixLeadLabel;
if (termBytes.length == prefixLength) {
// Suffix is 0, i.e. prefix 'foo' and term is
// 'foo' so the term has empty string suffix
// in this block
assert lastSuffixLeadLabel == -2;
suffixLeadLabel = -2;
} else {
suffixLeadLabel = termBytes[prefixLength] & 0xff;
}
// if (DEBUG) System.out.println(" i=" + i + " ent=" + ent + " suffixLeadLabel=" + suffixLeadLabel);
if (suffixLeadLabel != lastSuffixLeadLabel) {
// This is a boundary, a chance to make an auto-prefix term if we want:
// When we are "recursing" (generating auto-prefix terms on a block of
// floor'd auto-prefix terms), this assert is non-trivial because it
// ensures the floorLeadEnd of the previous terms is in fact less
// than the lead start of the current entry:
assert suffixLeadLabel > lastSuffixLeadLabel: "suffixLeadLabel=" + suffixLeadLabel + " vs lastSuffixLeadLabel=" + lastSuffixLeadLabel;
// NOTE: must check nextFloorLeadLabel in case minItemsInPrefix is 2 and prefix is 'a' and we've seen 'a' and then 'aa'
if (pendingCount >= minItemsInPrefix && end-nextBlockStart > maxItemsInPrefix && nextFloorLeadLabel != -1) {
// The count is too large for one block, so we must break it into "floor" blocks, where we record
// the leading label of the suffix of the first term in each floor block, so at search time we can
// jump to the right floor block. We just use a naive greedy segmenter here: make a new floor
// block as soon as we have at least minItemsInBlock. This is not always best: it often produces
// a too-small block as the final block:
// If the last entry was another prefix term of the same length, then it represents a range of terms, so we must use its ending
// prefix label as our ending label:
if (lastPTEntry != null) {
lastSuffixLeadLabel = lastPTEntry.floorLeadEnd;
}
savePrefix(prefixLength, nextFloorLeadLabel, lastSuffixLeadLabel);
pendingCount = 0;
prefixCount++;
nextFloorLeadLabel = suffixLeadLabel;
nextBlockStart = i;
}
if (nextFloorLeadLabel == -1) {
nextFloorLeadLabel = suffixLeadLabel;
//if (DEBUG) System.out.println("set first lead label=" + nextFloorLeadLabel);
}
lastSuffixLeadLabel = suffixLeadLabel;
}
lastPTEntry = ptEntry;
}
// Write last block, if any:
if (nextBlockStart < end) {
//System.out.println(" lastPTEntry=" + lastPTEntry + " lastSuffixLeadLabel=" + lastSuffixLeadLabel);
if (lastPTEntry != null) {
lastSuffixLeadLabel = lastPTEntry.floorLeadEnd;
}
assert lastSuffixLeadLabel >= nextFloorLeadLabel: "lastSuffixLeadLabel=" + lastSuffixLeadLabel + " nextFloorLeadLabel=" + nextFloorLeadLabel;
if (prefixCount == 0) {
if (prefixLength > 0) {
savePrefix(prefixLength, -2, 0xff);
prefixCount++;
} else {
// Don't add a prefix term for all terms in the index!
}
} else {
if (lastSuffixLeadLabel == -2) {
// Special case when closing the empty string root block:
lastSuffixLeadLabel = 0xff;
}
savePrefix(prefixLength, nextFloorLeadLabel, lastSuffixLeadLabel);
prefixCount++;
}
}
// Remove slice from the top of the pending stack, that we just wrote:
int sizeToClear = count;
if (prefixCount > 1) {
Object o = pending.get(pending.size()-count);
if (o instanceof byte[] && ((byte[]) o).length == prefixLength) {
// If we were just asked to write all f* terms, but there were too many and so we made floor blocks, the exact term 'f' will remain
// as its own item, followed by floor block terms like f[a-m]*, f[n-z]*, so in this case we leave 3 (not 2) items on the pending stack:
sizeToClear--;
}
}
pending.subList(pending.size()-sizeToClear, pending.size()).clear();
// Append prefix terms for each prefix, since these count like real terms that also need to be "rolled up":
for(int i=0;i<prefixCount;i++) {
PrefixTerm pt = prefixes.get(prefixes.size()-(prefixCount-i));
pending.add(pt);
}
}
private void savePrefix(int prefixLength, int floorLeadStart, int floorLeadEnd) {
byte[] prefix = new byte[prefixLength];
System.arraycopy(lastTerm.bytes(), 0, prefix, 0, prefixLength);
assert floorLeadStart != -1;
assert floorLeadEnd != -1;
PrefixTerm pt = new PrefixTerm(prefix, floorLeadStart, floorLeadEnd);
//if (DEBUG2) System.out.println(" savePrefix: seg=" + segment + " " + pt + " count=" + count);
prefixes.add(pt);
}
}

View File

@ -0,0 +1,95 @@
package org.apache.lucene.codecs.blocktree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; // javadocs
/** Takes a {@link FixedBitSet} and creates a DOCS {@link PostingsEnum} from it. */
class BitSetPostingsEnum extends PostingsEnum {
private final BitSet bits;
private DocIdSetIterator in;
BitSetPostingsEnum(BitSet bits) {
this.bits = bits;
reset();
}
@Override
public int freq() throws IOException {
return 1;
}
@Override
public int docID() {
if (in == null) {
return -1;
} else {
return in.docID();
}
}
@Override
public int nextDoc() throws IOException {
if (in == null) {
in = new BitSetIterator(bits, 0);
}
return in.nextDoc();
}
@Override
public int advance(int target) throws IOException {
return in.advance(target);
}
@Override
public long cost() {
return in.cost();
}
void reset() {
in = null;
}
@Override
public BytesRef getPayload() {
return null;
}
@Override
public int nextPosition() {
throw new UnsupportedOperationException();
}
@Override
public int startOffset() {
throw new UnsupportedOperationException();
}
@Override
public int endOffset() {
throw new UnsupportedOperationException();
}
}

View File

@ -0,0 +1,87 @@
package org.apache.lucene.codecs.blocktree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
/** Silly stub class, used only when writing an auto-prefix
* term in order to expose a {@link PostingsEnum} over a FixedBitSet. We
* pass this to {@link PostingsWriterBase#writeTerm} so
* that it can pull {@code postings()} multiple times for the
* current term. */
class BitSetTermsEnum extends TermsEnum {
private final BitSetPostingsEnum postingsEnum;
public BitSetTermsEnum(BitSet docs) {
postingsEnum = new BitSetPostingsEnum(docs);
}
@Override
public SeekStatus seekCeil(BytesRef text) {
throw new UnsupportedOperationException();
}
@Override
public void seekExact(long ord) {
throw new UnsupportedOperationException();
}
@Override
public BytesRef term() {
throw new UnsupportedOperationException();
}
@Override
public BytesRef next() {
throw new UnsupportedOperationException();
}
@Override
public long ord() {
throw new UnsupportedOperationException();
}
@Override
public int docFreq() {
throw new UnsupportedOperationException();
}
@Override
public long totalTermFreq() {
throw new UnsupportedOperationException();
}
@Override
public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) {
if (flags != PostingsEnum.NONE) {
// We only work with DOCS_ONLY fields
return null;
}
if (liveDocs != null) {
throw new IllegalArgumentException("cannot handle live docs");
}
postingsEnum.reset();
return postingsEnum;
}
}
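A sketch of the call pattern the javadoc above describes (prefixDocs, prefixTerm and docsSeen are hypothetical locals; writeTerm is the real PostingsWriterBase entry point):
FixedBitSet prefixDocs = new FixedBitSet(maxDoc);
// ... OR each matching term's docIDs into prefixDocs ...
BitSetTermsEnum bitsTermsEnum = new BitSetTermsEnum(prefixDocs);
BlockTermState state = postingsWriter.writeTerm(prefixTerm, bitsTermsEnum, docsSeen);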

View File

@ -34,6 +34,8 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.PrefixQuery; // javadocs
import org.apache.lucene.search.TermRangeQuery; // javadocs
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
@ -57,6 +59,14 @@ import org.apache.lucene.util.fst.Outputs;
* min/maxItemsPerBlock during indexing to control how
* much memory the terms index uses.</p>
*
* <p>If auto-prefix terms were indexed (see
* {@link BlockTreeTermsWriter}), then the {@link Terms#intersect}
* implementation here will make use of these terms only if the
* automaton has a binary sink state, i.e. an accept state
* which has a transition to itself accepting all byte values.
* For example, both {@link PrefixQuery} and {@link TermRangeQuery}
* pass such automata to {@link Terms#intersect}.</p>
*
* <p>The data structure used by this implementation is very
* similar to a burst trie
* (http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499),
@ -90,8 +100,11 @@ public final class BlockTreeTermsReader extends FieldsProducer {
/** Initial terms format. */
public static final int VERSION_START = 0;
/** Auto-prefix terms. */
public static final int VERSION_AUTO_PREFIX_TERMS = 1;
/** Current terms format. */
public static final int VERSION_CURRENT = VERSION_START;
public static final int VERSION_CURRENT = VERSION_AUTO_PREFIX_TERMS;
/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tip";
@ -116,7 +129,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
final String segment;
private final int version;
final int version;
/** Sole constructor. */
public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state) throws IOException {
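To make the intersect note above concrete, the tests in this commit build such automata like this (sketch; reader is an assumed open IndexReader). PrefixQuery.toAutomaton yields the binary sink state required for auto-prefix terms to kick in:
CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(new BytesRef("foo")),
true, false, Integer.MAX_VALUE, true);
TermsEnum te = ca.getTermsEnum(MultiFields.getTerms(reader, "field"));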

View File

@ -25,11 +25,13 @@ import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.blocktree.AutoPrefixTermsWriter.PrefixTerm;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@ -87,6 +89,16 @@ import org.apache.lucene.util.packed.PackedInts;
* stride) each term's metadata for each set of terms
* between two index terms.
* <p>
*
* If {@code minItemsInAutoPrefix} is not zero, then for
* {@link IndexOptions#DOCS} fields we detect prefixes that match
* "enough" terms and insert auto-prefix terms into the index, which are
* used by {@link Terms#intersect} at search time to speed up prefix
* and range queries. Besides {@link Terms#intersect}, these
* auto-prefix terms are invisible to all other APIs (don't change terms
* stats, don't show up in normal {@link TermsEnum}s, etc.).
* <p>
*
* Files:
* <ul>
* <li><tt>.tim</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
@ -200,7 +212,9 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
* #BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */
public final static int DEFAULT_MAX_BLOCK_SIZE = 48;
// public final static boolean DEBUG = false;
//public static boolean DEBUG = false;
//public static boolean DEBUG2 = false;
//private final static boolean SAVE_DOT_FILES = false;
private final IndexOutput termsOut;
@ -208,6 +222,8 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
final int maxDoc;
final int minItemsInBlock;
final int maxItemsInBlock;
final int minItemsInAutoPrefix;
final int maxItemsInAutoPrefix;
final PostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
@ -244,23 +260,67 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
private final List<FieldMetaData> fields = new ArrayList<>();
// private final String segment;
final FixedBitSet prefixDocs;
/** Reused in getAutoPrefixTermsEnum: */
final BitSetTermsEnum prefixFixedBitsTermsEnum;
/** Reused in getAutoPrefixTermsEnum: */
private TermsEnum prefixTermsEnum;
/** Reused in getAutoPrefixTermsEnum: */
private PostingsEnum prefixDocsEnum;
/** Create a new writer, using default values for auto-prefix terms. */
public BlockTreeTermsWriter(SegmentWriteState state,
PostingsWriterBase postingsWriter,
int minItemsInBlock,
int maxItemsInBlock) throws IOException {
this(state, postingsWriter, minItemsInBlock, maxItemsInBlock, 0, 0);
}
/** Create a new writer. The number of items (terms or
* sub-blocks) per block will aim to be between
* minItemsPerBlock and maxItemsPerBlock, though in some
* cases the blocks may be smaller than the min. */
* cases the blocks may be smaller than the min.
* For DOCS_ONLY fields, this terms dictionary will
* insert automatically generated prefix terms for common
* prefixes, as long as each prefix matches at least
* {@code minItemsInAutoPrefix} other terms or prefixes,
* and at most {@code maxItemsInAutoPrefix} other terms
* or prefixes. Set {@code minItemsInAutoPrefix} to 0
* to disable auto-prefix terms. */
public BlockTreeTermsWriter(SegmentWriteState state,
PostingsWriterBase postingsWriter,
int minItemsInBlock,
int maxItemsInBlock)
int maxItemsInBlock,
int minItemsInAutoPrefix,
int maxItemsInAutoPrefix)
throws IOException
{
validateSettings(minItemsInBlock, maxItemsInBlock);
validateSettings(minItemsInBlock,
maxItemsInBlock);
this.minItemsInBlock = minItemsInBlock;
this.maxItemsInBlock = maxItemsInBlock;
validateAutoPrefixSettings(minItemsInAutoPrefix,
maxItemsInAutoPrefix);
if (minItemsInAutoPrefix != 0) {
// TODO: can we use a compressed bitset instead? that auto-upgrades if it's dense enough...
prefixDocs = new FixedBitSet(state.segmentInfo.maxDoc());
prefixFixedBitsTermsEnum = new BitSetTermsEnum(prefixDocs);
} else {
prefixDocs = null;
prefixFixedBitsTermsEnum = null;
}
this.minItemsInAutoPrefix = minItemsInAutoPrefix;
this.maxItemsInAutoPrefix = maxItemsInAutoPrefix;
this.maxDoc = state.segmentInfo.maxDoc();
this.fieldInfos = state.fieldInfos;
this.minItemsInBlock = minItemsInBlock;
this.maxItemsInBlock = maxItemsInBlock;
this.postingsWriter = postingsWriter;
final String termsName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_EXTENSION);
@ -269,12 +329,13 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
IndexOutput indexOut = null;
try {
CodecUtil.writeIndexHeader(termsOut, BlockTreeTermsReader.TERMS_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
final String indexName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_INDEX_EXTENSION);
indexOut = state.directory.createOutput(indexName, state.context);
CodecUtil.writeIndexHeader(indexOut, BlockTreeTermsReader.TERMS_INDEX_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
//segment = state.segmentInfo.name;
postingsWriter.init(termsOut, state); // have consumer write its format/header
@ -311,34 +372,109 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
}
/** Throws {@code IllegalArgumentException} if any of these settings
* is invalid. */
public static void validateAutoPrefixSettings(int minItemsInAutoPrefix,
int maxItemsInAutoPrefix) {
if (minItemsInAutoPrefix != 0) {
if (minItemsInAutoPrefix < 2) {
throw new IllegalArgumentException("minItemsInAutoPrefix must be at least 2; got minItemsInAutoPrefix=" + minItemsInAutoPrefix);
}
if (minItemsInAutoPrefix > maxItemsInAutoPrefix) {
throw new IllegalArgumentException("maxItemsInAutoPrefix must be >= minItemsInAutoPrefix; got maxItemsInAutoPrefix=" + maxItemsInAutoPrefix + " minItemsInAutoPrefix=" + minItemsInAutoPrefix);
}
if (2*(minItemsInAutoPrefix-1) > maxItemsInAutoPrefix) {
throw new IllegalArgumentException("maxItemsInAutoPrefix must be at least 2*(minItemsInAutoPrefix-1); got maxItemsInAutoPrefix=" + maxItemsInAutoPrefix + " minItemsInAutoPrefix=" + minItemsInAutoPrefix);
}
} else if (maxItemsInAutoPrefix != 0) {
throw new IllegalArgumentException("maxItemsInAutoPrefix must be 0 (disabled) when minItemsInAutoPrefix is 0");
}
}
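// Hedged illustration (not part of this commit): how the checks above treat a
// few concrete settings. The 2*(min-1) bound presumably guarantees that a
// prefix which overflows maxItemsInAutoPrefix can always be split into two
// floor'd pieces that each still reach the minimum.
@SuppressWarnings("unused")
private static void exampleAutoPrefixSettings() {
  validateAutoPrefixSettings(0, 0);    // OK: auto-prefix terms disabled
  validateAutoPrefixSettings(2, 2);    // OK: 2*(2-1) = 2 <= 2
  validateAutoPrefixSettings(25, 48);  // OK: 2*(25-1) = 48 <= 48
  validateAutoPrefixSettings(25, 47);  // throws: 47 < 2*(25-1)
  validateAutoPrefixSettings(1, 4);    // throws: minItemsInAutoPrefix must be >= 2
}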
@Override
public void write(Fields fields) throws IOException {
//if (DEBUG) System.out.println("\nBTTW.write seg=" + segment);
String lastField = null;
for(String field : fields) {
assert lastField == null || lastField.compareTo(field) < 0;
lastField = field;
//if (DEBUG) System.out.println("\nBTTW.write seg=" + segment + " field=" + field);
Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
// First pass to find all prefix terms we should compile into the index:
List<PrefixTerm> prefixTerms;
if (minItemsInAutoPrefix != 0) {
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
throw new IllegalStateException("ranges can only be indexed with IndexOptions.DOCS (field: " + fieldInfo.name + ")");
}
prefixTerms = new AutoPrefixTermsWriter(terms, minItemsInAutoPrefix, maxItemsInAutoPrefix).prefixes;
//if (DEBUG) {
// for(PrefixTerm term : prefixTerms) {
// System.out.println("field=" + fieldInfo.name + " PREFIX TERM: " + term);
// }
//}
} else {
prefixTerms = null;
}
TermsEnum termsEnum = terms.iterator(null);
TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field));
int prefixTermUpto = 0;
while (true) {
BytesRef term = termsEnum.next();
//if (DEBUG) System.out.println("BTTW: next term " + term);
// Insert (merge sort) next prefix term(s):
if (prefixTerms != null) {
while (prefixTermUpto < prefixTerms.size() && (term == null || prefixTerms.get(prefixTermUpto).compareTo(term) <= 0)) {
PrefixTerm prefixTerm = prefixTerms.get(prefixTermUpto);
//if (DEBUG) System.out.println("seg=" + segment + " field=" + fieldInfo.name + " NOW INSERT prefix=" + prefixTerm);
termsWriter.write(prefixTerm.term, getAutoPrefixTermsEnum(terms, prefixTerm), prefixTerm);
prefixTermUpto++;
}
}
if (term == null) {
break;
}
//if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + brToString(term));
termsWriter.write(term, termsEnum, null);
}
assert prefixTerms == null || prefixTermUpto == prefixTerms.size();
termsWriter.finish();
//if (DEBUG) System.out.println("\nBTTW.write done seg=" + segment + " field=" + field);
}
}
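// Hedged sketch (hypothetical snippet, not in this commit; assumes
// org.apache.lucene.document.{Document,Field,FieldType} imports): the check
// above means a field that should get auto-prefix terms must be indexed with
// IndexOptions.DOCS and nothing more, e.g.:
//
//   FieldType ft = new FieldType();
//   ft.setIndexOptions(IndexOptions.DOCS);
//   ft.setTokenized(false);
//   ft.freeze();
//   doc.add(new Field("id", "foo42", ft));
//
// Any other IndexOptions on such a field hits the IllegalStateException above.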
private TermsEnum getAutoPrefixTermsEnum(Terms terms, final PrefixTerm prefix) throws IOException {
assert prefixDocs != null;
prefixDocs.clear(0, prefixDocs.length());
prefixTermsEnum = prefix.getTermsEnum(terms.iterator(prefixTermsEnum));
//System.out.println("BTTW.getAutoPrefixTE: prefix=" + prefix);
while (prefixTermsEnum.next() != null) {
//System.out.println(" got term=" + prefixTermsEnum.term().utf8ToString());
//termCount++;
prefixDocsEnum = prefixTermsEnum.postings(null, prefixDocsEnum, 0);
//System.out.println(" " + prefixDocsEnum + " doc=" + prefixDocsEnum.docID());
prefixDocs.or(prefixDocsEnum);
}
//System.out.println(" done terms: " + prefixDocs.cardinality() + " doc seen; " + termCount + " terms seen");
return prefixFixedBitsTermsEnum;
}
static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) {
assert fp < (1L << 62);
return (fp << 2) | (hasTerms ? BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0) | (isFloor ? BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0);
@ -356,30 +492,38 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
public final byte[] termBytes;
// stats + metadata
public final BlockTermState state;
// Non-null if this is an auto-prefix-term:
public final PrefixTerm prefixTerm;
public PendingTerm other;
public PendingTerm(BytesRef term, BlockTermState state, PrefixTerm prefixTerm) {
super(true);
this.termBytes = new byte[term.length];
System.arraycopy(term.bytes, term.offset, termBytes, 0, term.length);
this.state = state;
this.prefixTerm = prefixTerm;
}
@Override
public String toString() {
return "TERM: " + brToString(termBytes);
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
if (b == null) {
return "(null)";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
@ -410,7 +554,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "BLOCK: prefix=" + brToString(prefix);
}
public void compileIndex(List<PendingBlock> blocks, RAMOutputStream scratchBytes, IntsRefBuilder scratchIntsRef) throws IOException {
@ -493,6 +637,8 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
private final RAMOutputStream scratchBytes = new RAMOutputStream();
private final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
static final BytesRef EMPTY_BYTES_REF = new BytesRef();
class TermsWriter {
private final FieldInfo fieldInfo;
private final int longsSize;
@ -529,14 +675,11 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
assert count > 0;
//if (DEBUG2) {
// BytesRef br = new BytesRef(lastTerm.bytes());
// br.length = prefixLength;
// System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count=" + count);
//}
// Root block better write all remaining pending entries:
assert prefixLength > 0 || count == pending.size();
@ -547,6 +690,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// only points to sub-blocks in the terms index so we can avoid seeking
// to it when we are looking for a term):
boolean hasTerms = false;
boolean hasPrefixTerms = false;
boolean hasSubBlocks = false;
int start = pending.size()-count;
@ -566,7 +710,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// Suffix is 0, i.e. prefix 'foo' and term is
// 'foo' so the term has empty string suffix
// in this block
assert lastSuffixLeadLabel == -1: "i=" + i + " lastSuffixLeadLabel=" + lastSuffixLeadLabel;
suffixLeadLabel = -1;
} else {
suffixLeadLabel = term.termBytes[prefixLength] & 0xff;
@ -587,10 +731,11 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// block as soon as we have at least minItemsInBlock. This is not always best: it often produces
// a too-small block as the final block:
boolean isFloor = itemsInBlock < count;
newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, i, hasTerms, hasPrefixTerms, hasSubBlocks));
hasTerms = false;
hasSubBlocks = false;
hasPrefixTerms = false;
nextFloorLeadLabel = suffixLeadLabel;
nextBlockStart = i;
}
@ -600,6 +745,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
if (ent.isTerm) {
hasTerms = true;
hasPrefixTerms |= ((PendingTerm) ent).prefixTerm != null;
} else {
hasSubBlocks = true;
}
@ -609,7 +755,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
if (nextBlockStart < end) {
int itemsInBlock = end - nextBlockStart;
boolean isFloor = itemsInBlock < count;
newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, end, hasTerms, hasPrefixTerms, hasSubBlocks));
}
assert newBlocks.isEmpty() == false;
@ -634,7 +780,8 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
* were too many (more than maxItemsInBlock) entries sharing the
* same prefix, and so we broke it into multiple floor blocks where
* we record the starting label of the suffix of each floor block. */
private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLeadLabel, int start, int end,
boolean hasTerms, boolean hasPrefixTerms, boolean hasSubBlocks) throws IOException {
assert end > start;
@ -646,6 +793,8 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
System.arraycopy(lastTerm.get().bytes, 0, prefix.bytes, 0, prefixLength);
prefix.length = prefixLength;
//if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" + brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
// Write block header:
int numEntries = end - start;
int code = numEntries << 1;
@ -666,31 +815,34 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// We optimize the leaf block case (block has only terms), writing a more
// compact format in this case:
boolean isLeafBlock = hasSubBlocks == false && hasPrefixTerms == false;
//System.out.println(" isLeaf=" + isLeafBlock);
final List<FST<BytesRef>> subIndices;
boolean absolute = true;
if (isLeafBlock) {
// Block contains only ordinary terms:
subIndices = null;
for (int i=start;i<end;i++) {
PendingEntry ent = pending.get(i);
assert ent.isTerm: "i=" + i;
PendingTerm term = (PendingTerm) ent;
assert term.prefixTerm == null;
assert StringHelper.startsWith(term.termBytes, prefix): "term.term=" + term.termBytes + " prefix=" + prefix;
BlockTermState state = term.state;
final int suffix = term.termBytes.length - prefixLength;
//if (DEBUG2) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes));
//}
// For leaf block we write suffix straight
suffixWriter.writeVInt(suffix);
suffixWriter.writeBytes(term.termBytes, prefixLength, suffix);
@ -714,27 +866,51 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
absolute = false;
}
} else {
// Block has at least one prefix term or a sub-block:
subIndices = new ArrayList<>();
boolean sawAutoPrefixTerm = false;
for (int i=start;i<end;i++) {
PendingEntry ent = pending.get(i);
if (ent.isTerm) {
PendingTerm term = (PendingTerm) ent;
assert StringHelper.startsWith(term.termBytes, prefix): "term.term=" + term.termBytes + " prefix=" + prefix;
BlockTermState state = term.state;
final int suffix = term.termBytes.length - prefixLength;
//if (DEBUG2) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes));
// if (term.prefixTerm != null) {
// System.out.println(" ** auto-prefix term: " + term.prefixTerm);
// }
//}
// For non-leaf block we borrow 1 bit to record
// if entry is term or sub-block, and 1 bit to record if
// it's a prefix term. Terms cannot be larger than ~32 KB
// so we won't run out of bits:
code = suffix<<2;
int floorLeadEnd = -1;
if (term.prefixTerm != null) {
sawAutoPrefixTerm = true;
PrefixTerm prefixTerm = term.prefixTerm;
floorLeadEnd = prefixTerm.floorLeadEnd;
assert floorLeadEnd != -1;
if (prefixTerm.floorLeadStart == -2) {
// Starts with empty string
code |= 2;
} else {
code |= 3;
}
}
suffixWriter.writeVInt(code);
suffixWriter.writeBytes(term.termBytes, prefixLength, suffix);
if (floorLeadEnd != -1) {
suffixWriter.writeByte((byte) floorLeadEnd);
}
assert floorLeadLabel == -1 || (term.termBytes[prefixLength] & 0xff) >= floorLeadLabel;
// Write term stats, to separate byte[] blob:
@ -765,33 +941,32 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
PendingBlock block = (PendingBlock) ent;
assert StringHelper.startsWith(block.prefix, prefix);
final int suffix = block.prefix.length - prefixLength;
assert suffix > 0;
// For non-leaf block we borrow 1 bit to record
// if entry is term or sub-block, and 1 bit (unset here) to
// record if it's a prefix term:
suffixWriter.writeVInt((suffix<<2)|1);
suffixWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
//if (DEBUG2) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
//}
assert floorLeadLabel == -1 || (block.prefix.bytes[prefixLength] & 0xff) >= floorLeadLabel: "floorLeadLabel=" + floorLeadLabel + " suffixLead=" + (block.prefix.bytes[prefixLength] & 0xff);
assert block.fp < startFP;
suffixWriter.writeVLong(startFP - block.fp);
subIndices.add(block.index);
}
}
assert subIndices.size() != 0 || sawAutoPrefixTerm;
}
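// Summary of the non-leaf entry code written above (hedged; the decoding
// counterpart lives in IntersectTermsEnumFrame.nextNonLeaf):
//   code = suffix << 2, low two bits:
//     0: ordinary term
//     1: sub-block; a VLong file-pointer delta follows the suffix bytes
//     2: auto-prefix term whose floor range starts at the empty suffix;
//        one floorLeadEnd byte (0xff meaning "not floor'd") follows
//     3: floor'd auto-prefix term; the last suffix byte is floorLeadStart,
//        and one floorLeadEnd byte follows
// Leaf blocks still write a plain VInt suffix length with no code bits.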
// TODO: we could block-write the term suffix pointers;
@ -835,7 +1010,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
/** Writes one term's worth of postings. */
public void write(BytesRef text, TermsEnum termsEnum, PrefixTerm prefixTerm) throws IOException {
/*
if (DEBUG) {
int[] tmp = new int[lastTerm.length];
@ -846,19 +1021,25 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen);
if (state != null) {
assert state.docFreq != 0;
assert fieldInfo.getIndexOptions() == IndexOptions.DOCS || state.totalTermFreq >= state.docFreq: "postingsWriter=" + postingsWriter;
pushTerm(text);
PendingTerm term = new PendingTerm(text, state, prefixTerm);
pending.add(term);
//if (DEBUG) System.out.println(" add pending term = " + text + " pending.size()=" + pending.size());
if (prefixTerm == null) {
// Only increment stats for real terms:
sumDocFreq += state.docFreq;
sumTotalTermFreq += state.totalTermFreq;
numTerms++;
if (firstPendingTerm == null) {
firstPendingTerm = term;
}
lastPendingTerm = term;
}
}
}
@ -910,6 +1091,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// TODO: if pending.size() is already 1 with a non-zero prefix length
// we can save writing a "degenerate" root block, but we have to
// fix all the places that assume the root block's prefix is the empty string:
pushTerm(new BytesRef());
writeBlocks(0, pending.size());
// We better have one final "root" block:
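// Hedged usage sketch (hypothetical wiring, not in this commit): enabling
// auto-prefix terms when constructing the terms dictionary, mirroring the
// 25/48 defaults mentioned above; passing 0, 0 for the last two arguments
// disables auto-prefix terms:
//
//   PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(writeState);
//   FieldsConsumer consumer = new BlockTreeTermsWriter(writeState, postingsWriter, 25, 48, 25, 48);
//   consumer.write(fields);
//   consumer.close(); // also closes the postings writer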

View File

@ -41,6 +41,8 @@ import org.apache.lucene.util.fst.FST;
*/
public final class FieldReader extends Terms implements Accountable {
// private final boolean DEBUG = BlockTreeTermsWriter.DEBUG;
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(FieldReader.class)
+ 3 * RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);
@ -125,6 +127,7 @@ public final class FieldReader extends Terms implements Accountable {
/** For debugging -- used by CheckIndex too*/
@Override
public Stats getStats() throws IOException {
// TODO: add auto-prefix terms into stats
return new SegmentTermsEnum(this).computeBlockStats();
}
@ -175,10 +178,11 @@ public final class FieldReader extends Terms implements Accountable {
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}
//System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
// can we optimize knowing that...?
return new IntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm, compiled.sinkState);
}
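// Hedged sketch (hypothetical, not in this commit; assumes an
// org.apache.lucene.util.automaton.RegExp import): driving intersect with a
// NORMAL-type automaton. With auto-prefix terms in the index, the returned
// enum may hand back one auto-prefix term in place of many real terms.
@SuppressWarnings("unused")
private static void exampleIntersect(Terms terms) throws IOException {
  CompiledAutomaton compiled = new CompiledAutomaton(new RegExp("fo[ox]*").toAutomaton());
  TermsEnum te = terms.intersect(compiled, null);
  for (BytesRef t = te.next(); t != null; t = te.next()) {
    // consume t; an auto-prefix term's stats are not real term stats
  }
}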
@Override

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@ -28,23 +29,38 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.RunAutomaton;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
/** This is used to implement efficient {@link Terms#intersect} for
* block-tree. Note that it cannot seek, except for the initial term on
* init. It just "nexts" through the intersection of the automaton and
* the terms. It does not use the terms index at all: on init, it
* loads the root block, and scans its way to the initial term.
* Likewise, in next it scans until it finds a term that matches the
 * current automaton transition. If the index has auto-prefix terms
 * (currently only for DOCS_ONLY fields) it will visit these terms
 * when possible and then skip the real terms that the auto-prefix
 * term matched. */
final class IntersectTermsEnum extends TermsEnum {
//static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
final IndexInput in;
final static Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
IntersectTermsEnumFrame[] stack;
@SuppressWarnings({"rawtypes","unchecked"}) private FST.Arc<BytesRef>[] arcs = new FST.Arc[5];
final RunAutomaton runAutomaton;
final Automaton automaton;
final BytesRef commonSuffix;
private IntersectTermsEnumFrame currentFrame;
@ -52,19 +68,34 @@ final class IntersectTermsEnum extends TermsEnum {
private final FST.BytesReader fstReader;
private final boolean allowAutoPrefixTerms;
final FieldReader fr;
/** Which state in the automaton accepts all possible suffixes. */
private final int sinkState;
private BytesRef savedStartTerm;
/** True if we just returned the current auto-prefix term */
private boolean useAutoPrefixTerm;
// TODO: in some cases we can filter by length? eg
// regexp foo*bar must be at least length 6 bytes
public IntersectTermsEnum(FieldReader fr, Automaton automaton, RunAutomaton runAutomaton, BytesRef commonSuffix, BytesRef startTerm, int sinkState) throws IOException {
//if (DEBUG) System.out.println("\nintEnum.init seg=" + fr.parent.segment + " commonSuffix=" + commonSuffix);
this.fr = fr;
this.sinkState = sinkState;
assert automaton != null;
assert runAutomaton != null;
//if (DEBUG) System.out.println("sinkState=" + sinkState + " AUTOMATON:\n" + automaton.toDot());
this.runAutomaton = runAutomaton;
this.allowAutoPrefixTerms = sinkState != -1;
this.automaton = automaton;
this.commonSuffix = commonSuffix;
in = fr.parent.termsIn.clone();
stack = new IntersectTermsEnumFrame[5];
for(int idx=0;idx<stack.length;idx++) {
@ -152,7 +183,7 @@ final class IntersectTermsEnum extends TermsEnum {
f.fp = f.fpOrig = currentFrame.lastSubFP;
f.prefix = currentFrame.prefix + currentFrame.suffix;
//if (DEBUG) System.out.println(" pushFrame state=" + state + " prefix=" + f.prefix);
f.setState(state);
// Walk the arc through the index -- we only
@ -220,7 +251,7 @@ final class IntersectTermsEnum extends TermsEnum {
// arbitrary seekExact/Ceil. Note that this is a
// seekFloor!
private void seekToStartTerm(BytesRef target) throws IOException {
//if (DEBUG) System.out.println("seek to startTerm=" + target.utf8ToString() + " length=" + target.length);
assert currentFrame.ord == 0;
if (term.length < target.length) {
term.bytes = ArrayUtil.grow(term.bytes, target.length);
@ -229,23 +260,29 @@ final class IntersectTermsEnum extends TermsEnum {
assert arc == currentFrame.arc;
for(int idx=0;idx<=target.length;idx++) {
//if (DEBUG) System.out.println("cycle idx=" + idx);
while (true) {
final int savNextEnt = currentFrame.nextEnt;
final int savePos = currentFrame.suffixesReader.getPosition();
final int saveStartBytePos = currentFrame.startBytePos;
final int saveSuffix = currentFrame.suffix;
final long saveLastSubFP = currentFrame.lastSubFP;
final int saveTermBlockOrd = currentFrame.termState.termBlockOrd;
final boolean saveIsAutoPrefixTerm = currentFrame.isAutoPrefixTerm;
//if (DEBUG) System.out.println(" cycle isAutoPrefix=" + saveIsAutoPrefixTerm + " ent=" + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") prefix=" + currentFrame.prefix + " suffix=" + currentFrame.suffix + " firstLabel=" + (currentFrame.suffix == 0 ? "" : (currentFrame.suffixBytes[currentFrame.startBytePos])&0xff));
final boolean isSubBlock = currentFrame.next();
term.length = currentFrame.prefix + currentFrame.suffix;
if (term.bytes.length < term.length) {
term.bytes = ArrayUtil.grow(term.bytes, term.length);
}
System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
//if (DEBUG) System.out.println(" isSubBlock=" + isSubBlock + " term/prefix=" + brToString(term) + " saveIsAutoPrefixTerm=" + saveIsAutoPrefixTerm + " allowAutoPrefixTerms=" + allowAutoPrefixTerms);
if (isSubBlock && StringHelper.startsWith(target, term)) {
// Recurse
//if (DEBUG) System.out.println(" recurse!");
@ -253,9 +290,11 @@ final class IntersectTermsEnum extends TermsEnum {
break;
} else {
final int cmp = term.compareTo(target);
//if (DEBUG) System.out.println(" cmp=" + cmp);
if (cmp < 0) {
if (currentFrame.nextEnt == currentFrame.entCount) {
if (!currentFrame.isLastInFloor) {
// Advance to next floor block
//if (DEBUG) System.out.println(" load floorBlock");
currentFrame.loadNextFloorBlock();
continue;
@ -266,19 +305,24 @@ final class IntersectTermsEnum extends TermsEnum {
}
continue;
} else if (cmp == 0) {
if (allowAutoPrefixTerms == false && currentFrame.isAutoPrefixTerm) {
continue;
}
//if (DEBUG) System.out.println(" return term=" + brToString(term));
return;
} else if (allowAutoPrefixTerms || currentFrame.isAutoPrefixTerm == false) {
// Fallback to prior entry: the semantics of
// this method is that the first call to
// next() will return the term after the
// requested term
//if (DEBUG) System.out.println(" fallback prior entry");
currentFrame.nextEnt = savNextEnt;
currentFrame.lastSubFP = saveLastSubFP;
currentFrame.startBytePos = saveStartBytePos;
currentFrame.suffix = saveSuffix;
currentFrame.suffixesReader.setPosition(savePos);
currentFrame.termState.termBlockOrd = saveTermBlockOrd;
currentFrame.isAutoPrefixTerm = saveIsAutoPrefixTerm;
System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
term.length = currentFrame.prefix + currentFrame.suffix;
// If the last entry was a block we don't
@ -297,77 +341,249 @@ final class IntersectTermsEnum extends TermsEnum {
@Override
public BytesRef next() throws IOException {
//if (DEBUG) {
// System.out.println("\nintEnum.next seg=" + fr.parent.segment);
// System.out.println(" frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " outputPrefix=" + currentFrame.outputPrefix + " trans: " + currentFrame.transition + " useAutoPrefix=" + useAutoPrefixTerm);
//}
nextTerm:
while (true) {
boolean isSubBlock;
if (useAutoPrefixTerm) {
assert currentFrame.isAutoPrefixTerm;
useAutoPrefixTerm = false;
currentFrame.termState.isRealTerm = true;
//if (DEBUG) {
// System.out.println(" now scan beyond auto-prefix term=" + brToString(term) + " floorSuffixLeadEnd=" + Integer.toHexString(currentFrame.floorSuffixLeadEnd));
//}
// If we last returned an auto-prefix term, we must now skip all
// actual terms sharing that prefix. At most, that skipping
// requires popping one frame, but it can also require simply
// scanning ahead within the current frame. This scanning will
// skip sub-blocks that contain many terms, which is why the
// optimization "works":
int floorSuffixLeadEnd = currentFrame.floorSuffixLeadEnd;
if (floorSuffixLeadEnd == -1) {
// An ordinary prefix, e.g. foo*
int prefix = currentFrame.prefix;
int suffix = currentFrame.suffix;
//if (DEBUG) System.out.println(" prefix=" + prefix + " suffix=" + suffix);
if (suffix == 0) {
//if (DEBUG) System.out.println(" pop frame & nextTerm");
// Easy case: the prefix term's suffix is the empty string,
// meaning the prefix corresponds to all terms in the
// current block, so we just pop this entire block:
if (currentFrame.ord == 0) {
//if (DEBUG) System.out.println(" return null");
return null;
}
currentFrame = stack[currentFrame.ord-1];
continue nextTerm;
} else {
// Just next() until we hit an entry that doesn't share this
// prefix. The first next should be a sub-block sharing the
// same prefix, because if there are enough terms matching a
// given prefix to warrant an auto-prefix term, then there
                // must also be enough to make a sub-block (assuming
                // minItemsInAutoPrefix > minItemsInBlock):
scanPrefix:
while (true) {
//if (DEBUG) System.out.println(" scan next");
if (currentFrame.nextEnt == currentFrame.entCount) {
if (currentFrame.isLastInFloor == false) {
currentFrame.loadNextFloorBlock();
} else if (currentFrame.ord == 0) {
//if (DEBUG) System.out.println(" return null0");
return null;
} else {
// Pop frame, which also means we've moved beyond this
// auto-prefix term:
//if (DEBUG) System.out.println(" pop; nextTerm");
currentFrame = stack[currentFrame.ord-1];
continue nextTerm;
}
}
isSubBlock = currentFrame.next();
//if (DEBUG) {
// BytesRef suffixBytes = new BytesRef(currentFrame.suffix);
// System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, suffixBytes.bytes, 0, currentFrame.suffix);
// suffixBytes.length = currentFrame.suffix;
// System.out.println(" currentFrame.suffix=" + brToString(suffixBytes));
//}
for(int i=0;i<suffix;i++) {
if (term.bytes[prefix+i] != currentFrame.suffixBytes[currentFrame.startBytePos+i]) {
//if (DEBUG) System.out.println(" done; now stop scan");
break scanPrefix;
}
}
}
}
} else {
// Floor'd auto-prefix term; in this case we must skip all
// terms e.g. matching foo[a-m]*. We are currently "on" fooa,
// which the automaton accepted (fooa* through foom*), and
// floorSuffixLeadEnd is m, so we must now scan to foon:
int prefix = currentFrame.prefix;
int suffix = currentFrame.suffix;
if (currentFrame.floorSuffixLeadStart == -1) {
suffix++;
}
//if (DEBUG) System.out.println(" prefix=" + prefix + " suffix=" + suffix);
if (suffix == 0) {
//if (DEBUG) System.out.println(" pop frame");
// This means current frame is fooa*, so we have to first
// pop the current frame, then scan in parent frame:
if (currentFrame.ord == 0) {
//if (DEBUG) System.out.println(" return null");
return null;
}
currentFrame = stack[currentFrame.ord-1];
// Current (parent) frame is now foo*, so now we just scan
// until the lead suffix byte is > floorSuffixLeadEnd
//assert currentFrame.prefix == prefix-1;
//prefix = currentFrame.prefix;
                // When we pop, the parent block's prefix can be shorter than prefix-1,
                // e.g. we are in block 417* on its first term, the floor prefix term
                // 41[7-9], and we pop all the way back to block 4*:
prefix = currentFrame.prefix;
suffix = term.length - currentFrame.prefix;
} else {
// No need to pop; just scan in currentFrame:
}
//if (DEBUG) System.out.println(" start scan: prefix=" + prefix + " suffix=" + suffix);
// Now we scan until the lead suffix byte is > floorSuffixLeadEnd
scanFloor:
while (true) {
//if (DEBUG) System.out.println(" scan next");
if (currentFrame.nextEnt == currentFrame.entCount) {
if (currentFrame.isLastInFloor == false) {
//if (DEBUG) System.out.println(" next floor block");
currentFrame.loadNextFloorBlock();
} else if (currentFrame.ord == 0) {
//if (DEBUG) System.out.println(" return null");
return null;
} else {
// Pop frame, which also means we've moved beyond this
// auto-prefix term:
currentFrame = stack[currentFrame.ord-1];
//if (DEBUG) System.out.println(" pop, now curFrame.prefix=" + currentFrame.prefix);
continue nextTerm;
}
}
isSubBlock = currentFrame.next();
//if (DEBUG) {
// BytesRef suffixBytes = new BytesRef(currentFrame.suffix);
// System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, suffixBytes.bytes, 0, currentFrame.suffix);
// suffixBytes.length = currentFrame.suffix;
// System.out.println(" currentFrame.suffix=" + brToString(suffixBytes));
//}
for(int i=0;i<suffix-1;i++) {
if (term.bytes[prefix+i] != currentFrame.suffixBytes[currentFrame.startBytePos+i]) {
//if (DEBUG) System.out.println(" done; now stop scan");
break scanFloor;
}
}
//if (DEBUG) {
// if (currentFrame.suffix >= suffix) {
// System.out.println(" cmp label=" + Integer.toHexString(currentFrame.suffixBytes[currentFrame.startBytePos+suffix-1]) + " vs " + floorSuffixLeadEnd);
// }
//}
if (currentFrame.suffix >= suffix && (currentFrame.suffixBytes[currentFrame.startBytePos+suffix-1]&0xff) > floorSuffixLeadEnd) {
// Done scanning: we are now on the first term after all
// terms matched by this auto-prefix term
//if (DEBUG) System.out.println(" done; now stop scan");
break;
}
}
}
} else {
// Pop finished frames
while (currentFrame.nextEnt == currentFrame.entCount) {
if (!currentFrame.isLastInFloor) {
//if (DEBUG) System.out.println(" next-floor-block: trans: " + currentFrame.transition);
// Advance to next floor block
currentFrame.loadNextFloorBlock();
//if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " outputPrefix=" + currentFrame.outputPrefix);
break;
} else {
//if (DEBUG) System.out.println(" pop frame");
if (currentFrame.ord == 0) {
//if (DEBUG) System.out.println(" return null");
return null;
}
final long lastFP = currentFrame.fpOrig;
currentFrame = stack[currentFrame.ord-1];
assert currentFrame.lastSubFP == lastFP;
//if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " outputPrefix=" + currentFrame.outputPrefix);
}
}
isSubBlock = currentFrame.next();
}
//if (DEBUG) {
// final BytesRef suffixRef = new BytesRef();
// suffixRef.bytes = currentFrame.suffixBytes;
// suffixRef.offset = currentFrame.startBytePos;
// suffixRef.length = currentFrame.suffix;
// System.out.println(" " + (isSubBlock ? "sub-block" : "term") + " " + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" + brToString(suffixRef));
//}
if (currentFrame.suffix != 0) {
// Advance where we are in the automaton to match what terms
// dict next'd to:
final int label = currentFrame.suffixBytes[currentFrame.startBytePos] & 0xff;
//if (DEBUG) {
// System.out.println(" move automaton to label=" + label + " vs curMax=" + currentFrame.curTransitionMax);
// }
while (label > currentFrame.curTransitionMax) {
if (currentFrame.transitionIndex >= currentFrame.transitionCount-1) {
// Pop this frame: no further matches are possible because
// we've moved beyond what the max transition will allow
//if (DEBUG) System.out.println(" break: trans");
if (currentFrame.ord == 0) {
//if (DEBUG) System.out.println(" return null");
return null;
}
currentFrame = stack[currentFrame.ord-1];
continue nextTerm;
}
currentFrame.transitionIndex++;
automaton.getNextTransition(currentFrame.transition);
currentFrame.curTransitionMax = currentFrame.transition.max;
//if (DEBUG) System.out.println(" next trans");
}
}
// First test the common suffix, if set:
if (commonSuffix != null && !isSubBlock) {
final int termLen = currentFrame.prefix + currentFrame.suffix;
if (termLen < commonSuffix.length) {
// No match
//if (DEBUG) System.out.println(" skip: common suffix length");
continue nextTerm;
}
final byte[] suffixBytes = currentFrame.suffixBytes;
final byte[] commonSuffixBytes = commonSuffix.bytes;
final int lenInPrefix = commonSuffix.length - currentFrame.suffix;
assert commonSuffix.offset == 0;
int suffixBytesPos;
int commonSuffixBytesPos = 0;
@ -381,24 +597,20 @@ final class IntersectTermsEnum extends TermsEnum {
final int termBytesPosEnd = currentFrame.prefix;
while (termBytesPos < termBytesPosEnd) {
if (termBytes[termBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
//if (DEBUG) System.out.println(" skip: common suffix mismatch (in prefix)");
continue nextTerm;
}
}
suffixBytesPos = currentFrame.startBytePos;
} else {
suffixBytesPos = currentFrame.startBytePos + currentFrame.suffix - commonSuffix.length;
}
// Test overlapping suffix part:
final int commonSuffixBytesPosEnd = commonSuffix.length;
while (commonSuffixBytesPos < commonSuffixBytesPosEnd) {
if (suffixBytes[suffixBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
//if (DEBUG) System.out.println(" skip: common suffix mismatch");
continue nextTerm;
}
}
@ -410,10 +622,19 @@ final class IntersectTermsEnum extends TermsEnum {
// "temporarily" accepted, we just blindly .next()
// until the limit
// TODO: for first iter of this loop can't we just use the current trans? we already advanced it and confirmed it matches lead
// byte of the suffix
// See if the term suffix matches the automaton:
int state = currentFrame.state;
int lastState = currentFrame.lastState;
//if (DEBUG) {
// System.out.println(" a state=" + state + " curFrame.suffix.len=" + currentFrame.suffix + " curFrame.prefix=" + currentFrame.prefix);
// }
for (int idx=0;idx<currentFrame.suffix;idx++) {
lastState = state;
//if (DEBUG) System.out.println(" step label=" + (char) (currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff));
state = runAutomaton.step(state, currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff);
if (state == -1) {
// No match
//System.out.println(" no s=" + state);
@ -423,16 +644,59 @@ final class IntersectTermsEnum extends TermsEnum {
}
}
//if (DEBUG) System.out.println(" after suffix: state=" + state + " lastState=" + lastState);
if (isSubBlock) {
// Match! Recurse:
//if (DEBUG) System.out.println(" sub-block match to state=" + state + "; recurse fp=" + currentFrame.lastSubFP);
copyTerm();
currentFrame = pushFrame(state);
currentFrame.lastState = lastState;
//xif (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
} else if (currentFrame.isAutoPrefixTerm) {
// We are on an auto-prefix term, meaning this term was compiled
// at indexing time, matching all terms sharing this prefix (or,
// a floor'd subset of them if that count was too high). A
// prefix term represents a range of terms, so we now need to
// test whether, from the current state in the automaton, it
// accepts all terms in that range. As long as it does, we can
// use this term and then later skip ahead past all terms in
// this range:
if (allowAutoPrefixTerms) {
if (currentFrame.floorSuffixLeadEnd == -1) {
// Simple prefix case
useAutoPrefixTerm = state == sinkState;
} else {
if (currentFrame.floorSuffixLeadStart == -1) {
// Must also accept the empty string in this case
if (automaton.isAccept(state)) {
//if (DEBUG) System.out.println(" state is accept");
useAutoPrefixTerm = acceptsSuffixRange(state, 0, currentFrame.floorSuffixLeadEnd);
}
} else {
useAutoPrefixTerm = acceptsSuffixRange(lastState, currentFrame.floorSuffixLeadStart, currentFrame.floorSuffixLeadEnd);
}
}
//if (DEBUG) System.out.println(" useAutoPrefixTerm=" + useAutoPrefixTerm);
if (useAutoPrefixTerm) {
copyTerm();
currentFrame.termState.isRealTerm = false;
//if (DEBUG) System.out.println(" return auto prefix term: " + brToString(term));
return term;
} else {
// We move onto the next term
}
} else {
// We are not allowed to use auto-prefix terms, so we just skip it
}
} else if (runAutomaton.isAccept(state)) {
copyTerm();
//if (DEBUG) System.out.println(" term match to state=" + state);
assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0: "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
//if (DEBUG) System.out.println(" return term=" + brToString(term));
return term;
} else {
//System.out.println(" no s=" + state);
@ -440,6 +704,41 @@ final class IntersectTermsEnum extends TermsEnum {
}
}
private final Transition transition = new Transition();
/** Returns true if, from this state, the automaton accepts all suffixes
 * whose first label is between start and end, inclusive. We just
 * look for a single transition, covering this whole range, to the sink state. */
private boolean acceptsSuffixRange(int state, int start, int end) {
//xif (DEBUG) System.out.println(" acceptsSuffixRange state=" + state + " start=" + start + " end=" + end);
int count = automaton.initTransition(state, transition);
//xif (DEBUG) System.out.println(" transCount=" + count);
//xif (DEBUG) System.out.println(" trans=" + transition);
for(int i=0;i<count;i++) {
automaton.getNextTransition(transition);
if (start >= transition.min && end <= transition.max && transition.dest == sinkState) {
return true;
}
}
return false;
}
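// Hedged illustration (not in this commit): what makes a state the "sink" --
// an accept state whose self-loop covers every byte label, so that every
// suffix from it is accepted. This mirrors the check above:
@SuppressWarnings("unused")
private boolean isSinkState(int state) {
  if (automaton.isAccept(state) == false) {
    return false;
  }
  Transition t = new Transition();
  int count = automaton.initTransition(state, t);
  for (int i = 0; i < count; i++) {
    automaton.getNextTransition(t);
    if (t.dest == state && t.min == 0 && t.max == 0xff) {
      return true;
    }
  }
  return false;
}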
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
private void copyTerm() {
//System.out.println(" copyTerm cur.prefix=" + currentFrame.prefix + " cur.suffix=" + currentFrame.suffix + " first=" + (char) currentFrame.suffixBytes[currentFrame.startBytePos]);
final int len = currentFrame.prefix + currentFrame.suffix;

View File

@ -35,9 +35,14 @@ final class IntersectTermsEnumFrame {
long fpEnd;
long lastSubFP;
// private static boolean DEBUG = IntersectTermsEnum.DEBUG;
// State in automaton
int state;
// State just before the last label
int lastState;
int metaDataUpto;
byte[] suffixBytes = new byte[128];
@ -73,6 +78,8 @@ final class IntersectTermsEnumFrame {
int transitionIndex;
int transitionCount;
final boolean versionAutoPrefix;
FST.Arc<BytesRef> arc;
final BlockTermState termState;
@ -89,6 +96,17 @@ final class IntersectTermsEnumFrame {
int startBytePos;
int suffix;
// When we are on an auto-prefix term this is the starting lead byte
// of the suffix (e.g. 'a' for the foo[a-m]* case):
int floorSuffixLeadStart;
// When we are on an auto-prefix term this is the ending lead byte
// of the suffix (e.g. 'm' for the foo[a-m]* case):
int floorSuffixLeadEnd;
// True if the term we are currently on is an auto-prefix term:
boolean isAutoPrefixTerm;
private final IntersectTermsEnum ite;
public IntersectTermsEnumFrame(IntersectTermsEnum ite, int ord) throws IOException {
@ -97,35 +115,39 @@ final class IntersectTermsEnumFrame {
this.termState = ite.fr.parent.postingsReader.newTermState();
this.termState.totalTermFreq = -1;
this.longs = new long[ite.fr.longsSize];
this.versionAutoPrefix = ite.fr.parent.version >= BlockTreeTermsReader.VERSION_AUTO_PREFIX_TERMS;
}
void loadNextFloorBlock() throws IOException {
assert numFollowFloorBlocks > 0;
//if (DEBUG) System.out.println(" loadNextFloorBlock transition.min=" + transition.min);
do {
fp = fpOrig + (floorDataReader.readVLong() >>> 1);
numFollowFloorBlocks--;
//if (DEBUG) System.out.println(" skip floor block2! nextFloorLabel=" + (char) nextFloorLabel + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
if (numFollowFloorBlocks != 0) {
nextFloorLabel = floorDataReader.readByte() & 0xff;
} else {
nextFloorLabel = 256;
}
//if (DEBUG) System.out.println(" nextFloorLabel=" + (char) nextFloorLabel);
} while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min);
//if (DEBUG) System.out.println(" done loadNextFloorBlock");
load(null);
}
public void setState(int state) {
this.state = state;
transitionIndex = 0;
transitionCount = ite.automaton.getNumTransitions(state);
if (transitionCount != 0) {
ite.automaton.initTransition(state, transition);
ite.automaton.getNextTransition(transition);
curTransitionMax = transition.max;
//if (DEBUG) System.out.println(" after setState state=" + state + " trans: " + transition + " transCount=" + transitionCount);
} else {
curTransitionMax = -1;
}
@ -133,7 +155,7 @@ final class IntersectTermsEnumFrame {
void load(BytesRef frameIndexData) throws IOException {
//xif (DEBUG) System.out.println(" load fp=" + fp + " fpOrig=" + fpOrig + " frameIndexData=" + frameIndexData + " trans=" + (transitions.length != 0 ? transitions[0] : "n/a" + " state=" + state));
if (frameIndexData != null && transitionCount != 0) {
// Floor frame
@ -148,7 +170,7 @@ final class IntersectTermsEnumFrame {
if ((code & BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR) != 0) {
numFollowFloorBlocks = floorDataReader.readVInt();
nextFloorLabel = floorDataReader.readByte() & 0xff;
//if (DEBUG) System.out.println(" numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + nextFloorLabel);
// If current state is accept, we must process
// first block in case it has empty suffix:
@ -158,7 +180,7 @@ final class IntersectTermsEnumFrame {
while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min) {
fp = fpOrig + (floorDataReader.readVLong() >>> 1);
numFollowFloorBlocks--;
//xif (DEBUG) System.out.println(" skip floor block! nextFloorLabel=" + (char) nextFloorLabel + " vs target=" + (char) transitions[0].getMin() + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
if (numFollowFloorBlocks != 0) {
nextFloorLabel = floorDataReader.readByte() & 0xff;
} else {
@ -179,7 +201,7 @@ final class IntersectTermsEnumFrame {
code = ite.in.readVInt();
isLeafBlock = (code & 1) != 0;
int numBytes = code >>> 1;
//if (DEBUG) System.out.println(" entCount=" + entCount + " lastInFloor?=" + isLastInFloor + " leafBlock?=" + isLeafBlock + " numSuffixBytes=" + numBytes);
if (suffixBytes.length < numBytes) {
suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
}
@ -214,41 +236,106 @@ final class IntersectTermsEnumFrame {
// written one after another -- tail recurse:
fpEnd = ite.in.getFilePointer();
}
// Necessary in case this ord previously was an auto-prefix
// term but now we recurse to a new leaf block
isAutoPrefixTerm = false;
}
// TODO: maybe add scanToLabel; should give perf boost
// Decodes next entry; returns true if it's a sub-block
public boolean next() {
if (isLeafBlock) {
nextLeaf();
return false;
} else {
return nextNonLeaf();
}
}
public void nextLeaf() {
//if (DEBUG) {
// System.out.println(" frame.nextLeaf ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
//}
assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
suffix = suffixesReader.readVInt();
startBytePos = suffixesReader.getPosition();
suffixesReader.skipBytes(suffix);
}
public boolean nextNonLeaf() {
//if (DEBUG) {
// System.out.println(" frame.nextNonLeaf ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount + " versionAutoPrefix=" + versionAutoPrefix + " fp=" + suffixesReader.getPosition());
// }
assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
final int code = suffixesReader.readVInt();
if (versionAutoPrefix == false) {
suffix = code >>> 1;
startBytePos = suffixesReader.getPosition();
suffixesReader.skipBytes(suffix);
if ((code & 1) == 0) {
// A normal term
termState.termBlockOrd++;
return false;
} else {
// A sub-block; make sub-FP absolute:
lastSubFP = fp - suffixesReader.readVLong();
return true;
}
} else {
suffix = code >>> 2;
startBytePos = suffixesReader.getPosition();
suffixesReader.skipBytes(suffix);
switch (code & 3) {
case 0:
// A normal term
//if (DEBUG) System.out.println(" ret: term");
isAutoPrefixTerm = false;
termState.termBlockOrd++;
return false;
case 1:
// A sub-block; make sub-FP absolute:
isAutoPrefixTerm = false;
lastSubFP = fp - suffixesReader.readVLong();
//if (DEBUG) System.out.println(" ret: sub-block");
return true;
case 2:
// A normal prefix term, suffix leads with empty string
floorSuffixLeadStart = -1;
termState.termBlockOrd++;
floorSuffixLeadEnd = suffixesReader.readByte() & 0xff;
if (floorSuffixLeadEnd == 0xff) {
floorSuffixLeadEnd = -1;
//System.out.println(" fill in -1");
}
//if (DEBUG) System.out.println(" ret: floor prefix term: start=-1 end=" + floorSuffixLeadEnd);
isAutoPrefixTerm = true;
return false;
case 3:
// A floor'd prefix term, suffix leads with real byte
if (suffix == 0) {
// TODO: this is messy, but necessary because we are an auto-prefix term, but our suffix is the empty string here, so we have to
// look at the parent block to get the lead suffix byte:
assert ord > 0;
IntersectTermsEnumFrame parent = ite.stack[ord-1];
floorSuffixLeadStart = parent.suffixBytes[parent.startBytePos+parent.suffix-1] & 0xff;
//if (DEBUG) System.out.println(" peek-parent: suffix=" + floorSuffixLeadStart);
} else {
floorSuffixLeadStart = suffixBytes[startBytePos+suffix-1] & 0xff;
}
termState.termBlockOrd++;
isAutoPrefixTerm = true;
floorSuffixLeadEnd = suffixesReader.readByte() & 0xff;
//if (DEBUG) System.out.println(" ret: floor prefix term start=" + floorSuffixLeadStart + " end=" + floorSuffixLeadEnd);
return false;
default:
// Silly javac:
assert false;
return false;
}
}
}

View File

@ -34,7 +34,9 @@ import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
/** Iterates through terms in this field. This implementation skips
* any auto-prefix terms it encounters. */
final class SegmentTermsEnum extends TermsEnum {
// Lazy init:
@ -48,7 +50,7 @@ final class SegmentTermsEnum extends TermsEnum {
private int targetBeforeCurrentLength;
// static boolean DEBUG = false;
//static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
@ -119,6 +121,8 @@ final class SegmentTermsEnum extends TermsEnum {
* computing aggregate statistics. */
public Stats computeBlockStats() throws IOException {
// TODO: add total auto-prefix term count
Stats stats = new Stats(fr.parent.segment, fr.fieldInfo.name);
if (fr.index != null) {
stats.indexNodeCount = fr.index.getNodeCount();
@ -152,8 +156,10 @@ final class SegmentTermsEnum extends TermsEnum {
while (currentFrame.nextEnt == currentFrame.entCount) {
stats.endBlock(currentFrame);
if (!currentFrame.isLastInFloor) {
// Advance to next floor block
currentFrame.loadNextFloorBlock();
stats.startBlock(currentFrame, true);
break;
} else {
if (currentFrame.ord == 0) {
break allTerms;
@ -175,8 +181,6 @@ final class SegmentTermsEnum extends TermsEnum {
// This is a "next" frame -- even if it's
// floor'd we must pretend it isn't so we don't
// try to scan to the right floor frame:
currentFrame.isFloor = false;
//currentFrame.hasTerms = true;
currentFrame.loadBlock();
stats.startBlock(currentFrame, !currentFrame.isLastInFloor);
} else {
@ -294,6 +298,7 @@ final class SegmentTermsEnum extends TermsEnum {
return true;
}
/*
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
@ -307,8 +312,15 @@ final class SegmentTermsEnum extends TermsEnum {
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRefBuilder b) {
return brToString(b.get());
}
*/
@Override
public boolean seekExact(final BytesRef target) throws IOException {
public boolean seekExact(BytesRef target) throws IOException {
if (fr.index == null) {
throw new IllegalStateException("terms index was not loaded");
@ -565,7 +577,8 @@ final class SegmentTermsEnum extends TermsEnum {
}
@Override
public SeekStatus seekCeil(final BytesRef target) throws IOException {
public SeekStatus seekCeil(BytesRef target) throws IOException {
if (fr.index == null) {
throw new IllegalStateException("terms index was not loaded");
}
@ -575,7 +588,7 @@ final class SegmentTermsEnum extends TermsEnum {
assert clearEOF();
// if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// printSeekState(System.out);
// }
@ -617,7 +630,7 @@ final class SegmentTermsEnum extends TermsEnum {
while (targetUpto < targetLimit) {
cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
//if (DEBUG) {
//System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
//System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output + " output=" + output);
//}
if (cmp != 0) {
break;
@ -647,7 +660,7 @@ final class SegmentTermsEnum extends TermsEnum {
while (targetUpto < targetLimit2) {
cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
//if (DEBUG) {
//System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
//System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")");
//}
if (cmp != 0) {
break;
@ -733,7 +746,7 @@ final class SegmentTermsEnum extends TermsEnum {
// Index is exhausted
// if (DEBUG) {
// System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
// System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + targetLabel);
// }
validIndexPrefix = currentFrame.prefix;
@ -743,6 +756,7 @@ final class SegmentTermsEnum extends TermsEnum {
currentFrame.loadBlock();
//if (DEBUG) System.out.println(" now scanToTerm");
final SeekStatus result = currentFrame.scanToTerm(target, false);
if (result == SeekStatus.END) {
term.copyBytes(target);
@ -750,7 +764,7 @@ final class SegmentTermsEnum extends TermsEnum {
if (next() != null) {
//if (DEBUG) {
//System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term);
//System.out.println(" return NOT_FOUND term=" + brToString(term));
//}
return SeekStatus.NOT_FOUND;
} else {
@ -761,7 +775,7 @@ final class SegmentTermsEnum extends TermsEnum {
}
} else {
//if (DEBUG) {
//System.out.println(" return " + result + " term=" + brToString(term) + " " + term);
//System.out.println(" return " + result + " term=" + brToString(term));
//}
return result;
}
@ -776,7 +790,7 @@ final class SegmentTermsEnum extends TermsEnum {
}
//if (DEBUG) {
//System.out.println(" index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
//System.out.println(" index: follow label=" + (target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
//}
targetUpto++;
@ -802,7 +816,7 @@ final class SegmentTermsEnum extends TermsEnum {
termExists = false;
if (next() != null) {
//if (DEBUG) {
//System.out.println(" return NOT_FOUND term=" + term.utf8ToString() + " " + term);
//System.out.println(" return NOT_FOUND term=" + term.get().utf8ToString() + " " + term);
//}
return SeekStatus.NOT_FOUND;
} else {
@ -906,7 +920,9 @@ final class SegmentTermsEnum extends TermsEnum {
// Pop finished blocks
while (currentFrame.nextEnt == currentFrame.entCount) {
if (!currentFrame.isLastInFloor) {
// Advance to next floor block
currentFrame.loadNextFloorBlock();
break;
} else {
//if (DEBUG) System.out.println(" pop frame");
if (currentFrame.ord == 0) {
@ -946,11 +962,9 @@ final class SegmentTermsEnum extends TermsEnum {
// This is a "next" frame -- even if it's
// floor'd we must pretend it isn't so we don't
// try to scan to the right floor frame:
currentFrame.isFloor = false;
//currentFrame.hasTerms = true;
currentFrame.loadBlock();
} else {
//if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + " currentFrame.ord=" + currentFrame.ord);
//if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord=" + currentFrame.ord);
return term.get();
}
}

View File

@ -37,6 +37,10 @@ final class SegmentTermsEnumFrame {
FST.Arc<BytesRef> arc;
final boolean versionAutoPrefix;
//static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
// File pointer where this block was loaded from
long fp;
long fpOrig;
@ -96,6 +100,7 @@ final class SegmentTermsEnumFrame {
this.state = ste.fr.parent.postingsReader.newTermState();
this.state.totalTermFreq = -1;
this.longs = new long[ste.fr.longsSize];
this.versionAutoPrefix = ste.fr.parent.version >= BlockTreeTermsReader.VERSION_AUTO_PREFIX_TERMS;
}
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
@ -262,12 +267,17 @@ final class SegmentTermsEnumFrame {
*/
}
// Decodes next entry; returns true if it's a sub-block
public boolean next() throws IOException {
  if (isLeafBlock) {
    nextLeaf();
    return false;
  } else {
    return nextNonLeaf();
  }
}
public void nextLeaf() {
//if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
@ -276,36 +286,78 @@ final class SegmentTermsEnumFrame {
ste.term.setLength(prefix + suffix);
ste.term.grow(ste.term.length());
suffixesReader.readBytes(ste.term.bytes(), prefix, suffix);
// A normal term
ste.termExists = true;
return false;
}
public boolean nextNonLeaf() {
//if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
final int code = suffixesReader.readVInt();
suffix = code >>> 1;
startBytePos = suffixesReader.getPosition();
ste.term.setLength(prefix + suffix);
ste.term.grow(ste.term.length());
suffixesReader.readBytes(ste.term.bytes(), prefix, suffix);
if ((code & 1) == 0) {
// A normal term
ste.termExists = true;
subCode = 0;
state.termBlockOrd++;
return false;
} else {
// A sub-block; make sub-FP absolute:
ste.termExists = false;
subCode = suffixesReader.readVLong();
lastSubFP = fp - subCode;
//if (DEBUG) {
//System.out.println(" lastSubFP=" + lastSubFP);
//}
return true;
public boolean nextNonLeaf() throws IOException {
//if (DEBUG) System.out.println(" stef.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + suffixesReader.getPosition());
while (true) {
if (nextEnt == entCount) {
assert arc == null || (isFloor && isLastInFloor == false): "isFloor=" + isFloor + " isLastInFloor=" + isLastInFloor;
loadNextFloorBlock();
if (isLeafBlock) {
nextLeaf();
return false;
} else {
continue;
}
}
assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
final int code = suffixesReader.readVInt();
if (versionAutoPrefix == false) {
suffix = code >>> 1;
} else {
suffix = code >>> 2;
}
startBytePos = suffixesReader.getPosition();
ste.term.setLength(prefix + suffix);
ste.term.grow(ste.term.length());
suffixesReader.readBytes(ste.term.bytes(), prefix, suffix);
if (versionAutoPrefix == false) {
if ((code & 1) == 0) {
// A normal term
ste.termExists = true;
subCode = 0;
state.termBlockOrd++;
return false;
} else {
// A sub-block; make sub-FP absolute:
ste.termExists = false;
subCode = suffixesReader.readVLong();
lastSubFP = fp - subCode;
//if (DEBUG) {
//System.out.println(" lastSubFP=" + lastSubFP);
//}
return true;
}
} else {
switch(code & 3) {
case 0:
// A normal term
ste.termExists = true;
subCode = 0;
state.termBlockOrd++;
return false;
case 1:
// A sub-block; make sub-FP absolute:
ste.termExists = false;
subCode = suffixesReader.readVLong();
lastSubFP = fp - subCode;
//if (DEBUG) {
//System.out.println(" lastSubFP=" + lastSubFP);
//}
return true;
case 2:
case 3:
// A prefix term: skip it
state.termBlockOrd++;
suffixesReader.readByte();
continue;
}
}
}
}
@ -448,18 +500,38 @@ final class SegmentTermsEnumFrame {
assert nextEnt < entCount;
nextEnt++;
final int code = suffixesReader.readVInt();
suffixesReader.skipBytes(isLeafBlock ? code : code >>> 1);
//if (DEBUG) System.out.println(" " + nextEnt + " (of " + entCount + ") ent isSubBlock=" + ((code&1)==1));
if ((code & 1) != 0) {
final long subCode = suffixesReader.readVLong();
//if (DEBUG) System.out.println(" subCode=" + subCode);
if (targetSubCode == subCode) {
//if (DEBUG) System.out.println(" match!");
lastSubFP = subFP;
return;
if (versionAutoPrefix == false) {
suffixesReader.skipBytes(code >>> 1);
if ((code & 1) != 0) {
final long subCode = suffixesReader.readVLong();
if (targetSubCode == subCode) {
//if (DEBUG) System.out.println(" match!");
lastSubFP = subFP;
return;
}
} else {
state.termBlockOrd++;
}
} else {
int flag = code & 3;
suffixesReader.skipBytes(code >>> 2);
//if (DEBUG) System.out.println(" " + nextEnt + " (of " + entCount + ") ent isSubBlock=" + ((code&1)==1));
if (flag == 1) {
// Sub-block
final long subCode = suffixesReader.readVLong();
//if (DEBUG) System.out.println(" subCode=" + subCode);
if (targetSubCode == subCode) {
//if (DEBUG) System.out.println(" match!");
lastSubFP = subFP;
return;
}
} else {
state.termBlockOrd++;
if (flag == 2 || flag == 3) {
// Floor'd prefix term
suffixesReader.readByte();
}
}
}
}
}
@ -473,6 +545,21 @@ final class SegmentTermsEnumFrame {
private int suffix;
private long subCode;
// for debugging
/*
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
*/
// Target's prefix matches this block's prefix; we
// scan the entries to check if the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
@ -535,9 +622,6 @@ final class SegmentTermsEnumFrame {
// keep scanning
if (nextEnt == entCount) {
if (exactOnly) {
fillTerm();
}
// We are done scanning this block
break nextTerm;
} else {
@ -590,7 +674,7 @@ final class SegmentTermsEnumFrame {
// scan the entries to check if the suffix matches.
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
//if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(term));
//if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(target));
assert nextEnt != -1;
@ -605,30 +689,60 @@ final class SegmentTermsEnumFrame {
assert prefixMatches(target);
// Loop over each entry (term or sub-block) in this block:
//nextTerm: while(nextEnt < entCount) {
nextTerm: while (true) {
nextTerm: while(nextEnt < entCount) {
nextEnt++;
final int code = suffixesReader.readVInt();
suffix = code >>> 1;
// if (DEBUG) {
// BytesRef suffixBytesRef = new BytesRef();
// suffixBytesRef.bytes = suffixBytes;
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
// }
if (versionAutoPrefix == false) {
suffix = code >>> 1;
} else {
suffix = code >>> 2;
}
//if (DEBUG) {
// BytesRef suffixBytesRef = new BytesRef();
// suffixBytesRef.bytes = suffixBytes;
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
//}
ste.termExists = (code & 1) == 0;
final int termLen = prefix + suffix;
startBytePos = suffixesReader.getPosition();
suffixesReader.skipBytes(suffix);
if (ste.termExists) {
state.termBlockOrd++;
subCode = 0;
if (versionAutoPrefix == false) {
ste.termExists = (code & 1) == 0;
if (ste.termExists) {
state.termBlockOrd++;
subCode = 0;
} else {
subCode = suffixesReader.readVLong();
lastSubFP = fp - subCode;
}
} else {
subCode = suffixesReader.readVLong();
lastSubFP = fp - subCode;
switch (code & 3) {
case 0:
// Normal term
ste.termExists = true;
state.termBlockOrd++;
subCode = 0;
break;
case 1:
// Sub-block
ste.termExists = false;
subCode = suffixesReader.readVLong();
lastSubFP = fp - subCode;
break;
case 2:
case 3:
// Floor prefix term: skip it
//if (DEBUG) System.out.println(" skip floor prefix term");
suffixesReader.readByte();
ste.termExists = false;
state.termBlockOrd++;
continue;
}
}
final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
@ -637,7 +751,7 @@ final class SegmentTermsEnumFrame {
// Loop over bytes in the suffix, comparing to
// the target
int bytePos = startBytePos;
while(true) {
while (true) {
final int cmp;
final boolean stop;
if (targetPos < targetLimit) {
@ -652,24 +766,18 @@ final class SegmentTermsEnumFrame {
if (cmp < 0) {
// Current entry is still before the target;
// keep scanning
if (nextEnt == entCount) {
if (exactOnly) {
fillTerm();
//termExists = true;
}
// We are done scanning this block
break nextTerm;
} else {
continue nextTerm;
}
continue nextTerm;
} else if (cmp > 0) {
// Done! Current entry is after target --
// return NOT_FOUND:
fillTerm();
//if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + " ste.termExists=" + ste.termExists);
if (!exactOnly && !ste.termExists) {
//System.out.println(" now pushFrame");
// TODO this
// We are on a sub-block, and caller wants
// us to position to the next term after
// the target, so we must recurse into the

View File

@ -48,6 +48,8 @@ public class Stats {
/** Total number of bytes (sum of term lengths) across all terms in the field. */
public long totalTermBytes;
// TODO: add total auto-prefix term count
/** The number of normal (non-floor) blocks in the terms file. */
public int nonFloorBlockCount;

View File

@ -43,9 +43,9 @@ import org.apache.lucene.util.automaton.Transition;
* completely accepted. This is not possible when the language accepted by the
* FSM is not finite (i.e. * operator).
* </p>
* @lucene.experimental
* @lucene.internal
*/
class AutomatonTermsEnum extends FilteredTermsEnum {
public class AutomatonTermsEnum extends FilteredTermsEnum {
// a tableized array-based form of the DFA
private final ByteRunAutomaton runAutomaton;
// common suffix of the automaton
@ -70,9 +70,8 @@ class AutomatonTermsEnum extends FilteredTermsEnum {
/**
* Construct an enumerator based upon an automaton, enumerating the specified
* field, working on a supplied TermsEnum
* <p>
*
* @lucene.experimental
* <p>
* @param compiled CompiledAutomaton
*/
public AutomatonTermsEnum(TermsEnum tenum, CompiledAutomaton compiled) {

View File

@ -25,7 +25,9 @@ import java.nio.file.Paths;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@ -56,6 +58,8 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CompiledAutomaton;
/**
* Basic tool and API to check the health of an index and
@ -902,6 +906,180 @@ public class CheckIndex implements Closeable {
return status;
}
/** Visits all terms in the range minTerm (inclusive) to maxTerm (exclusive), marking all doc IDs encountered in the passed-in docsSeen bits, and
* returning the total number of terms visited. */
private static long getDocsFromTermRange(String field, int maxDoc, TermsEnum termsEnum, FixedBitSet docsSeen, BytesRef minTerm, BytesRef maxTerm, boolean isIntersect) throws IOException {
docsSeen.clear(0, docsSeen.length());
long termCount = 0;
PostingsEnum postingsEnum = null;
BytesRefBuilder lastTerm = null;
while (true) {
BytesRef term;
// Kinda messy: for intersect, we must first next(), but for "normal", we are already on our first term:
if (isIntersect || termCount != 0) {
term = termsEnum.next();
} else {
term = termsEnum.term();
}
if (term == null) {
if (isIntersect == false) {
throw new RuntimeException("didn't see max term field=" + field + " term=" + maxTerm);
}
return termCount;
}
assert term.isValid();
if (lastTerm == null) {
lastTerm = new BytesRefBuilder();
lastTerm.copyBytes(term);
} else {
if (lastTerm.get().compareTo(term) >= 0) {
throw new RuntimeException("terms out of order: lastTerm=" + lastTerm + " term=" + term);
}
lastTerm.copyBytes(term);
}
//System.out.println(" term=" + term);
// Caller already ensured terms enum positioned >= minTerm:
if (term.compareTo(minTerm) < 0) {
throw new RuntimeException("saw term before min term field=" + field + " term=" + minTerm);
}
if (isIntersect == false) {
int cmp = term.compareTo(maxTerm);
if (cmp == 0) {
// Done!
return termCount;
} else if (cmp > 0) {
throw new RuntimeException("didn't see end term field=" + field + " term=" + maxTerm);
}
}
postingsEnum = termsEnum.postings(null, postingsEnum, 0);
int lastDoc = -1;
while (true) {
int doc = postingsEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
if (doc <= lastDoc) {
throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
}
if (doc >= maxDoc) {
throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
}
//System.out.println(" doc=" + doc);
docsSeen.set(doc);
lastDoc = doc;
}
termCount++;
}
}
/** Tests Terms.intersect on this range, validating that it returns the same doc ids as a straight non-intersect TermsEnum. Returns true if
* any fake terms were seen. */
private static boolean checkSingleTermRange(String field, int maxDoc, Terms terms, BytesRef minTerm, BytesRef maxTerm, FixedBitSet normalDocs, FixedBitSet intersectDocs) throws IOException {
// System.out.println(" check minTerm=" + minTerm + " maxTerm=" + maxTerm);
TermsEnum termsEnum = terms.iterator(null);
TermsEnum.SeekStatus status = termsEnum.seekCeil(minTerm);
if (status != TermsEnum.SeekStatus.FOUND) {
throw new RuntimeException("failed to seek to existing term field=" + field + " term=" + minTerm);
}
// Do "dumb" iteration to visit all terms in the range:
long normalTermCount = getDocsFromTermRange(field, maxDoc, termsEnum, normalDocs, minTerm, maxTerm, false);
// Now do the same operation using intersect:
long intersectTermCount = getDocsFromTermRange(field, maxDoc, terms.intersect(new CompiledAutomaton(Automata.makeBinaryInterval(minTerm, true, maxTerm, false), true, false, Integer.MAX_VALUE, true), null), intersectDocs, minTerm, maxTerm, true);
if (intersectTermCount > normalTermCount) {
throw new RuntimeException("intersect returned too many terms: field=" + field + " intersectTermCount=" + intersectTermCount + " normalTermCount=" + normalTermCount);
}
if (normalDocs.equals(intersectDocs) == false) {
throw new RuntimeException("intersect visited different docs than straight terms enum: " + normalDocs.cardinality() + " for straight enum, vs " + intersectDocs.cardinality() + " for intersect, minTerm=" + minTerm + " maxTerm=" + maxTerm);
}
//System.out.println(" " + intersectTermCount + " vs " + normalTermCount);
return intersectTermCount != normalTermCount;
}
/** Make an effort to visit "fake" (e.g. auto-prefix) terms. We do this by running term range intersections across an initially wide
* interval of terms, at different boundaries, gradually decreasing the interval. This is not guaranteed to hit all non-real
* terms (doing that in general is non-trivial), but it should hit many of them, and validate their postings against the postings of the
* real terms. */
private static void checkTermRanges(String field, int maxDoc, Terms terms, long numTerms) throws IOException {
// We'll target this many terms in our interval for the current level:
double currentInterval = numTerms;
FixedBitSet normalDocs = new FixedBitSet(maxDoc);
FixedBitSet intersectDocs = new FixedBitSet(maxDoc);
TermsEnum termsEnum = null;
//System.out.println("CI.checkTermRanges field=" + field + " numTerms=" + numTerms);
while (currentInterval >= 10.0) {
//System.out.println(" cycle interval=" + currentInterval);
// We iterate this terms enum to locate min/max term for each sliding/overlapping interval we test at the current level:
termsEnum = terms.iterator(termsEnum);
long termCount = 0;
Deque<BytesRef> termBounds = new LinkedList<>();
long lastTermAdded = Long.MIN_VALUE;
BytesRefBuilder lastTerm = null;
while (true) {
BytesRef term = termsEnum.next();
if (term == null) {
break;
}
//System.out.println(" top: term=" + term.utf8ToString());
if (termCount >= lastTermAdded + currentInterval/4) {
termBounds.add(BytesRef.deepCopyOf(term));
lastTermAdded = termCount;
if (termBounds.size() == 5) {
BytesRef minTerm = termBounds.removeFirst();
BytesRef maxTerm = termBounds.getLast();
checkSingleTermRange(field, maxDoc, terms, minTerm, maxTerm, normalDocs, intersectDocs);
}
}
termCount++;
if (lastTerm == null) {
lastTerm = new BytesRefBuilder();
lastTerm.copyBytes(term);
} else {
if (lastTerm.get().compareTo(term) >= 0) {
throw new RuntimeException("terms out of order: lastTerm=" + lastTerm + " term=" + term);
}
lastTerm.copyBytes(term);
}
}
if (lastTerm != null && termBounds.isEmpty() == false) {
BytesRef minTerm = termBounds.removeFirst();
BytesRef maxTerm = lastTerm.get();
checkSingleTermRange(field, maxDoc, terms, minTerm, maxTerm, normalDocs, intersectDocs);
}
currentInterval *= .75;
}
}
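To make the shrinking schedule concrete, a small illustrative loop (not from this commit) showing the pass sizes for numTerms = 1000; bounds are sampled every interval/4 terms and each checked range spans four bounds:

double interval = 1000;
while (interval >= 10.0) {
  System.out.println("pass: interval=" + interval + " boundEvery=" + (interval / 4));
  interval *= .75;
}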
/**
* checks Fields api is consistent with itself.
* searcher is optional, to verify with queries. Can be null.
@ -923,6 +1101,7 @@ public class CheckIndex implements Closeable {
String lastField = null;
for (String field : fields) {
// MultiFieldsEnum relies upon this order...
if (lastField != null && field.compareTo(lastField) <= 0) {
throw new RuntimeException("fields out of order: lastField=" + lastField + " field=" + field);
@ -1032,6 +1211,7 @@ public class CheckIndex implements Closeable {
if (term == null) {
break;
}
// System.out.println("CI: field=" + field + " check term=" + term + " docFreq=" + termsEnum.docFreq());
assert term.isValid();
@ -1332,6 +1512,14 @@ public class CheckIndex implements Closeable {
// docs got deleted and then merged away):
} else {
long fieldTermCount = (status.delTermCount+status.termCount)-termCountStart;
if (hasFreqs == false) {
// For DOCS_ONLY fields we recursively test term ranges:
checkTermRanges(field, maxDoc, fieldTerms, fieldTermCount);
}
final Object stats = fieldTerms.getStats();
assert stats != null;
if (status.blockTreeStats == null) {
@ -1353,11 +1541,9 @@ public class CheckIndex implements Closeable {
}
}
if (fieldTerms != null) {
final int v = fieldTerms.getDocCount();
if (v != -1 && visitedDocs.cardinality() != v) {
throw new RuntimeException("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.cardinality());
}
final int v = fieldTerms.getDocCount();
if (v != -1 && visitedDocs.cardinality() != v) {
throw new RuntimeException("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.cardinality());
}
// Test seek to last term:
@ -1365,6 +1551,9 @@ public class CheckIndex implements Closeable {
if (termsEnum.seekCeil(lastTerm.get()) != TermsEnum.SeekStatus.FOUND) {
throw new RuntimeException("seek to last term " + lastTerm + " failed");
}
if (termsEnum.term().equals(lastTerm.get()) == false) {
throw new RuntimeException("seek to last term " + lastTerm.get() + " returned FOUND but seeked to the wrong term " + termsEnum.term());
}
int expectedDocFreq = termsEnum.docFreq();
PostingsEnum d = termsEnum.postings(null, null, PostingsEnum.NONE);
@ -1373,18 +1562,18 @@ public class CheckIndex implements Closeable {
docFreq++;
}
if (docFreq != expectedDocFreq) {
throw new RuntimeException("docFreq for last term " + lastTerm + "=" + expectedDocFreq + " != recomputed docFreq=" + docFreq);
throw new RuntimeException("docFreq for last term " + lastTerm.toBytesRef() + "=" + expectedDocFreq + " != recomputed docFreq=" + docFreq);
}
}
// check unique term count
long termCount = -1;
if ((status.delTermCount+status.termCount)-termCountStart > 0) {
if (fieldTermCount > 0) {
termCount = fields.terms(field).size();
if (termCount != -1 && termCount != status.delTermCount + status.termCount - termCountStart) {
throw new RuntimeException("termCount mismatch " + (status.delTermCount + termCount) + " vs " + (status.termCount - termCountStart));
if (termCount != -1 && termCount != fieldTermCount) {
throw new RuntimeException("termCount mismatch " + termCount + " vs " + fieldTermCount);
}
}
@ -1407,6 +1596,9 @@ public class CheckIndex implements Closeable {
if (termsEnum.seekCeil(seekTerms[i]) != TermsEnum.SeekStatus.FOUND) {
throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
}
if (termsEnum.term().equals(seekTerms[i]) == false) {
throw new RuntimeException("seek to existing term " + seekTerms[i] + " returned FOUND but seeked to the wrong term " + termsEnum.term());
}
docs = termsEnum.postings(liveDocs, docs, PostingsEnum.NONE);
if (docs == null) {

View File

@ -151,7 +151,6 @@ class FreqProxFields extends Fields {
}
public SeekStatus seekCeil(BytesRef text) {
// TODO: we could instead keep the BytesRefHash
// intact so this is a hash lookup
@ -170,17 +169,19 @@ class FreqProxFields extends Fields {
} else {
// found:
ord = mid;
assert term().compareTo(text) == 0;
return SeekStatus.FOUND;
}
}
// not found:
ord = lo + 1;
ord = lo;
if (ord >= numTerms) {
return SeekStatus.END;
} else {
int textStart = postingsArray.textStarts[sortedTermIDs[ord]];
terms.bytePool.setBytesRef(scratch, textStart);
assert term().compareTo(text) > 0;
return SeekStatus.NOT_FOUND;
}
}
@ -309,7 +310,7 @@ class FreqProxFields extends Fields {
final FreqProxPostingsArray postingsArray;
final ByteSliceReader reader = new ByteSliceReader();
final boolean readTermFreq;
int docID;
int docID = -1;
int freq;
boolean ended;
int termID;
@ -324,7 +325,7 @@ class FreqProxFields extends Fields {
this.termID = termID;
terms.initReader(reader, termID, 0);
ended = false;
docID = 0;
docID = -1;
}
@Override
@ -365,6 +366,9 @@ class FreqProxFields extends Fields {
@Override
public int nextDoc() throws IOException {
if (docID == -1) {
docID = 0;
}
if (reader.eof()) {
if (ended) {
return NO_MORE_DOCS;
@ -412,7 +416,7 @@ class FreqProxFields extends Fields {
final ByteSliceReader reader = new ByteSliceReader();
final ByteSliceReader posReader = new ByteSliceReader();
final boolean readOffsets;
int docID;
int docID = -1;
int freq;
int pos;
int startOffset;
@ -436,7 +440,7 @@ class FreqProxFields extends Fields {
terms.initReader(reader, termID, 0);
terms.initReader(posReader, termID, 1);
ended = false;
docID = 0;
docID = -1;
posLeft = 0;
}
@ -452,6 +456,9 @@ class FreqProxFields extends Fields {
@Override
public int nextDoc() throws IOException {
if (docID == -1) {
docID = 0;
}
while (posLeft != 0) {
nextPosition();
}

View File

@ -49,6 +49,7 @@ final class MappingMultiPostingsEnum extends PostingsEnum {
this.numSubs = postingsEnum.getNumSubs();
this.subs = postingsEnum.getSubs();
upto = -1;
doc = -1;
current = null;
this.multiDocsAndPositionsEnum = postingsEnum;
return this;

View File

@ -17,6 +17,7 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
@ -165,4 +166,30 @@ public final class TermContext {
public void setDocFreq(int docFreq) {
this.docFreq = docFreq;
}
/** Returns true if all terms stored here are real (i.e., not auto-prefix terms).
*
* @lucene.internal */
public boolean hasOnlyRealTerms() {
for(TermState termState : states) {
if (termState instanceof BlockTermState && ((BlockTermState) termState).isRealTerm == false) {
return false;
}
}
return true;
}
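A hedged caller-side sketch (mirroring the assert that ScoringRewrite gains later in this commit; reader, term and termContext are placeholder variables):

// docFreq can only be cross-checked against the reader when no
// auto-prefix (non-real) term contributed to this TermContext:
if (termContext.hasOnlyRealTerms()) {
  assert reader.docFreq(term) == termContext.docFreq();
}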
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("TermContext\n");
for(TermState termState : states) {
sb.append(" state=");
sb.append(termState.toString());
sb.append('\n');
}
return sb.toString();
}
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.automaton.CompiledAutomaton;
@ -42,17 +43,23 @@ public abstract class Terms {
* implementation can do so. */
public abstract TermsEnum iterator(TermsEnum reuse) throws IOException;
/** Returns a TermsEnum that iterates over all terms that
* are accepted by the provided {@link
/** Returns a TermsEnum that iterates over all terms and
* documents that are accepted by the provided {@link
* CompiledAutomaton}. If the <code>startTerm</code> is
* provided then the returned enum will only accept terms
* provided then the returned enum will only return terms
* {@code > startTerm}, but you still must call
* next() first to get to the first term. Note that the
* provided <code>startTerm</code> must be accepted by
* the automaton.
*
* <p><b>NOTE</b>: the returned TermsEnum cannot
* seek</p>. */
* seek</p>.
*
* <p><b>NOTE</b>: the terms dictionary is free to
* return arbitrary terms as long as the resulting visited
* docs are the same. E.g., {@link BlockTreeTermsWriter}
* creates auto-prefix terms during indexing to reduce the
* number of terms visited. */
public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
// TODO: could we factor out a common interface b/w
@ -64,13 +71,17 @@ public abstract class Terms {
// TODO: eventually we could support seekCeil/Exact on
// the returned enum, instead of only being able to seek
// at the start
TermsEnum termsEnum = iterator(null);
if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}
if (startTerm == null) {
return new AutomatonTermsEnum(iterator(null), compiled);
return new AutomatonTermsEnum(termsEnum, compiled);
} else {
return new AutomatonTermsEnum(iterator(null), compiled) {
return new AutomatonTermsEnum(termsEnum, compiled) {
@Override
protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
if (term == null) {
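A hedged usage sketch of the intersect path (reader and field are placeholder variables; the CompiledAutomaton arguments mirror the CheckIndex call earlier in this commit):

Terms terms = MultiFields.getTerms(reader, field);
Automaton a = Automata.makeBinaryInterval(new BytesRef("aaa"), true, new BytesRef("aac"), false);
CompiledAutomaton ca = new CompiledAutomaton(a, true, false, Integer.MAX_VALUE, true);
TermsEnum te = terms.intersect(ca, null);
BytesRef term;
while ((term = te.next()) != null) {
  // te may surface auto-prefix terms; only the set of visited docs is
  // guaranteed to match a straight TermsEnum over the same range.
}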

View File

@ -99,6 +99,7 @@ public class AutomatonQuery extends MultiTermQuery {
super(term.field());
this.term = term;
this.automaton = automaton;
// TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
}

View File

@ -17,12 +17,7 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.automaton.Automaton;
@ -33,6 +28,7 @@ import org.apache.lucene.util.automaton.Automaton;
* <p>This query uses the {@link
* MultiTermQuery#CONSTANT_SCORE_REWRITE}
* rewrite method. */
public class PrefixQuery extends AutomatonQuery {
/** Constructs a query for terms starting with <code>prefix</code>. */

View File

@ -18,19 +18,19 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
/**
* Base rewrite method that translates each term into a query, and keeps
@ -112,7 +112,7 @@ public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewr
for (int i = 0; i < size; i++) {
final int pos = sort[i];
final Term term = new Term(query.getField(), col.terms.get(pos, new BytesRef()));
assert reader.docFreq(term) == termStates[pos].docFreq();
assert termStates[pos].hasOnlyRealTerms() == false || reader.docFreq(term) == termStates[pos].docFreq();
addClause(result, term, termStates[pos].docFreq(), query.getBoost() * boost[pos], termStates[pos]);
}
}
@ -137,7 +137,7 @@ public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewr
final int e = terms.add(bytes);
final TermState state = termsEnum.termState();
assert state != null;
if (e < 0 ) {
if (e < 0) {
// duplicate term: update docFreq
final int pos = (-e)-1;
array.termState[pos].register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());

View File

@ -17,22 +17,17 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
/**
* A Query that matches documents within an range of terms.
*
* <p>This query matches the documents looking for terms that fall into the
* supplied range according to {@link
* Byte#compareTo(Byte)}. It is not intended
* for numerical ranges; use {@link NumericRangeQuery} instead.
* supplied range according to {@link BytesRef#compareTo(BytesRef)}.
*
* <p>This query uses the {@link
* MultiTermQuery#CONSTANT_SCORE_REWRITE}
@ -40,12 +35,11 @@ import org.apache.lucene.util.ToStringUtils;
* @since 2.9
*/
public class TermRangeQuery extends MultiTermQuery {
private BytesRef lowerTerm;
private BytesRef upperTerm;
private boolean includeLower;
private boolean includeUpper;
public class TermRangeQuery extends AutomatonQuery {
private final BytesRef lowerTerm;
private final BytesRef upperTerm;
private final boolean includeLower;
private final boolean includeUpper;
/**
* Constructs a query selecting all terms greater/equal than <code>lowerTerm</code>
@ -70,13 +64,28 @@ public class TermRangeQuery extends MultiTermQuery {
* included in the range.
*/
public TermRangeQuery(String field, BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) {
super(field);
super(new Term(field, lowerTerm), toAutomaton(lowerTerm, upperTerm, includeLower, includeUpper), Integer.MAX_VALUE, true);
this.lowerTerm = lowerTerm;
this.upperTerm = upperTerm;
this.includeLower = includeLower;
this.includeUpper = includeUpper;
}
public static Automaton toAutomaton(BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) {
if (lowerTerm == null) {
// makeBinaryInterval is more picky than we are:
includeLower = true;
}
if (upperTerm == null) {
// makeBinaryInterval is more picky than we are:
includeUpper = true;
}
return Automata.makeBinaryInterval(lowerTerm, includeLower, upperTerm, includeUpper);
}
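For example (a sketch; the field name and bounds are illustrative), a range query and the equivalent automaton now line up directly:

// [m, t) over a hypothetical "id" field:
TermRangeQuery q = new TermRangeQuery("id", new BytesRef("m"), new BytesRef("t"), true, false);
// The same interval expressed as an automaton, via the helper above:
Automaton a = TermRangeQuery.toAutomaton(new BytesRef("m"), new BytesRef("t"), true, false);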
/**
* Factory that creates a new TermRangeQuery using Strings for term text.
*/
@ -98,37 +107,22 @@ public class TermRangeQuery extends MultiTermQuery {
/** Returns <code>true</code> if the upper endpoint is inclusive */
public boolean includesUpper() { return includeUpper; }
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
return TermsEnum.EMPTY;
}
TermsEnum tenum = terms.iterator(null);
if ((lowerTerm == null || (includeLower && lowerTerm.length == 0)) && upperTerm == null) {
return tenum;
}
return new TermRangeTermsEnum(tenum,
lowerTerm, upperTerm, includeLower, includeUpper);
}
/** Prints a user-readable version of this query. */
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
if (!getField().equals(field)) {
buffer.append(getField());
buffer.append(":");
}
buffer.append(includeLower ? '[' : '{');
// TODO: all these toStrings for queries should just output the bytes, it might not be UTF-8!
buffer.append(lowerTerm != null ? ("*".equals(Term.toString(lowerTerm)) ? "\\*" : Term.toString(lowerTerm)) : "*");
buffer.append(" TO ");
buffer.append(upperTerm != null ? ("*".equals(Term.toString(upperTerm)) ? "\\*" : Term.toString(upperTerm)) : "*");
buffer.append(includeUpper ? ']' : '}');
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
StringBuilder buffer = new StringBuilder();
if (!getField().equals(field)) {
buffer.append(getField());
buffer.append(":");
}
buffer.append(includeLower ? '[' : '{');
// TODO: all these toStrings for queries should just output the bytes, it might not be UTF-8!
buffer.append(lowerTerm != null ? ("*".equals(Term.toString(lowerTerm)) ? "\\*" : Term.toString(lowerTerm)) : "*");
buffer.append(" TO ");
buffer.append(upperTerm != null ? ("*".equals(Term.toString(upperTerm)) ? "\\*" : Term.toString(upperTerm)) : "*");
buffer.append(includeUpper ? ']' : '}');
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
}
@Override
@ -167,5 +161,4 @@ public class TermRangeQuery extends MultiTermQuery {
return false;
return true;
}
}

View File

@ -1,101 +0,0 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
/**
* Subclass of FilteredTermEnum for enumerating all terms that match the
* specified range parameters. Each term in the enumeration is
* greater than all that precede it.
*/
public class TermRangeTermsEnum extends FilteredTermsEnum {
final private boolean includeLower;
final private boolean includeUpper;
final private BytesRef lowerBytesRef;
final private BytesRef upperBytesRef;
/**
* Enumerates all terms greater/equal than <code>lowerTerm</code>
* but less/equal than <code>upperTerm</code>.
*
* If an endpoint is null, it is said to be "open". Either or both
* endpoints may be open. Open endpoints may not be exclusive
* (you can't select all but the first or last term without
* explicitly specifying the term to exclude.)
*
* @param tenum
* TermsEnum to filter
* @param lowerTerm
* The term text at the lower end of the range
* @param upperTerm
* The term text at the upper end of the range
* @param includeLower
* If true, the <code>lowerTerm</code> is included in the range.
* @param includeUpper
* If true, the <code>upperTerm</code> is included in the range.
*/
public TermRangeTermsEnum(TermsEnum tenum, BytesRef lowerTerm, BytesRef upperTerm,
boolean includeLower, boolean includeUpper) {
super(tenum);
// do a little bit of normalization...
// open ended range queries should always be inclusive.
if (lowerTerm == null) {
this.lowerBytesRef = new BytesRef();
this.includeLower = true;
} else {
this.lowerBytesRef = lowerTerm;
this.includeLower = includeLower;
}
if (upperTerm == null) {
this.includeUpper = true;
upperBytesRef = null;
} else {
this.includeUpper = includeUpper;
upperBytesRef = upperTerm;
}
setInitialSeekTerm(lowerBytesRef);
}
@Override
protected AcceptStatus accept(BytesRef term) {
if (!this.includeLower && term.equals(lowerBytesRef))
return AcceptStatus.NO;
// Use this field's default sort ordering
if (upperBytesRef != null) {
final int cmp = upperBytesRef.compareTo(term);
/*
* if beyond the upper term, or is exclusive and this is equal to
* the upper term, break out
*/
if ((cmp < 0) ||
(!includeUpper && cmp==0)) {
return AcceptStatus.END;
}
}
return AcceptStatus.YES;
}
}

View File

@ -73,6 +73,18 @@ final public class Automata {
return a;
}
/**
* Returns a new (deterministic) automaton that accepts all binary terms.
*/
public static Automaton makeAnyBinary() {
Automaton a = new Automaton();
int s = a.createState();
a.setAccept(s, true);
a.addTransition(s, s, 0, 255);
a.finishState();
return a;
}
/**
* Returns a new (deterministic) automaton that accepts any single codepoint.
*/
@ -204,8 +216,172 @@ final public class Automata {
return s;
}
/** Creates a new deterministic, minimal automaton accepting
* all binary terms in the specified interval. Note that unlike
* {@link #makeDecimalInterval}, the returned automaton is infinite,
* because terms behave like floating point numbers leading with
* a decimal point. However, in the special case where min == max,
* and both are inclusive, the automaton will be finite and accept
* exactly one term. */
public static Automaton makeBinaryInterval(BytesRef min, boolean minInclusive, BytesRef max, boolean maxInclusive) {
if (min == null && minInclusive == false) {
throw new IllegalArgumentException("minInclusive must be true when min is null (open ended)");
}
if (max == null && maxInclusive == false) {
throw new IllegalArgumentException("maxInclusive must be true when max is null (open ended)");
}
if (min != null && min.length == 0 && minInclusive == true) {
// Silly empty string corner case:
min = null;
}
if (min == null) {
if (max == null) {
// Accepts all terms:
return makeAnyBinary();
}
min = new BytesRef();
minInclusive = true;
}
int cmp;
if (max != null) {
cmp = min.compareTo(max);
} else {
cmp = -1;
}
if (cmp == 0) {
if (minInclusive == false || maxInclusive == false) {
return makeEmpty();
} else {
return makeBinary(min);
}
} else if (cmp > 0) {
// min > max:
return makeEmpty();
}
Automaton a = new Automaton();
int startState = a.createState();
int sinkState = a.createState();
a.setAccept(sinkState, true);
// This state accepts all suffixes:
a.addTransition(sinkState, sinkState, 0, 255);
boolean equalPrefix = true;
int lastState = startState;
int firstMaxState = -1;
int sharedPrefixLength = 0;
for(int i=0;i<min.length;i++) {
int minLabel = min.bytes[min.offset+i] & 0xff;
int maxLabel;
if (max != null && equalPrefix && i < max.length) {
maxLabel = max.bytes[max.offset+i] & 0xff;
} else {
maxLabel = -1;
}
int nextState;
if (minInclusive && i == min.length-1 && (equalPrefix == false || minLabel != maxLabel)) {
nextState = sinkState;
} else {
nextState = a.createState();
}
if (equalPrefix) {
if (minLabel == maxLabel) {
// Still in shared prefix
a.addTransition(lastState, nextState, minLabel);
} else if (max == null) {
equalPrefix = false;
sharedPrefixLength = 0;
a.addTransition(lastState, sinkState, minLabel+1, 0xff);
a.addTransition(lastState, nextState, minLabel);
} else {
// This is the first point where min & max diverge:
assert maxLabel > minLabel;
a.addTransition(lastState, nextState, minLabel);
if (maxLabel > minLabel + 1) {
a.addTransition(lastState, sinkState, minLabel+1, maxLabel-1);
}
// Now fork off path for max:
if (maxInclusive || i < max.length-1) {
firstMaxState = a.createState();
if (i < max.length-1) {
a.setAccept(firstMaxState, true);
}
a.addTransition(lastState, firstMaxState, maxLabel);
}
equalPrefix = false;
sharedPrefixLength = i;
}
} else {
// OK, already diverged:
a.addTransition(lastState, nextState, minLabel);
if (minLabel < 255) {
a.addTransition(lastState, sinkState, minLabel+1, 255);
}
}
lastState = nextState;
}
// Accept any suffix appended to the min term:
if (equalPrefix == false && lastState != sinkState && lastState != startState) {
a.addTransition(lastState, sinkState, 0, 255);
}
if (minInclusive) {
// Accept exactly the min term:
a.setAccept(lastState, true);
}
if (max != null) {
// Now do max:
if (firstMaxState == -1) {
// Min was a full prefix of max
sharedPrefixLength = min.length;
} else {
lastState = firstMaxState;
sharedPrefixLength++;
}
for(int i=sharedPrefixLength;i<max.length;i++) {
int maxLabel = max.bytes[max.offset+i]&0xff;
if (maxLabel > 0) {
a.addTransition(lastState, sinkState, 0, maxLabel-1);
}
if (maxInclusive || i < max.length-1) {
int nextState = a.createState();
if (i < max.length-1) {
a.setAccept(nextState, true);
}
a.addTransition(lastState, nextState, maxLabel);
lastState = nextState;
}
}
if (maxInclusive) {
a.setAccept(lastState, true);
}
}
a.finishState();
assert a.isDeterministic(): a.toDot();
return a;
}
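A test-style sketch (not from this commit) exercising the interval automaton directly through ByteRunAutomaton:

Automaton a = Automata.makeBinaryInterval(new BytesRef("b"), true, new BytesRef("d"), false);
ByteRunAutomaton run = new ByteRunAutomaton(a, true, Integer.MAX_VALUE);
BytesRef t = new BytesRef("c");
assert run.run(t.bytes, t.offset, t.length);            // "b" <= "c" < "d"
BytesRef u = new BytesRef("d");
assert run.run(u.bytes, u.offset, u.length) == false;   // the max bound is exclusive here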
/**
* Returns a new automaton that accepts strings representing decimal
* Returns a new automaton that accepts strings representing decimal (base 10)
* non-negative integers in the given interval.
*
* @param min minimal value of interval
@ -218,7 +394,7 @@ final public class Automata {
* interval cannot be expressed with the given fixed number of
* digits
*/
public static Automaton makeInterval(int min, int max, int digits)
public static Automaton makeDecimalInterval(int min, int max, int digits)
throws IllegalArgumentException {
String x = Integer.toString(min);
String y = Integer.toString(max);
@ -275,7 +451,30 @@ final public class Automata {
for (int i = 0, cp = 0; i < s.length(); i += Character.charCount(cp)) {
int state = a.createState();
cp = s.codePointAt(i);
a.addTransition(lastState, state, cp, cp);
a.addTransition(lastState, state, cp);
lastState = state;
}
a.setAccept(lastState, true);
a.finishState();
assert a.isDeterministic();
assert Operations.hasDeadStates(a) == false;
return a;
}
/**
* Returns a new (deterministic) automaton that accepts the single given
* binary term.
*/
public static Automaton makeBinary(BytesRef term) {
Automaton a = new Automaton();
int lastState = a.createState();
for (int i=0;i<term.length;i++) {
int state = a.createState();
int label = term.bytes[term.offset+i] & 0xff;
a.addTransition(lastState, state, label);
lastState = state;
}

View File

@ -491,11 +491,50 @@ public class Automaton implements Accountable {
public void getNextTransition(Transition t) {
// Make sure there is still a transition left:
assert (t.transitionUpto+3 - states[2*t.source]) <= 3*states[2*t.source+1];
// Make sure transitions are in fact sorted:
assert transitionSorted(t);
t.dest = transitions[t.transitionUpto++];
t.min = transitions[t.transitionUpto++];
t.max = transitions[t.transitionUpto++];
}
private boolean transitionSorted(Transition t) {
int upto = t.transitionUpto;
if (upto == states[2*t.source]) {
// Transition isn't initialized yet (this is the first transition); don't check:
return true;
}
int nextDest = transitions[upto];
int nextMin = transitions[upto+1];
int nextMax = transitions[upto+2];
if (nextMin > t.min) {
return true;
} else if (nextMin < t.min) {
return false;
}
// Min is equal, now test max:
if (nextMax > t.max) {
return true;
} else if (nextMax < t.max) {
return false;
}
// Max is also equal, now test dest:
if (nextDest > t.dest) {
return true;
} else if (nextDest < t.dest) {
return false;
}
// We should never see fully equal transitions here:
return false;
}
/** Fill the provided {@link Transition} with the index'th
* transition leaving the specified state. */
public void getTransition(int state, int index, Transition t) {
@ -565,7 +604,7 @@ public class Automaton implements Accountable {
//System.out.println("toDot: state " + state + " has " + numTransitions + " transitions; t.nextTrans=" + t.transitionUpto);
for(int i=0;i<numTransitions;i++) {
getNextTransition(t);
//System.out.println(" t.nextTrans=" + t.transitionUpto);
//System.out.println(" t.nextTrans=" + t.transitionUpto + " t=" + t);
assert t.max >= t.min;
b.append(" ");
b.append(state);

View File

@ -28,8 +28,8 @@ public class ByteRunAutomaton extends RunAutomaton {
}
/** expert: if utf8 is true, the input is already byte-based */
public ByteRunAutomaton(Automaton a, boolean utf8, int maxDeterminizedStates) {
super(utf8 ? a : new UTF32ToUTF8().convert(a), 256, true, maxDeterminizedStates);
public ByteRunAutomaton(Automaton a, boolean isBinary, int maxDeterminizedStates) {
super(isBinary ? a : new UTF32ToUTF8().convert(a), 256, true, maxDeterminizedStates);
}
/**

View File

@ -90,12 +90,41 @@ public class CompiledAutomaton {
*/
public final Boolean finite;
/** The state, if any, that accepts all suffixes, else -1. */
public final int sinkState;
/** Create this, passing simplify=true and finite=null, so that we try
* to simplify the automaton and determine if it is finite. */
public CompiledAutomaton(Automaton automaton) {
this(automaton, null, true);
}
/** Returns sink state, if present, else -1. */
private static int findSinkState(Automaton automaton) {
int numStates = automaton.getNumStates();
Transition t = new Transition();
int foundState = -1;
for (int s=0;s<numStates;s++) {
if (automaton.isAccept(s)) {
int count = automaton.initTransition(s, t);
boolean isSinkState = false;
for(int i=0;i<count;i++) {
automaton.getNextTransition(t);
if (t.dest == s && t.min == 0 && t.max == 0xff) {
isSinkState = true;
break;
}
}
if (isSinkState) {
foundState = s;
break;
}
}
}
return foundState;
}
/** Create this. If finite is null, we use {@link Operations#isFinite}
* to determine whether it is finite. If simplify is true, we run
* possibly expensive operations to determine if the automaton is one
@ -134,6 +163,7 @@ public class CompiledAutomaton {
runAutomaton = null;
this.automaton = null;
this.finite = null;
sinkState = -1;
return;
}
@ -154,6 +184,7 @@ public class CompiledAutomaton {
runAutomaton = null;
this.automaton = null;
this.finite = null;
sinkState = -1;
return;
}
@ -174,7 +205,7 @@ public class CompiledAutomaton {
} else {
term = new BytesRef(UnicodeUtil.newString(singleton.ints, singleton.offset, singleton.length));
}
sinkState = -1;
return;
}
}
@ -202,7 +233,8 @@ public class CompiledAutomaton {
if (this.finite) {
commonSuffixRef = null;
} else {
// NOTE: this is a very costly operation! We should test if it's really warranted in practice...
// NOTE: this is a very costly operation! We should test if it's really warranted in practice... we could do a fast match
// by looking for a sink state (which means it has no common suffix). Or maybe we shouldn't do it when simplify is false?:
BytesRef suffix = Operations.getCommonSuffixBytesRef(binary, maxDeterminizedStates);
if (suffix.length == 0) {
commonSuffixRef = null;
@ -215,6 +247,10 @@ public class CompiledAutomaton {
runAutomaton = new ByteRunAutomaton(binary, true, maxDeterminizedStates);
this.automaton = runAutomaton.automaton;
// TODO: this is a bit fragile because if the automaton is not minimized there could be more than 1 sink state but auto-prefix will fail
// to run for those:
sinkState = findSinkState(this.automaton);
}
private Transition transition = new Transition();

View File

@ -599,7 +599,7 @@ public class RegExp {
a = aa;
break;
case REGEXP_INTERVAL:
a = Automata.makeInterval(min, max, digits);
a = Automata.makeDecimalInterval(min, max, digits);
break;
}
return a;

View File

@ -117,8 +117,8 @@ public class TestAutomatonQuery extends LuceneTestCase {
assertAutomatonHits(2, Automata.makeString("doc"));
assertAutomatonHits(1, Automata.makeChar('a'));
assertAutomatonHits(2, Automata.makeCharRange('a', 'b'));
assertAutomatonHits(2, Automata.makeInterval(1233, 2346, 0));
assertAutomatonHits(1, Automata.makeInterval(0, 2000, 0));
assertAutomatonHits(2, Automata.makeDecimalInterval(1233, 2346, 0));
assertAutomatonHits(1, Automata.makeDecimalInterval(0, 2000, 0));
assertAutomatonHits(2, Operations.union(Automata.makeChar('a'),
Automata.makeChar('b')));
assertAutomatonHits(0, Operations.intersection(Automata
@ -194,7 +194,6 @@ public class TestAutomatonQuery extends LuceneTestCase {
Automaton pfx = Automata.makeString("do");
Automaton prefixAutomaton = Operations.concatenate(pfx, Automata.makeAnyString());
AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
assertEquals(3, automatonQueryNrHits(aq));
}

View File

@ -17,16 +17,19 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
@ -34,8 +37,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import java.io.IOException;
public class TestMultiTermQueryRewrites extends LuceneTestCase {
static Directory dir, sdir1, sdir2;
@ -152,14 +153,27 @@ public class TestMultiTermQueryRewrites extends LuceneTestCase {
final MultiTermQuery mtq = new MultiTermQuery("data") {
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
return new TermRangeTermsEnum(terms.iterator(null), new BytesRef("2"), new BytesRef("7"), true, true) {
return new FilteredTermsEnum(terms.iterator(null)) {
final BoostAttribute boostAtt =
attributes().addAttribute(BoostAttribute.class);
@Override
protected AcceptStatus accept(BytesRef term) {
boostAtt.setBoost(Float.parseFloat(term.utf8ToString()));
return super.accept(term);
if (term.length == 0) {
return AcceptStatus.NO;
}
char c = (char) (term.bytes[term.offset] & 0xff);
if (c >= '2') {
if (c <= '7') {
return AcceptStatus.YES;
} else {
return AcceptStatus.END;
}
} else {
return AcceptStatus.NO;
}
}
};
}
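A note on the rewrite above, hedged as a reading of the test rather than documented behavior: FilteredTermsEnum visits terms in sorted order, so AcceptStatus.END is a stronger answer than NO; once the first byte sorts past '7' no later term can match, and enumeration can stop outright. A minimal accept method following the same contract (belongs in a FilteredTermsEnum subclass; illustrative only):

  @Override
  protected AcceptStatus accept(BytesRef term) {
    if (term.length == 0) {
      return AcceptStatus.NO;           // skip just this term
    }
    char c = (char) (term.bytes[term.offset] & 0xff);
    if (c < '2') {
      return AcceptStatus.NO;           // before the range: keep scanning
    } else if (c <= '7') {
      return AcceptStatus.YES;          // inside the range: collect it
    } else {
      return AcceptStatus.END;          // past the range: stop the enum entirely
    }
  }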

View File

@ -17,25 +17,32 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
@ -70,7 +77,6 @@ public class TestPrefixQuery extends LuceneTestCase {
assertEquals("One in /Computers/Mac", 1, hits.length);
query = new PrefixQuery(new Term("category", ""));
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "category");
hits = searcher.search(query, 1000).scoreDocs;
assertEquals("everything", 3, hits.length);
writer.close();
@ -78,6 +84,92 @@ public class TestPrefixQuery extends LuceneTestCase {
directory.close();
}
/** Make sure auto prefix terms are used with PrefixQuery. */
public void testAutoPrefixTermsKickIn() throws Exception {
List<String> prefixes = new ArrayList<>();
for(int i=1;i<5;i++) {
char[] chars = new char[i];
Arrays.fill(chars, 'a');
prefixes.add(new String(chars));
}
Set<String> randomTerms = new HashSet<>();
int numTerms = atLeast(10000);
while (randomTerms.size() < numTerms) {
for(String prefix : prefixes) {
randomTerms.add(prefix + TestUtil.randomRealisticUnicodeString(random()));
}
}
int actualCount = 0;
for(String term : randomTerms) {
if (term.startsWith("aa")) {
actualCount++;
}
}
//System.out.println("actual count " + actualCount);
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
// As long as this is never > actualCount, aa should always see at least one auto-prefix term:
int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, actualCount);
int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
minTermsAutoPrefix, maxTermsAutoPrefix)));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
for (String term : randomTerms) {
Document doc = new Document();
doc.add(new StringField("field", term, Field.Store.NO));
w.addDocument(doc);
}
w.forceMerge(1);
IndexReader r = w.getReader();
final Terms terms = MultiFields.getTerms(r, "field");
IndexSearcher s = new IndexSearcher(r);
final int finalActualCount = actualCount;
PrefixQuery q = new PrefixQuery(new Term("field", "aa")) {
public PrefixQuery checkTerms() throws IOException {
TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
int count = 0;
while (termsEnum.next() != null) {
//System.out.println("got term: " + termsEnum.term().utf8ToString());
count++;
}
// Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
assertTrue(count < finalActualCount);
return this;
}
}.checkTerms();
int x = BooleanQuery.getMaxClauseCount();
try {
BooleanQuery.setMaxClauseCount(randomTerms.size());
if (random().nextBoolean()) {
q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
} else if (random().nextBoolean()) {
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
}
assertEquals(actualCount, s.search(q, 1).totalHits);
} finally {
BooleanQuery.setMaxClauseCount(x);
}
r.close();
w.close();
dir.close();
}
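The arithmetic behind the random block sizes above is worth spelling out. Hedged as an inference from the formulas in this test (and the matching ones in TestTermRangeQuery below), not as a documented API contract: BlockTree appears to require maxItemsInBlock >= 2 * (minItemsInBlock - 1), and the auto-prefix bounds are drawn with the same shape.

  // Hedged check of the invariant the random picks appear to respect
  // (assumed from the formulas in this test, not asserted by the API here):
  int minTermsInBlock = 25;                                     // any value >= 2
  int maxTermsInBlock = Math.max(2, (minTermsInBlock - 1) * 2); // 48: smallest value the formula can produce
  assert maxTermsInBlock >= 2 * (minTermsInBlock - 1);          // BlockTree-style bound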
public void testMatchAll() throws Exception {
Directory directory = newDirectory();
@ -92,8 +184,6 @@ public class TestPrefixQuery extends LuceneTestCase {
IndexSearcher searcher = newSearcher(reader);
assertEquals(1, searcher.search(query, 1000).totalHits);
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
writer.close();
reader.close();
directory.close();

View File

@ -18,20 +18,32 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestTermRangeQuery extends LuceneTestCase {
@ -104,19 +116,24 @@ public class TestTermRangeQuery extends LuceneTestCase {
initializeIndex(new String[]{"A", "B", "C", "D"});
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = newSearcher(reader);
TermRangeQuery query = new TermRangeQuery("content", null, null, true, true);
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "content");
// Should return the unfiltered TermsEnum:
assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
assertEquals(4, searcher.search(query, 1000).scoreDocs.length);
query = new TermRangeQuery("content", null, null, false, false);
query = TermRangeQuery.newStringRange("content", "", null, true, true);
// Should return the unfiltered TermsEnum:
assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
assertEquals(4, searcher.search(query, 1000).scoreDocs.length);
query = TermRangeQuery.newStringRange("content", "", null, true, false);
assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
assertEquals(4, searcher.search(query, 1000).scoreDocs.length);
// and now anothe one
query = TermRangeQuery.newStringRange("content", "B", null, true, false);
assertTrue(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
// and now another one
query = TermRangeQuery.newStringRange("content", "B", null, true, true);
assertEquals(3, searcher.search(query, 1000).scoreDocs.length);
reader.close();
}
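A hedged reading of the asserts above: when the lower bound is null or the inclusive empty string and the upper bound is null, the range matches every term, so filtering buys nothing and the raw enum can be returned. An illustrative dispatch consistent with that (an assumption, not this commit's code; field names hypothetical):

  protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
    if ((lowerTerm == null || (includeLower && lowerTerm.length == 0)) && upperTerm == null) {
      // Matches everything: no need for the TermRangeTermsEnum wrapper.
      return terms.iterator(null);
    }
    return new TermRangeTermsEnum(terms.iterator(null), lowerTerm, upperTerm, includeLower, includeUpper);
  }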
@ -336,4 +353,127 @@ public class TestTermRangeQuery extends LuceneTestCase {
//assertEquals("C added => A,B,<empty string>,C in range", 3, hits.length());
reader.close();
}
/** Make sure auto prefix terms are used with TermRangeQuery. */
public void testAutoPrefixTermsKickIn() throws Exception {
List<String> prefixes = new ArrayList<>();
for(int i=1;i<5;i++) {
char[] chars = new char[i];
Arrays.fill(chars, 'a');
prefixes.add(new String(chars));
}
Set<String> randomTerms = new HashSet<>();
int numTerms = atLeast(10000);
while (randomTerms.size() < numTerms) {
for(String prefix : prefixes) {
randomTerms.add(prefix + TestUtil.randomSimpleString(random()));
}
}
// We make term range aa<start> - aa<end>
char start;
char end;
int actualCount;
boolean startInclusive = random().nextBoolean();
boolean endInclusive = random().nextBoolean();
String startTerm;
String endTerm;
while (true) {
start = (char) TestUtil.nextInt(random(), 'a', 'm');
end = (char) TestUtil.nextInt(random(), start+1, 'z');
actualCount = 0;
startTerm = "aa" + start;
endTerm = "aa" + end;
for(String term : randomTerms) {
int cmpStart = startTerm.compareTo(term);
int cmpEnd = endTerm.compareTo(term);
if ((cmpStart < 0 || (startInclusive && cmpStart == 0)) &&
(cmpEnd > 0 || (endInclusive && cmpEnd == 0))) {
actualCount++;
}
}
if (actualCount > 2000) {
break;
}
}
//System.out.println("start " + startTerm + " inclusive? " + startInclusive);
//System.out.println("end " + endTerm + " inclusive? " + endInclusive);
//System.out.println("actual count " + actualCount);
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, 100);
int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
//System.out.println("minTermsAutoPrefix " + minTermsAutoPrefix);
//System.out.println("maxTermsAutoPrefix " + maxTermsAutoPrefix);
iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
minTermsAutoPrefix, maxTermsAutoPrefix)));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
//System.out.println("TEST: index terms");
for (String term : randomTerms) {
Document doc = new Document();
doc.add(new StringField("field", term, Field.Store.NO));
w.addDocument(doc);
//System.out.println(" " + term);
}
//System.out.println("TEST: now force merge");
w.forceMerge(1);
IndexReader r = w.getReader();
final Terms terms = MultiFields.getTerms(r, "field");
IndexSearcher s = new IndexSearcher(r);
final int finalActualCount = actualCount;
//System.out.println("start=" + startTerm + " end=" + endTerm + " startIncl=" + startInclusive + " endIncl=" + endInclusive);
TermRangeQuery q = new TermRangeQuery("field", new BytesRef(startTerm), new BytesRef(endTerm), startInclusive, endInclusive) {
public TermRangeQuery checkTerms() throws IOException {
TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
int count = 0;
while (termsEnum.next() != null) {
//System.out.println("got term: " + termsEnum.term().utf8ToString());
count++;
}
//System.out.println("count " + count + " vs finalActualCount=" + finalActualCount);
// Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
assertTrue(count < finalActualCount);
return this;
}
}.checkTerms();
if (random().nextBoolean()) {
q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
} else if (random().nextBoolean()) {
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
}
assertEquals(actualCount, s.search(q, 1).totalHits);
// Test when min == max:
List<String> randomTermsList = new ArrayList<>(randomTerms);
for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
String term = randomTermsList.get(random().nextInt(randomTermsList.size()));
q = new TermRangeQuery("field", new BytesRef(term), new BytesRef(term), true, true);
assertEquals(1, s.search(q, 1).totalHits);
}
r.close();
w.close();
dir.close();
}
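The inclusive/exclusive bookkeeping in the counting loop above is compact enough to trip over; factored into a hedged helper with a hypothetical name, the same logic reads:

  // Hypothetical refactoring of the membership test used in the loop above:
  static boolean inRange(String term, String startTerm, boolean startInclusive,
                         String endTerm, boolean endInclusive) {
    int cmpStart = startTerm.compareTo(term);
    int cmpEnd = endTerm.compareTo(term);
    return (cmpStart < 0 || (startInclusive && cmpStart == 0))
        && (cmpEnd > 0 || (endInclusive && cmpEnd == 0));
  }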
}

View File

@ -34,13 +34,7 @@ import java.io.IOException;
/**
* TestWildcard tests the '*' and '?' wildcard characters.
*/
public class TestWildcard
extends LuceneTestCase {
@Override
public void setUp() throws Exception {
super.setUp();
}
public class TestWildcard extends LuceneTestCase {
public void testEquals() {
WildcardQuery wq1 = new WildcardQuery(new Term("field", "b*a"));
@ -126,10 +120,10 @@ public class TestWildcard
MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*"));
assertMatches(searcher, wq, 2);
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
wq = new WildcardQuery(new Term("field", "*"));
assertMatches(searcher, wq, 2);
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
assertFalse(wq.getTermsEnum(terms).getClass().getSimpleName().contains("AutomatonTermsEnum"));
reader.close();
indexStore.close();

View File

@ -232,7 +232,7 @@ public class TestAutomaton extends LuceneTestCase {
}
public void testInterval() throws Exception {
Automaton a = Operations.determinize(Automata.makeInterval(17, 100, 3),
Automaton a = Operations.determinize(Automata.makeDecimalInterval(17, 100, 3),
DEFAULT_MAX_DETERMINIZED_STATES);
assertFalse(Operations.run(a, ""));
assertTrue(Operations.run(a, "017"));
@ -431,7 +431,7 @@ public class TestAutomaton extends LuceneTestCase {
}
public void testOneInterval() throws Exception {
Automaton a = Automata.makeInterval(999, 1032, 0);
Automaton a = Automata.makeDecimalInterval(999, 1032, 0);
a = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a, "0999"));
assertTrue(Operations.run(a, "00999"));
@ -439,7 +439,7 @@ public class TestAutomaton extends LuceneTestCase {
}
public void testAnotherInterval() throws Exception {
Automaton a = Automata.makeInterval(1, 2, 0);
Automaton a = Automata.makeDecimalInterval(1, 2, 0);
a = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a, "01"));
}
@ -462,7 +462,7 @@ public class TestAutomaton extends LuceneTestCase {
}
String prefix = b.toString();
Automaton a = Operations.determinize(Automata.makeInterval(min, max, digits),
Automaton a = Operations.determinize(Automata.makeDecimalInterval(min, max, digits),
DEFAULT_MAX_DETERMINIZED_STATES);
if (random().nextBoolean()) {
a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
@ -942,7 +942,7 @@ public class TestAutomaton extends LuceneTestCase {
if (VERBOSE) {
System.out.println(" op=union interval min=" + min + " max=" + max + " digits=" + digits);
}
a = Operations.union(a, Automata.makeInterval(min, max, digits));
a = Operations.union(a, Automata.makeDecimalInterval(min, max, digits));
StringBuilder b = new StringBuilder();
for(int i=0;i<digits;i++) {
b.append('0');
@ -1105,6 +1105,138 @@ public class TestAutomaton extends LuceneTestCase {
}
}
public void testMakeBinaryIntervalRandom() throws Exception {
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
BytesRef minTerm = TestUtil.randomBinaryTerm(random());
boolean minInclusive = random().nextBoolean();
BytesRef maxTerm = TestUtil.randomBinaryTerm(random());
boolean maxInclusive = random().nextBoolean();
if (VERBOSE) {
System.out.println("TEST: iter=" + iter + " minTerm=" + minTerm + " minInclusive=" + minInclusive + " maxTerm=" + maxTerm + " maxInclusive=" + maxInclusive);
}
Automaton a = Automata.makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive);
Automaton minA = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
if (minA.getNumStates() != a.getNumStates()) {
assertTrue(minA.getNumStates() < a.getNumStates());
System.out.println("Original was not minimal:");
System.out.println("Original:\n" + a.toDot());
System.out.println("Minimized:\n" + minA.toDot());
fail("auotmaton was not minimal");
}
if (VERBOSE) {
System.out.println(a.toDot());
}
for(int iter2=0;iter2<500;iter2++) {
BytesRef term = TestUtil.randomBinaryTerm(random());
int minCmp = minTerm.compareTo(term);
int maxCmp = maxTerm.compareTo(term);
boolean expected;
if (minCmp > 0 || maxCmp < 0) {
expected = false;
} else if (minCmp == 0 && maxCmp == 0) {
expected = minInclusive && maxInclusive;
} else if (minCmp == 0) {
expected = minInclusive;
} else if (maxCmp == 0) {
expected = maxInclusive;
} else {
expected = true;
}
if (VERBOSE) {
System.out.println(" check term=" + term + " expected=" + expected);
}
IntsRefBuilder intsBuilder = new IntsRefBuilder();
Util.toIntsRef(term, intsBuilder);
assertEquals(expected, Operations.run(a, intsBuilder.toIntsRef()));
}
}
}
private static IntsRef intsRef(String s) {
IntsRefBuilder intsBuilder = new IntsRefBuilder();
Util.toIntsRef(new BytesRef(s), intsBuilder);
return intsBuilder.toIntsRef();
}
public void testMakeBinaryIntervalBasic() throws Exception {
Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("foo"), true);
assertTrue(Operations.run(a, intsRef("bar")));
assertTrue(Operations.run(a, intsRef("foo")));
assertTrue(Operations.run(a, intsRef("beep")));
assertFalse(Operations.run(a, intsRef("baq")));
assertTrue(Operations.run(a, intsRef("bara")));
}
public void testMakeBinaryIntervalEqual() throws Exception {
Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("bar"), true);
assertTrue(Operations.run(a, intsRef("bar")));
assertTrue(Operations.isFinite(a));
assertEquals(1, Operations.getFiniteStrings(a, 10).size());
}
public void testMakeBinaryIntervalCommonPrefix() throws Exception {
Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("barfoo"), true);
assertFalse(Operations.run(a, intsRef("bam")));
assertTrue(Operations.run(a, intsRef("bar")));
assertTrue(Operations.run(a, intsRef("bara")));
assertTrue(Operations.run(a, intsRef("barf")));
assertTrue(Operations.run(a, intsRef("barfo")));
assertTrue(Operations.run(a, intsRef("barfoo")));
assertTrue(Operations.run(a, intsRef("barfonz")));
assertFalse(Operations.run(a, intsRef("barfop")));
assertFalse(Operations.run(a, intsRef("barfoop")));
}
public void testMakeBinaryIntervalOpenMax() throws Exception {
Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, null, true);
assertFalse(Operations.run(a, intsRef("bam")));
assertTrue(Operations.run(a, intsRef("bar")));
assertTrue(Operations.run(a, intsRef("bara")));
assertTrue(Operations.run(a, intsRef("barf")));
assertTrue(Operations.run(a, intsRef("barfo")));
assertTrue(Operations.run(a, intsRef("barfoo")));
assertTrue(Operations.run(a, intsRef("barfonz")));
assertTrue(Operations.run(a, intsRef("barfop")));
assertTrue(Operations.run(a, intsRef("barfoop")));
assertTrue(Operations.run(a, intsRef("zzz")));
}
public void testMakeBinaryIntervalOpenMin() throws Exception {
Automaton a = Automata.makeBinaryInterval(null, true, new BytesRef("foo"), true);
assertFalse(Operations.run(a, intsRef("foz")));
assertFalse(Operations.run(a, intsRef("zzz")));
assertTrue(Operations.run(a, intsRef("foo")));
assertTrue(Operations.run(a, intsRef("")));
assertTrue(Operations.run(a, intsRef("a")));
assertTrue(Operations.run(a, intsRef("aaa")));
assertTrue(Operations.run(a, intsRef("bz")));
}
public void testMakeBinaryIntervalOpenBoth() throws Exception {
Automaton a = Automata.makeBinaryInterval(null, true, null, true);
assertTrue(Operations.run(a, intsRef("foz")));
assertTrue(Operations.run(a, intsRef("zzz")));
assertTrue(Operations.run(a, intsRef("foo")));
assertTrue(Operations.run(a, intsRef("")));
assertTrue(Operations.run(a, intsRef("a")));
assertTrue(Operations.run(a, intsRef("aaa")));
assertTrue(Operations.run(a, intsRef("bz")));
}
public void testAcceptAllEmptyStringMin() throws Exception {
Automaton a = Automata.makeBinaryInterval(new BytesRef(), true, null, true);
System.out.println("HERE: " + a.toDot());
assertTrue(Operations.sameLanguage(Automata.makeAnyBinary(), a));
}
private static IntsRef toIntsRef(String s) {
IntsRefBuilder b = new IntsRefBuilder();
for (int i = 0, cp = 0; i < s.length(); i += Character.charCount(cp)) {

View File

@ -258,14 +258,14 @@ public class AssertingLeafReader extends FilterLeafReader {
public TermState termState() throws IOException {
assertThread("Terms enums", creationThread);
assert state == State.POSITIONED : "termState() called on unpositioned TermsEnum";
return super.termState();
return in.termState();
}
@Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assertThread("Terms enums", creationThread);
assert term.isValid();
super.seekExact(term, state);
in.seekExact(term, state);
this.state = State.POSITIONED;
}

File diff suppressed because it is too large

View File

@ -831,6 +831,14 @@ public final class TestUtil {
return new String(buffer, 0, i);
}
/** Returns a random binary term. */
public static BytesRef randomBinaryTerm(Random r) {
int length = r.nextInt(15);
BytesRef b = new BytesRef(length);
r.nextBytes(b.bytes);
b.length = length;
return b;
}
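A hedged usage note for the new helper: because the returned bytes are arbitrary and often not valid UTF-8, callers pair it with Util.toIntsRef rather than utf8ToString, as the binary-interval tests above do. Illustrative pairing (seed and automaton are placeholders):

  BytesRef term = TestUtil.randomBinaryTerm(new Random(42));
  IntsRefBuilder b = new IntsRefBuilder();
  Util.toIntsRef(term, b);              // safe for non-UTF-8 bytes
  boolean accepted = Operations.run(someAutomaton, b.toIntsRef());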
/** Return a Codec that can read any of the
* default codecs and formats, but always writes in the specified
@ -858,7 +866,7 @@ public final class TestUtil {
// (and maybe their params, too) to infostream on flush and merge.
// otherwise in a real debugging situation we won't know whats going on!
if (LuceneTestCase.VERBOSE) {
System.out.println("forcing docvalues format to:" + format);
System.out.println("TestUtil: forcing docvalues format to:" + format);
}
return new AssertingCodec() {
@Override
@ -1285,6 +1293,24 @@ public final class TestUtil {
}
}
/** For debugging: tries to include br.utf8ToString(), but if that
* fails (because it's not valid UTF-8, which is fine!), falls back
* to the ordinary toString. */
public static String bytesRefToString(BytesRef br) {
if (br == null) {
return "(null)";
} else {
try {
return br.utf8ToString() + " " + br.toString();
} catch (IllegalArgumentException t) {
// If the BytesRef isn't actually UTF-8, or it's e.g. a
// prefix of UTF-8 that ends mid-character, we
// fall back to hex:
return br.toString();
}
}
}
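Hedged illustration of the two output shapes (hex rendering per BytesRef.toString; exact formatting assumed, not asserted by the commit):

  // bytesRefToString(new BytesRef("foo"))
  //   -> "foo [66 6f 6f]"        (valid UTF-8: both forms)
  // bytesRefToString(new BytesRef(new byte[] {(byte) 0xff}))
  //   -> "[ff]"                  (invalid UTF-8: hex only)
  // bytesRefToString(null)
  //   -> "(null)"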
/** Returns a copy of directory, entirely in RAM */
public static RAMDirectory ramCopyOf(Directory dir) throws IOException {
RAMDirectory ram = new RAMDirectory();