LUCENE-6367: PrefixQuery now subclasses AutomatonQuery

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1669522 13f79535-47bb-0310-9956-ffa450edef68
2015-03-27 08:22:54 +00:00 · 2015-03-27 08:22:54 +00:00 · 6a6c729920
parent ef0209189e
commit 6a6c729920
15 changed files with 409 additions and 137 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -244,6 +244,11 @@ Changes in Runtime Behavior
 * LUCENE-6298: SimpleQueryParser returns an empty query rather than
  null, if e.g. the terms were all stopwords. (Lee Hinman via Robert Muir)

+* LUCENE-6367: PrefixQuery now subclasses AutomatonQuery, removing the
+  specialized PrefixTermsEnum.  PrefixQuery now operates in binary
+  term space, meaning any binary term (not just valid UTF-8 terms)
+  are accepted.  (Robert Muir, Mike McCandless)
+
 ======================= Lucene 5.0.0 =======================

 New Features
--- a/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
@ -78,10 +78,28 @@ public class AutomatonQuery extends MultiTermQuery {
   *   space but can process more complex automata.
   */
  public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates) {
+    this(term, automaton, maxDeterminizedStates, false);
+  }
+
+  /**
+   * Create a new AutomatonQuery from an {@link Automaton}.
+   * 
+   * @param term Term containing field and possibly some pattern structure. The
+   *        term text is ignored.
+   * @param automaton Automaton to run, terms that are accepted are considered a
+   *        match.
+   * @param maxDeterminizedStates maximum number of states in the resulting
+   *   automata.  If the automata would need more than this many states
+   *   TooComplextToDeterminizeException is thrown.  Higher number require more
+   *   space but can process more complex automata.
+   * @param isBinary if true, this automaton is already binary and
+   *   will not go through the UTF32ToUTF8 conversion
+   */
+  public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates, boolean isBinary) {
    super(term.field());
    this.term = term;
    this.automaton = automaton;
-    this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates);
+    this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
  }

  @Override
--- a/lucene/core/src/java/org/apache/lucene/search/PrefixQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PrefixQuery.java
@ -19,11 +19,13 @@ package org.apache.lucene.search;

 import java.io.IOException;

-import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.util.automaton.Automaton;

 /** A Query that matches documents containing terms with a specified prefix. A PrefixQuery
 * is built by QueryParser for input like <code>app*</code>.
@ -31,29 +33,38 @@ import org.apache.lucene.util.ToStringUtils;
 * <p>This query uses the {@link
 * MultiTermQuery#CONSTANT_SCORE_REWRITE}
 * rewrite method. */
-public class PrefixQuery extends MultiTermQuery {
-  private Term prefix;
+public class PrefixQuery extends AutomatonQuery {

  /** Constructs a query for terms starting with <code>prefix</code>. */
  public PrefixQuery(Term prefix) {
-    super(prefix.field());
-    this.prefix = prefix;
+    // It's OK to pass unlimited maxDeterminizedStates: the automaton is born small and determinized:
+    super(prefix, toAutomaton(prefix.bytes()), Integer.MAX_VALUE, true);
+    if (prefix == null) {
+      throw new NullPointerException("prefix cannot be null");
+    }
+  }
+
+  /** Build an automaton accepting all terms with the specified prefix. */
+  public static Automaton toAutomaton(BytesRef prefix) {
+    Automaton automaton = new Automaton();
+    int lastState = automaton.createState();
+    for(int i=0;i<prefix.length;i++) {
+      int state = automaton.createState();
+      automaton.addTransition(lastState, state, prefix.bytes[prefix.offset+i]&0xff);
+      lastState = state;
+    }
+    automaton.setAccept(lastState, true);
+    automaton.addTransition(lastState, lastState, 0, 255);
+    automaton.finishState();
+    assert automaton.isDeterministic();
+    return automaton;
  }

  /** Returns the prefix of this query. */
-  public Term getPrefix() { return prefix; }
-  
-  @Override  
-  protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
-    TermsEnum tenum = terms.iterator(null);
-    
-    if (prefix.bytes().length == 0) {
-      // no prefix -- match all terms for this field:
-      return tenum;
-    }
-    return new PrefixTermsEnum(tenum, prefix.bytes());
+  public Term getPrefix() {
+    return term;
  }
-
+  
  /** Prints a user-readable version of this query. */
  @Override
  public String toString(String field) {
@ -62,7 +73,7 @@ public class PrefixQuery extends MultiTermQuery {
      buffer.append(getField());
      buffer.append(":");
    }
-    buffer.append(prefix.text());
+    buffer.append(term.text());
    buffer.append('*');
    buffer.append(ToStringUtils.boost(getBoost()));
    return buffer.toString();
@ -72,25 +83,23 @@ public class PrefixQuery extends MultiTermQuery {
  public int hashCode() {
    final int prime = 31;
    int result = super.hashCode();
-    result = prime * result + ((prefix == null) ? 0 : prefix.hashCode());
+    result = prime * result + term.hashCode();
    return result;
  }

  @Override
  public boolean equals(Object obj) {
-    if (this == obj)
+    if (this == obj) {
      return true;
-    if (!super.equals(obj))
-      return false;
-    if (getClass() != obj.getClass())
+    }
+    if (!super.equals(obj)) {
      return false;
+    }
+    // super.equals() ensures we are the same class
    PrefixQuery other = (PrefixQuery) obj;
-    if (prefix == null) {
-      if (other.prefix != null)
-        return false;
-    } else if (!prefix.equals(other.prefix))
+    if (!term.equals(other.term)) {
      return false;
+    }
    return true;
  }
-
 }
--- a/lucene/core/src/java/org/apache/lucene/search/PrefixTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PrefixTermsEnum.java
@ -1,49 +0,0 @@
-package org.apache.lucene.search;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.FilteredTermsEnum;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.StringHelper;
-
-/**
- * Subclass of FilteredTermEnum for enumerating all terms that match the
- * specified prefix filter term.
- * <p>Term enumerations are always ordered by
- * {@link BytesRef#compareTo}.  Each term in the enumeration is
- * greater than all that precede it.</p>
- */
-public class PrefixTermsEnum extends FilteredTermsEnum {
-
-  private final BytesRef prefixRef;
-
-  public PrefixTermsEnum(TermsEnum tenum, BytesRef prefixText) {
-    super(tenum);
-    setInitialSeekTerm(this.prefixRef = prefixText);
-  }
-
-  @Override
-  protected AcceptStatus accept(BytesRef term) {
-    if (StringHelper.startsWith(term, prefixRef)) {
-      return AcceptStatus.YES;
-    } else {
-      return AcceptStatus.END;
-    }
-  }
-}
--- a/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
+++ b/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
@ -393,4 +393,20 @@ public abstract class StringHelper {
      return sb.toString();
    }
  }
+  
+  /** Just converts each int in the incoming {@link IntsRef} to each byte
+   *  in the returned {@link BytesRef}, throwing {@code IllegalArgumentException}
+   *  if any int value is out of bounds for a byte. */
+  public static BytesRef intsRefToBytesRef(IntsRef ints) {
+    byte[] bytes = new byte[ints.length];
+    for(int i=0;i<ints.length;i++) {
+      int x = ints.ints[ints.offset+i];
+      if (x < 0 || x > 255) {
+        throw new IllegalArgumentException("int at pos=" + i + " with value=" + x + " is out-of-bounds for byte");
+      }
+      bytes[i] = (byte) x;
+    }
+
+    return new BytesRef(bytes);
+  }
 }
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
@ -347,6 +347,7 @@ public class Automaton implements Accountable {
  
  /** How many transitions this state has. */
  public int getNumTransitions(int state) {
+    assert state >= 0;
    int count = states[2*state+1];
    if (count == -1) {
      return 0;
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
@ -24,9 +24,11 @@ import java.util.List;
 import org.apache.lucene.index.SingleTermsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.PrefixTermsEnum;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.UnicodeUtil;

 /**
 * Immutable class holding compiled details for a given
@ -47,8 +49,6 @@ public class CompiledAutomaton {
    ALL, 
    /** Automaton that accepts only a single fixed string. */
    SINGLE, 
-    /** Automaton that matches all Strings with a constant prefix. */
-    PREFIX, 
    /** Catch-all for any other automata. */
    NORMAL
  };
@ -57,8 +57,7 @@ public class CompiledAutomaton {
  public final AUTOMATON_TYPE type;

  /** 
-   * For {@link AUTOMATON_TYPE#PREFIX}, this is the prefix term; 
-   * for {@link AUTOMATON_TYPE#SINGLE} this is the singleton term.
+   * For {@link AUTOMATON_TYPE#SINGLE} this is the singleton term.
   */
  public final BytesRef term;

@ -101,7 +100,7 @@ public class CompiledAutomaton {
   *  possibly expensive operations to determine if the automaton is one
   *  the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. */
  public CompiledAutomaton(Automaton automaton, Boolean finite, boolean simplify) {
-    this(automaton, finite, simplify, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
+    this(automaton, finite, simplify, Operations.DEFAULT_MAX_DETERMINIZED_STATES, false);
  }


@ -114,7 +113,7 @@ public class CompiledAutomaton {
   *  TooComplexToDeterminizeException.
   */
  public CompiledAutomaton(Automaton automaton, Boolean finite, boolean simplify,
-      int maxDeterminizedStates) {
+                           int maxDeterminizedStates, boolean isBinary) {
    if (automaton.getNumStates() == 0) {
      automaton = new Automaton();
      automaton.createState();
@ -135,8 +134,18 @@ public class CompiledAutomaton {
        this.automaton = null;
        this.finite = null;
        return;
+      }
+
+      boolean isTotal;
+
      // NOTE: only approximate, because automaton may not be minimal:
-      } else if (Operations.isTotal(automaton)) {
+      if (isBinary) {
+        isTotal = Operations.isTotal(automaton, 0, 0xff);
+      } else {
+        isTotal = Operations.isTotal(automaton);
+      }
+
+      if (isTotal) {
        // matches all possible strings
        type = AUTOMATON_TYPE.ALL;
        term = null;
@ -145,43 +154,27 @@ public class CompiledAutomaton {
        this.automaton = null;
        this.finite = null;
        return;
-      } else {
+      }

-        automaton = Operations.determinize(automaton, maxDeterminizedStates);
+      automaton = Operations.determinize(automaton, maxDeterminizedStates);

-        final String commonPrefix = Operations.getCommonPrefix(automaton);
-        final String singleton;
+      IntsRef singleton = Operations.getSingleton(automaton);

-        if (commonPrefix.length() > 0 && Operations.sameLanguage(automaton, Automata.makeString(commonPrefix))) {
-          singleton = commonPrefix;
+      if (singleton != null) {
+        // matches a fixed string
+        type = AUTOMATON_TYPE.SINGLE;
+        commonSuffixRef = null;
+        runAutomaton = null;
+        this.automaton = null;
+        this.finite = null;
+
+        if (isBinary) {
+          term = StringHelper.intsRefToBytesRef(singleton);
        } else {
-          singleton = null;
+          term = new BytesRef(UnicodeUtil.newString(singleton.ints, singleton.offset, singleton.length));
        }

-        if (singleton != null) {
-          // matches a fixed string
-          type = AUTOMATON_TYPE.SINGLE;
-          term = new BytesRef(singleton);
-          commonSuffixRef = null;
-          runAutomaton = null;
-          this.automaton = null;
-          this.finite = null;
-          return;
-        } else if (commonPrefix.length() > 0) {
-          Automaton other = Operations.concatenate(Automata.makeString(commonPrefix), Automata.makeAnyString());
-          other = Operations.determinize(other, maxDeterminizedStates);
-          assert Operations.hasDeadStates(other) == false;
-          if (Operations.sameLanguage(automaton, other)) {
-            // matches a constant prefix
-            type = AUTOMATON_TYPE.PREFIX;
-            term = new BytesRef(commonPrefix);
-            commonSuffixRef = null;
-            runAutomaton = null;
-            this.automaton = null;
-            this.finite = null;
-            return;
-          }
-        }
+        return;
      }
    }

@ -194,14 +187,26 @@ public class CompiledAutomaton {
      this.finite = finite;
    }

-    Automaton utf8 = new UTF32ToUTF8().convert(automaton);
+    Automaton binary;
+    if (isBinary) {
+      // Caller already built binary automaton themselves, e.g. PrefixQuery
+      // does this since it can be provided with a binary (not necessarily
+      // UTF8!) term:
+      binary = automaton;
+    } else {
+      // Incoming automaton is unicode, and we must convert to UTF8 to match what's in the index:
+      binary = new UTF32ToUTF8().convert(automaton);
+    }
+
    if (this.finite) {
      commonSuffixRef = null;
    } else {
      // NOTE: this is a very costly operation!  We should test if it's really warranted in practice...
-      commonSuffixRef = Operations.getCommonSuffixBytesRef(utf8, maxDeterminizedStates);
+      commonSuffixRef = Operations.getCommonSuffixBytesRef(binary, maxDeterminizedStates);
    }
-    runAutomaton = new ByteRunAutomaton(utf8, true, maxDeterminizedStates);
+
+    // This will determinize the binary automaton for us:
+    runAutomaton = new ByteRunAutomaton(binary, true, maxDeterminizedStates);

    this.automaton = runAutomaton.automaton;
  }
@ -285,10 +290,6 @@ public class CompiledAutomaton {
      return terms.iterator(null);
    case SINGLE:
      return new SingleTermsEnum(terms.iterator(null), term);
-    case PREFIX:
-      // TODO: this is very likely faster than .intersect,
-      // but we should test and maybe cutover
-      return new PrefixTermsEnum(terms.iterator(null), term);
    case NORMAL:
      return terms.intersect(this, null);
    default:
@ -410,7 +411,7 @@ public class CompiledAutomaton {
    if (getClass() != obj.getClass()) return false;
    CompiledAutomaton other = (CompiledAutomaton) obj;
    if (type != other.type) return false;
-    if (type == AUTOMATON_TYPE.SINGLE || type == AUTOMATON_TYPE.PREFIX) {
+    if (type == AUTOMATON_TYPE.SINGLE) {
      if (!term.equals(other.term)) return false;
    } else if (type == AUTOMATON_TYPE.NORMAL) {
      if (!runAutomaton.equals(other.runAutomaton)) return false;
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
@ -834,11 +834,20 @@ final public class Operations {
   * Returns true if the given automaton accepts all strings.  The automaton must be minimized.
   */
  public static boolean isTotal(Automaton a) {
+    return isTotal(a, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
+  }
+
+  /**
+   * Returns true if the given automaton accepts all strings for the specified min/max
+   * range of the alphabet.  The automaton must be minimized.
+   */
+  public static boolean isTotal(Automaton a, int minAlphabet, int maxAlphabet) {
    if (a.isAccept(0) && a.getNumTransitions(0) == 1) {
      Transition t = new Transition();
      a.getTransition(0, 0, t);
-      return t.dest == 0 && t.min == Character.MIN_CODE_POINT
-          && t.max == Character.MAX_CODE_POINT;
+      return t.dest == 0
+        && t.min == minAlphabet
+        && t.max == maxAlphabet;
    }
    return false;
  }
@ -1112,6 +1121,37 @@ final public class Operations {
    return builder.get();
  }

+  /** If this automaton accepts a single input, return it.  Else, return null.
+   *  The automaton must be deterministic. */
+  public static IntsRef getSingleton(Automaton a) {
+    if (a.isDeterministic() == false) {
+      throw new IllegalArgumentException("input automaton must be deterministic");
+    }
+    IntsRefBuilder builder = new IntsRefBuilder();
+    HashSet<Integer> visited = new HashSet<>();
+    int s = 0;
+    boolean done;
+    Transition t = new Transition();
+    while (true) {
+      visited.add(s);
+      if (a.isAccept(s) == false) {
+        if (a.getNumTransitions(s) == 1) {
+          a.getTransition(s, 0, t);
+          if (t.min == t.max && !visited.contains(t.dest)) {
+            builder.append(t.min);
+            s = t.dest;
+            continue;
+          }
+        }
+      } else if (a.getNumTransitions(s) == 0) {
+        return builder.get();
+      }
+
+      // Automaton accepts more than one string:
+      return null;
+    }
+  }
+
  /**
   * Returns the longest BytesRef that is a suffix of all accepted strings.
   * Worst case complexity: exponential in number of states (this calls
--- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
@ -260,7 +260,7 @@ public class TestTermsEnum extends LuceneTestCase {
        a = Automata.makeStringUnion(sortedAcceptTerms);
      }
      
-      final CompiledAutomaton c = new CompiledAutomaton(a, true, false, 1000000);
+      final CompiledAutomaton c = new CompiledAutomaton(a, true, false, 1000000, false);

      final BytesRef[] acceptTermsArray = new BytesRef[acceptTerms.size()];
      final Set<BytesRef> acceptTermsSet = new HashSet<>();
--- a/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
@ -195,7 +195,6 @@ public class TestAutomatonQuery extends LuceneTestCase {
    Automaton prefixAutomaton = Operations.concatenate(pfx, Automata.makeAnyString());
    AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
-    assertTrue(aq.getTermsEnum(terms) instanceof PrefixTermsEnum);
    assertEquals(3, automatonQueryNrHits(aq));
  }
  
--- a/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java
@ -17,15 +17,29 @@ package org.apache.lucene.search;
 * limitations under the License.
 */

+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
-import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.TestUtil;

 /**
 * Tests {@link PrefixQuery} class.
@ -57,11 +71,145 @@ public class TestPrefixQuery extends LuceneTestCase {

    query = new PrefixQuery(new Term("category", ""));
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "category");
-    assertFalse(query.getTermsEnum(terms) instanceof PrefixTermsEnum);
    hits = searcher.search(query, 1000).scoreDocs;
    assertEquals("everything", 3, hits.length);
    writer.close();
    reader.close();
    directory.close();
  }
+
+  public void testMatchAll() throws Exception {
+    Directory directory = newDirectory();
+
+    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
+    Document doc = new Document();
+    doc.add(newStringField("field", "field", Field.Store.YES));
+    writer.addDocument(doc);
+
+    IndexReader reader = writer.getReader();
+
+    PrefixQuery query = new PrefixQuery(new Term("field", ""));
+    IndexSearcher searcher = newSearcher(reader);
+
+    assertEquals(1, searcher.search(query, 1000).totalHits);
+
+    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
+    writer.close();
+    reader.close();
+    directory.close();
+  }
+
+  static final class BinaryTokenStream extends TokenStream {
+    private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
+    private boolean available = true;
+  
+    public BinaryTokenStream(BytesRef bytes) {
+      bytesAtt.setBytesRef(bytes);
+    }
+  
+    @Override
+    public boolean incrementToken() {
+      if (available) {
+        clearAttributes();
+        available = false;
+        return true;
+      }
+      return false;
+    }
+  
+    @Override
+    public void reset() {
+      available = true;
+    }
+  
+    public interface ByteTermAttribute extends TermToBytesRefAttribute {
+      public void setBytesRef(BytesRef bytes);
+    }
+  
+    public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
+      private BytesRef bytes;
+    
+      @Override
+      public void fillBytesRef() {
+       // no-op: the bytes was already filled by our owner's incrementToken
+      }
+    
+      @Override
+      public BytesRef getBytesRef() {
+        return bytes;
+      }
+
+      @Override
+      public void setBytesRef(BytesRef bytes) {
+        this.bytes = bytes;
+      }
+   
+      @Override
+      public void clear() {}
+    
+      @Override
+      public void copyTo(AttributeImpl target) {
+        ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
+        other.bytes = bytes;
+      }
+    }
+  }
+
+  /** Basically a StringField that accepts binary term. */
+  private static class BinaryField extends Field {
+
+    final static FieldType TYPE;
+    static {
+      TYPE = new FieldType(StringField.TYPE_NOT_STORED);
+      // Necessary so our custom tokenStream is used by Field.tokenStream:
+      TYPE.setTokenized(true);
+      TYPE.freeze();
+    }
+
+    public BinaryField(String name, BytesRef value) {
+      super(name, new BinaryTokenStream(value), TYPE);
+    }
+  }
+
+  public void testRandomBinaryPrefix() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+    int numTerms = atLeast(10000);
+    Set<BytesRef> terms = new HashSet<>();
+    while (terms.size() < numTerms) {
+      byte[] bytes = new byte[TestUtil.nextInt(random(), 1, 10)];
+      random().nextBytes(bytes);
+      terms.add(new BytesRef(bytes));
+    }
+
+    List<BytesRef> termsList = new ArrayList<>(terms);  
+    Collections.shuffle(termsList, random());
+    for(BytesRef term : termsList) {
+      Document doc = new Document();
+      doc.add(new BinaryField("field", term));
+      w.addDocument(doc);
+    }
+
+    IndexReader r = w.getReader();
+    IndexSearcher s = newSearcher(r);
+
+    int iters = atLeast(100);   
+    for(int iter=0;iter<iters;iter++) {
+      byte[] bytes = new byte[random().nextInt(3)];
+      random().nextBytes(bytes);
+      BytesRef prefix = new BytesRef(bytes);
+      PrefixQuery q = new PrefixQuery(new Term("field", prefix));
+      int count = 0;
+      for(BytesRef term : termsList) {
+        if (StringHelper.startsWith(term, prefix)) {
+          count++;
+        }
+      }
+      assertEquals(count, s.search(q, 1).totalHits);
+    }
+    r.close();
+    w.close();
+    dir.close();
+  }
 }
--- a/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java
@ -127,11 +127,9 @@ public class TestWildcard
    MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*"));
    assertMatches(searcher, wq, 2);
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
-    assertTrue(wq.getTermsEnum(terms) instanceof PrefixTermsEnum);
    
    wq = new WildcardQuery(new Term("field", "*"));
    assertMatches(searcher, wq, 2);
-    assertFalse(wq.getTermsEnum(terms) instanceof PrefixTermsEnum);
    assertFalse(wq.getTermsEnum(terms).getClass().getSimpleName().contains("AutomatonTermsEnum"));
    reader.close();
    indexStore.close();
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java
@ -1104,4 +1104,51 @@ public class TestAutomaton extends LuceneTestCase {
      throw ae;
    }
  }
+
+  private static IntsRef toIntsRef(String s) {
+    IntsRefBuilder b = new IntsRefBuilder();
+    for (int i = 0, cp = 0; i < s.length(); i += Character.charCount(cp)) {
+      cp = s.codePointAt(i);
+      b.append(cp);
+    }
+
+    return b.get();
+  }
+
+  public void testGetSingleton() {
+    int iters = atLeast(10000);
+    for(int iter=0;iter<iters;iter++) {
+      String s = TestUtil.randomRealisticUnicodeString(random());
+      Automaton a = Automata.makeString(s);
+      assertEquals(toIntsRef(s), Operations.getSingleton(a));
+    }
+  }
+
+  public void testGetSingletonEmptyString() {
+    Automaton a = new Automaton();
+    int s = a.createState();
+    a.setAccept(s, true);
+    a.finishState();
+    assertEquals(new IntsRef(), Operations.getSingleton(a));
+  }
+
+  public void testGetSingletonNothing() {
+    Automaton a = new Automaton();
+    a.createState();
+    a.finishState();
+    assertNull(Operations.getSingleton(a));
+  }
+
+  public void testGetSingletonTwo() {
+    Automaton a = new Automaton();
+    int s = a.createState();
+    int x = a.createState();
+    a.setAccept(x, true);
+    a.addTransition(s, x, 55);
+    int y = a.createState();
+    a.setAccept(y, true);
+    a.addTransition(s, y, 58);
+    a.finishState();
+    assertNull(Operations.getSingleton(a));
+  }
 }
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java
@ -38,7 +38,7 @@ public class TestCompiledAutomaton extends LuceneTestCase {
    }
    Collections.sort(terms);
    final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
-    return new CompiledAutomaton(a, true, false, maxDeterminizedStates);
+    return new CompiledAutomaton(a, true, false, maxDeterminizedStates, false);
  }

  private void testFloor(CompiledAutomaton c, String input, String expected) {
@ -121,4 +121,43 @@ public class TestCompiledAutomaton extends LuceneTestCase {
    testFloor(c, "aa", null);
    testFloor(c, "zzz", "goo");
  }
+  
+  // LUCENE-6367
+  public void testBinaryAll() throws Exception {
+    Automaton a = new Automaton();
+    int state = a.createState();
+    a.setAccept(state, true);
+    a.addTransition(state, state, 0, 0xff);
+    a.finishState();
+
+    CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, true);
+    assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type);
+  }
+
+  // LUCENE-6367
+  public void testUnicodeAll() throws Exception {
+    Automaton a = new Automaton();
+    int state = a.createState();
+    a.setAccept(state, true);
+    a.addTransition(state, state, 0, Character.MAX_CODE_POINT);
+    a.finishState();
+
+    CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, false);
+    assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type);
+  }
+
+  // LUCENE-6367
+  public void testBinarySingleton() throws Exception {
+    // This is just ascii so we can pretend it's binary:
+    Automaton a = Automata.makeString("foobar");
+    CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, true);
+    assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type);
+  }
+
+  // LUCENE-6367
+  public void testUnicodeSingleton() throws Exception {
+    Automaton a = Automata.makeString(TestUtil.randomRealisticUnicodeString(random()));
+    CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, false);
+    assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type);
+  }
 }
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
@ -1238,7 +1238,7 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
    for(String field : fields.keySet()) {
      while (true) {
        Automaton a = AutomatonTestUtil.randomAutomaton(random());
-        CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE);
+        CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, false);
        if (ca.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
          // Keep retrying until we get an A that will really "use" the PF's intersect code:
          continue;