LUCENE-588: Escaped wildcard character in wildcard term not handled correctly

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1031765 13f79535-47bb-0310-9956-ffa450edef68
2010-11-05 20:07:20 +00:00 · 2010-11-05 20:07:20 +00:00 · a25dc8b9d5
parent 2bbfc85aad
commit a25dc8b9d5
11 changed files with 255 additions and 66 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -113,6 +113,12 @@ Changes in backwards compatibility policy
 * LUCENE-2674: MultiTermQuery.TermCollector.collect now accepts the
  TermsEnum as well.  (Robert Muir, Mike McCandless)

+* LUCENE-588: WildcardQuery and QueryParser now allows escaping with 
+  the '\' character. Previously this was impossible (you could not escape */?,
+  for example).  If your code somehow depends on the old behavior, you will
+  need to change it (e.g. using "\\" to escape '\' itself).  
+  (Sunil Kamath, Terry Yang via Robert Muir)
+ 
 Changes in Runtime Behavior

 * LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit
--- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/WildcardQueryNode.java
+++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/WildcardQueryNode.java
@ -49,9 +49,9 @@ public class WildcardQueryNode extends FieldQueryNode {
  @Override
  public CharSequence toQueryString(EscapeQuerySyntax escaper) {
    if (isDefaultField(this.field)) {
-      return getTermEscaped(escaper);
+      return this.text;
    } else {
-      return this.field + ":" + getTermEscaped(escaper);
+      return this.field + ":" + this.text;
    }
  }

--- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
+++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
@ -76,6 +76,7 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.automaton.BasicAutomata;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
+import org.junit.Ignore;

 /**
 * This test case is a copy of the core Lucene query parser test, it was adapted
@ -945,6 +946,15 @@ public class TestQPHelper extends LuceneTestCase {
    assertEscapedQueryEquals("&& abc &&", a, "\\&\\& abc \\&\\&");
  }

+  @Ignore("contrib queryparser shouldn't escape wildcard terms")
+  public void testEscapedWildcard() throws Exception {
+    StandardQueryParser qp = new StandardQueryParser();
+    qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
+
+    WildcardQuery q = new WildcardQuery(new Term("field", "foo\\?ba?r"));
+    assertEquals(q, qp.parse("foo\\?ba?r", "field"));
+  }
+
  public void testTabNewlineCarriageReturn() throws Exception {
    assertQueryEqualsDOA("+weltbank +worlbank", null, "+weltbank +worlbank");

--- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
@ -33,7 +33,11 @@ import org.apache.lucene.util.StringHelper;
 import java.io.IOException;
 import java.util.Comparator;
 import java.util.Map;
+import java.util.Set;
 import java.util.HashMap;
+import java.util.TreeMap;
+import java.util.SortedMap;
+import java.util.Iterator;

 class SimpleTextFieldsReader extends FieldsProducer {

@ -78,7 +82,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
  private class SimpleTextFieldsEnum extends FieldsEnum {
    private final IndexInput in;
    private final BytesRef scratch = new BytesRef(10);
-    private boolean omitTF;
+    private String current;

    public SimpleTextFieldsEnum() {
      this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
@ -89,11 +93,12 @@ class SimpleTextFieldsReader extends FieldsProducer {
      while(true) {
        readLine(in, scratch);
        if (scratch.equals(END)) {
+          current = null;
          return null;
        }
        if (scratch.startsWith(FIELD)) {
          String field = StringHelper.intern(new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8"));
-          omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
+          current = field;
          return field;
        }
      }
@ -101,7 +106,7 @@ class SimpleTextFieldsReader extends FieldsProducer {

    @Override
    public TermsEnum terms() throws IOException {
-      return new SimpleTextTermsEnum(in.getFilePointer(), omitTF);
+      return SimpleTextFieldsReader.this.terms(current).iterator();
    }
  }

@ -109,21 +114,42 @@ class SimpleTextFieldsReader extends FieldsProducer {
    private final IndexInput in;
    private final boolean omitTF;
    private BytesRef current;
-    private final long fieldStart;
-    private final BytesRef scratch = new BytesRef(10);
-    private final BytesRef scratch2 = new BytesRef(10);
    private int docFreq;
    private long docsStart;
    private boolean ended;
+    private final TreeMap<BytesRef,TermData> allTerms;
+    private Iterator<Map.Entry<BytesRef,TermData>> iter;

-    public SimpleTextTermsEnum(long offset, boolean omitTF) throws IOException {
+    public SimpleTextTermsEnum(TreeMap<BytesRef,TermData> allTerms, boolean omitTF) throws IOException {
      this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
-      this.in.seek(offset);
+      this.allTerms = allTerms;
      this.omitTF = omitTF;
-      fieldStart = offset;
+      iter = allTerms.entrySet().iterator();
    }

    public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException {
+      
+      final SortedMap<BytesRef,TermData> tailMap = allTerms.tailMap(text);
+
+      if (tailMap.isEmpty()) {
+        current = null;
+        return SeekStatus.END;
+      } else {
+        current = tailMap.firstKey();
+        final TermData td = tailMap.get(current);
+        docsStart = td.docsStart;
+        docFreq = td.docFreq;
+        iter = tailMap.entrySet().iterator();
+        assert iter.hasNext();
+        iter.next();
+        if (current.equals(text)) {
+          return SeekStatus.FOUND;
+        } else {
+          return SeekStatus.NOT_FOUND;
+        }
+      }
+
+      /*
      if (current != null) {
        final int cmp = current.compareTo(text);
        if (cmp == 0) {
@ -153,6 +179,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
      current = null;
      ended = true;
      return SeekStatus.END;
+      */
    }

    @Override
@ -162,6 +189,20 @@ class SimpleTextFieldsReader extends FieldsProducer {
    @Override
    public BytesRef next() throws IOException {
      assert !ended;
+
+      if (iter.hasNext()) {
+        Map.Entry<BytesRef,TermData> ent = iter.next();
+        current = ent.getKey();
+        TermData td = ent.getValue();
+        docFreq = td.docFreq;
+        docsStart = td.docsStart;
+        return current;
+      } else {
+        current = null;
+        return null;
+      }
+
+      /*
      readLine(in, scratch);
      if (scratch.equals(END) || scratch.startsWith(FIELD)) {
        ended = true;
@ -192,6 +233,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
        in.seek(lineStart);
        return current;
      }
+      */
    }

    @Override
@ -447,20 +489,70 @@ class SimpleTextFieldsReader extends FieldsProducer {
    }
  }

+  static class TermData {
+    public long docsStart;
+    public int docFreq;
+
+    public TermData(long docsStart, int docFreq) {
+      this.docsStart = docsStart;
+      this.docFreq = docFreq;
+    }
+  }
+
  private class SimpleTextTerms extends Terms {
    private final String field;
    private final long termsStart;
    private final boolean omitTF;

-    public SimpleTextTerms(String field, long termsStart) {
+    // NOTE: horribly, horribly RAM consuming, but then
+    // SimpleText should never be used in production
+    private final TreeMap<BytesRef,TermData> allTerms = new TreeMap<BytesRef,TermData>();
+
+    private final BytesRef scratch = new BytesRef(10);
+
+    public SimpleTextTerms(String field, long termsStart) throws IOException {
      this.field = StringHelper.intern(field);
      this.termsStart = termsStart;
      omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
+      loadTerms();
+    }
+
+    private void loadTerms() throws IOException {
+      IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
+      in.seek(termsStart);
+      final BytesRef lastTerm = new BytesRef(10);
+      long lastDocsStart = -1;
+      int docFreq = 0;
+      while(true) {
+        readLine(in, scratch);
+        if (scratch.equals(END) || scratch.startsWith(FIELD)) {
+          if (lastDocsStart != -1) {
+            allTerms.put(new BytesRef(lastTerm),
+                         new TermData(lastDocsStart, docFreq));
+          }
+          break;
+        } else if (scratch.startsWith(DOC)) {
+          docFreq++;
+        } else if (scratch.startsWith(TERM)) {
+          if (lastDocsStart != -1) {
+            allTerms.put(new BytesRef(lastTerm),
+                         new TermData(lastDocsStart, docFreq));
+          }
+          lastDocsStart = in.getFilePointer();
+          final int len = scratch.length - TERM.length;
+          if (len > lastTerm.length) {
+            lastTerm.grow(len);
+          }
+          System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
+          lastTerm.length = len;
+          docFreq = 0;
+        }
+      }
    }

    @Override
    public TermsEnum iterator() throws IOException {
-      return new SimpleTextTermsEnum(termsStart, omitTF);
+      return new SimpleTextTermsEnum(allTerms, omitTF);
    }

    @Override
--- a/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java
+++ b/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java
@ -1045,7 +1045,7 @@ public abstract class QueryParserBase {

    String termImage=discardEscapeChar(term.image);
    if (wildcard) {
-      q = getWildcardQuery(qfield, termImage);
+      q = getWildcardQuery(qfield, term.image);
    } else if (prefix) {
      q = getPrefixQuery(qfield,
          discardEscapeChar(term.image.substring
--- a/lucene/src/java/org/apache/lucene/search/WildcardQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/WildcardQuery.java
@ -45,6 +45,9 @@ public class WildcardQuery extends AutomatonQuery {
  /** Char equality with support for wildcards */
  public static final char WILDCARD_CHAR = '?';

+  /** Escape character */
+  public static final char WILDCARD_ESCAPE = '\\';
+  
  /**
   * Constructs a query for terms matching <code>term</code>. 
   */
@ -56,6 +59,7 @@ public class WildcardQuery extends AutomatonQuery {
   * Convert Lucene wildcard syntax into an automaton.
   * @lucene.internal
   */
+  @SuppressWarnings("fallthrough")
  public static Automaton toAutomaton(Term wildcardquery) {
    List<Automaton> automata = new ArrayList<Automaton>();
    
@ -63,6 +67,7 @@ public class WildcardQuery extends AutomatonQuery {
    
    for (int i = 0; i < wildcardText.length();) {
      final int c = wildcardText.codePointAt(i);
+      int length = Character.charCount(c);
      switch(c) {
        case WILDCARD_STRING: 
          automata.add(BasicAutomata.makeAnyString());
@ -70,10 +75,18 @@ public class WildcardQuery extends AutomatonQuery {
        case WILDCARD_CHAR:
          automata.add(BasicAutomata.makeAnyChar());
          break;
+        case WILDCARD_ESCAPE:
+          // add the next codepoint instead, if it exists
+          if (i + length < wildcardText.length()) {
+            final int nextChar = wildcardText.codePointAt(i + length);
+            length += Character.charCount(nextChar);
+            automata.add(BasicAutomata.makeChar(nextChar));
+            break;
+          } // else fallthru, lenient parsing with a trailing \
        default:
          automata.add(BasicAutomata.makeChar(c));
      }
-      i += Character.charCount(c);
+      i += length;
    }
    
    return BasicOperations.concatenate(automata);
--- a/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
@ -178,7 +178,7 @@ final public class SpecialOperations {
   * Reverses the language of the given (non-singleton) automaton while returning
   * the set of new initial states.
   */
-  static Set<State> reverse(Automaton a) {
+  public static Set<State> reverse(Automaton a) {
    a.expandSingleton();
    // reverse all edges
    HashMap<State, HashSet<Transition>> m = new HashMap<State, HashSet<Transition>>();
--- a/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
+++ b/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
@ -770,11 +770,11 @@ public class TestQueryParser extends LuceneTestCase {

    assertQueryEquals("a:b\\\\c*", a, "a:b\\c*");

-    assertQueryEquals("a:b\\-?c", a, "a:b-?c");
-    assertQueryEquals("a:b\\+?c", a, "a:b+?c");
-    assertQueryEquals("a:b\\:?c", a, "a:b:?c");
+    assertQueryEquals("a:b\\-?c", a, "a:b\\-?c");
+    assertQueryEquals("a:b\\+?c", a, "a:b\\+?c");
+    assertQueryEquals("a:b\\:?c", a, "a:b\\:?c");

-    assertQueryEquals("a:b\\\\?c", a, "a:b\\?c");
+    assertQueryEquals("a:b\\\\?c", a, "a:b\\\\?c");

    assertQueryEquals("a:b\\-c~", a, "a:b-c~2.0");
    assertQueryEquals("a:b\\+c~", a, "a:b+c~2.0");
@ -1062,6 +1062,12 @@ public class TestQueryParser extends LuceneTestCase {

  }

+  public void testEscapedWildcard() throws Exception {
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false));
+    WildcardQuery q = new WildcardQuery(new Term("field", "foo\\?ba?r"));
+    assertEquals(q, qp.parse("foo\\?ba?r"));
+  }
+  
  public void testRegexps() throws Exception {
    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false));
    RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]"));
--- a/lucene/src/test/org/apache/lucene/search/TestWildcard.java
+++ b/lucene/src/test/org/apache/lucene/search/TestWildcard.java
@ -205,6 +205,38 @@ public class TestWildcard
    indexStore.close();
  }

+  /**
+   * Tests if wildcard escaping works
+   */
+  public void testEscapes() throws Exception {
+    Directory indexStore = getIndexStore("field", 
+        new String[]{"foo*bar", "foo??bar", "fooCDbar", "fooSOMETHINGbar", "foo\\"});
+    IndexSearcher searcher = new IndexSearcher(indexStore, true);
+
+    // without escape: matches foo??bar, fooCDbar, foo*bar, and fooSOMETHINGbar
+    WildcardQuery unescaped = new WildcardQuery(new Term("field", "foo*bar"));
+    assertMatches(searcher, unescaped, 4);
+    
+    // with escape: only matches foo*bar
+    WildcardQuery escaped = new WildcardQuery(new Term("field", "foo\\*bar"));
+    assertMatches(searcher, escaped, 1);
+    
+    // without escape: matches foo??bar and fooCDbar
+    unescaped = new WildcardQuery(new Term("field", "foo??bar"));
+    assertMatches(searcher, unescaped, 2);
+    
+    // with escape: matches foo??bar only
+    escaped = new WildcardQuery(new Term("field", "foo\\?\\?bar"));
+    assertMatches(searcher, escaped, 1);
+    
+    // check escaping at end: lenient parse yields "foo\"
+    WildcardQuery atEnd = new WildcardQuery(new Term("field", "foo\\"));
+    assertMatches(searcher, atEnd, 1);
+    
+    searcher.close();
+    indexStore.close();
+  }
+  
  private Directory getIndexStore(String field, String[] contents)
      throws IOException {
    Directory indexStore = newDirectory();
--- a/solr/src/java/org/apache/solr/search/SolrQueryParser.java
+++ b/solr/src/java/org/apache/solr/search/SolrQueryParser.java
@ -30,6 +30,7 @@ import org.apache.lucene.util.Version;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.BasicAutomata;
 import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.SpecialOperations;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.solr.analysis.*;
 import org.apache.solr.common.SolrException;
@ -202,37 +203,36 @@ public class SolrQueryParser extends QueryParser {
    String type = schema.getFieldType(field).getTypeName();
    ReversedWildcardFilterFactory factory = leadingWildcards.get(type);
    if (factory != null) {
+      Term term = new Term(field, termStr);
+      // fsa representing the query
+      Automaton automaton = WildcardQuery.toAutomaton(term);
+      // TODO: we should likely use the automaton to calculate shouldReverse, too.
      if (factory.shouldReverse(termStr)) {
-        int len = termStr.length();
-        char[] chars = new char[len+1];
-        chars[0] = factory.getMarkerChar();      
-        termStr.getChars(0, len, chars, 1);
-        ReversedWildcardFilter.reverse(chars, 1, len);
-        termStr = new String(chars);
+        automaton = BasicOperations.concatenate(automaton, BasicAutomata.makeChar(factory.getMarkerChar()));
+        SpecialOperations.reverse(automaton);
      } else { 
        // reverse wildcardfilter is active: remove false positives
-        Term term = new Term(field, termStr);
-        // fsa representing the query
-        Automaton a = WildcardQuery.toAutomaton(term);
        // fsa representing false positives (markerChar*)
        Automaton falsePositives = BasicOperations.concatenate(
            BasicAutomata.makeChar(factory.getMarkerChar()), 
            BasicAutomata.makeAnyString());
-        return new AutomatonQuery(term, BasicOperations.minus(a, falsePositives)) {
-          // override toString so its completely transparent
-          @Override
-          public String toString(String field) {
-            StringBuilder buffer = new StringBuilder();
-            if (!getField().equals(field)) {
-              buffer.append(getField());
-              buffer.append(":");
-            }
-            buffer.append(term.text());
-            buffer.append(ToStringUtils.boost(getBoost()));
-            return buffer.toString();
-          }
-        };
+        // subtract these away
+        automaton = BasicOperations.minus(automaton, falsePositives);
      }
+      return new AutomatonQuery(term, automaton) {
+        // override toString so its completely transparent
+        @Override
+        public String toString(String field) {
+          StringBuilder buffer = new StringBuilder();
+          if (!getField().equals(field)) {
+            buffer.append(getField());
+            buffer.append(":");
+          }
+          buffer.append(term.text());
+          buffer.append(ToStringUtils.boost(getBoost()));
+          return buffer.toString();
+        }
+      };
    }
    Query q = super.getWildcardQuery(field, termStr);
    if (q instanceof WildcardQuery) {
--- a/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java
@ -19,6 +19,7 @@ package org.apache.solr.analysis;

 import java.io.IOException;
 import java.io.StringReader;
+import java.lang.reflect.Field;

 import java.util.HashMap;
 import java.util.Map;
@ -26,8 +27,10 @@ import java.util.Map;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.AutomatonQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.SpecialOperations;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.search.SolrQueryParser;
@ -51,6 +54,8 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
  public void setUp() throws Exception {
    super.setUp();
    schema = new IndexSchema(solrConfig, getSchemaFile(), null);
+    clearIndex();
+    assertU(commit());
  }

  @Test
@ -105,7 +110,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
  }
  
  @Test
-  public void testQueryParsing() throws IOException, ParseException {
+  public void testQueryParsing() throws Exception {

    SolrQueryParser parserOne = new SolrQueryParser(schema, "one");
    assertTrue(parserOne.getAllowLeadingWildcard());
@ -115,28 +120,53 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
    // XXX note: this should be false, but for now we return true for any field,
    // XXX if at least one field uses the reversing
    assertTrue(parserThree.getAllowLeadingWildcard());
-    String text = "one +two *hree f*ur fiv* *si\uD834\uDD1Ex";
-    String expectedOne = "one:one +one:two one:\u0001eerh* one:\u0001ru*f one:fiv* one:\u0001x\uD834\uDD1Eis*";
-    String expectedTwo = "two:one +two:two two:\u0001eerh* two:\u0001ru*f two:fiv* two:\u0001x\uD834\uDD1Eis*";
-    String expectedThree = "three:one +three:two three:*hree three:f*ur three:fiv* three:*si\uD834\uDD1Ex";
-    Query q = parserOne.parse(text);
-    assertEquals(expectedOne, q.toString());
-    q = parserTwo.parse(text);
-    assertEquals(expectedTwo, q.toString());
-    q = parserThree.parse(text);
-    assertEquals(expectedThree, q.toString());
+    
+    // add some docs
+    assertU(adoc("id", "1", "one", "one"));
+    assertU(adoc("id", "2", "two", "two"));
+    assertU(adoc("id", "3", "three", "three"));
+    assertU(adoc("id", "4", "one", "four"));
+    assertU(adoc("id", "5", "two", "five"));
+    assertU(adoc("id", "6", "three", "si\uD834\uDD1Ex"));
+    assertU(commit());
+    
+    assertQ("should have matched",
+        req("+id:1 +one:one"),
+        "//result[@numFound=1]");
+    
+    assertQ("should have matched",
+        req("+id:4 +one:f*ur"),
+        "//result[@numFound=1]");
+        
+    assertQ("should have matched",
+        req("+id:6 +three:*si\uD834\uDD1Ex"),
+        "//result[@numFound=1]");
+    
    // test conditional reversal
-    String condText = "*hree t*ree th*ee thr*e ?hree t?ree th?ee th?*ee " + 
-        "short*token ver*longtoken";
-    String expected = "two:\u0001eerh* two:\u0001eer*t two:\u0001ee*ht " +
-        "two:thr*e " +
-        "two:\u0001eerh? two:\u0001eer?t " +
-        "two:th?ee " +
-        "two:th?*ee " +
-        "two:short*token " +
-        "two:\u0001nekotgnol*rev";
-    q = parserTwo.parse(condText);
-    assertEquals(expected, q.toString());
+    assertTrue(wasReversed(parserTwo, "*hree"));
+    assertTrue(wasReversed(parserTwo, "t*ree"));
+    assertTrue(wasReversed(parserTwo, "th*ee"));
+    assertFalse(wasReversed(parserTwo, "thr*e"));
+    assertTrue(wasReversed(parserTwo, "?hree"));
+    assertTrue(wasReversed(parserTwo, "t?ree"));
+    assertFalse(wasReversed(parserTwo, "th?ee"));
+    assertFalse(wasReversed(parserTwo, "th?*ee"));
+    assertFalse(wasReversed(parserTwo, "short*token"));
+    assertTrue(wasReversed(parserTwo, "ver*longtoken"));
+  }
+  
+  /** fragile assert: depends on our implementation, but cleanest way to check for now */ 
+  private boolean wasReversed(SolrQueryParser qp, String query) throws Exception {
+    Query q = qp.parse(query);
+    if (!(q instanceof AutomatonQuery))
+      return false;
+    // this is a hack to get the protected Automaton field in AutomatonQuery, 
+    // may break in later lucene versions - we have no getter... for good reasons.
+    final Field automatonField = AutomatonQuery.class.getDeclaredField("automaton");
+    automatonField.setAccessible(true);
+    Automaton automaton = (Automaton) automatonField.get(q);
+    String prefix = SpecialOperations.getCommonPrefix(automaton);
+    return prefix.length() > 0 && prefix.charAt(0) == '\u0001';
  }

  @Test