LUCENE-800: removed backslash from the TERM_CHAR list in the QueryParser to support escaping of backslashes

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@515908 13f79535-47bb-0310-9956-ffa450edef68
2007-03-08 03:37:12 +00:00 · 2007-03-08 03:37:12 +00:00 · 397187494f
parent 35c0da7cab
commit 397187494f
6 changed files with 47 additions and 39 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -1,4 +1,4 @@
-Lucene Change Log
+Lucene Change Log

 $Id$

@ -48,6 +48,11 @@ Bug fixes
    a FileNotFoundException like Lucene pre-2.1 (before this fix you
    got an NPE).  (Mike McCandless)

+ 7. LUCENE-800: Removed backslash from the TERM_CHAR list in the queryparser, 
+    because the backslash is the escape character. Also changed the ESCAPED_CHAR
+    list to contain all possible characters, because every character that 
+    follows a backslash should be considered as escaped. (Michael Busch)
+
 New features

 1. LUCENE-759: Added two n-gram-producing TokenFilters.
--- a/src/java/org/apache/lucene/queryParser/QueryParser.java
+++ b/src/java/org/apache/lucene/queryParser/QueryParser.java
@ -794,7 +794,7 @@ public class QueryParser implements QueryParserConstants {
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
-      // NOTE: keep this in sync with _ESCAPED_CHAR below!
+      // These characters are part of the query syntax and must be escaped
      if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
        || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
        || c == '*' || c == '?') {
--- a/src/java/org/apache/lucene/queryParser/QueryParser.jj
+++ b/src/java/org/apache/lucene/queryParser/QueryParser.jj
@ -818,7 +818,7 @@ public class QueryParser {
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
-      // NOTE: keep this in sync with _ESCAPED_CHAR below!
+      // These characters are part of the query syntax and must be escaped
      if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
        || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
        || c == '*' || c == '?') {
@ -854,11 +854,10 @@ PARSER_END(QueryParser)

 <*> TOKEN : {
  <#_NUM_CHAR:   ["0"-"9"] >
-// NOTE: keep this in sync with escape(String) above!
-| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
-                          "[", "]", "\"", "{", "}", "~", "*", "?" ] >
+// every character that follows a backslash is considered as an escaped character
+| <#_ESCAPED_CHAR: "\\" ~[] >
 | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
-                           "[", "]", "\"", "{", "}", "~", "*", "?" ]
+                           "[", "]", "\"", "{", "}", "~", "*", "?", "\\" ]
                       | <_ESCAPED_CHAR> ) >
 | <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
 | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
--- a/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
+++ b/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
@ -220,8 +220,6 @@ private final int jjMoveNfa_3(int startState, int curPos)
                  jjCheckNAddTwoStates(25, 26);
                  break;
               case 27:
-                  if ((0x84002f0600000000L & l) == 0L)
-                     break;
                  if (kind > 21)
                     kind = 21;
                  jjCheckNAddTwoStates(25, 26);
@ -241,8 +239,6 @@ private final int jjMoveNfa_3(int startState, int curPos)
                  jjCheckNAddTwoStates(29, 30);
                  break;
               case 31:
-                  if ((0x84002f0600000000L & l) == 0L)
-                     break;
                  if (kind > 18)
                     kind = 18;
                  jjCheckNAddTwoStates(29, 30);
@ -252,8 +248,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
                     jjCheckNAddStates(10, 12);
                  break;
               case 34:
-                  if ((0x84002f0600000000L & l) != 0L)
-                     jjCheckNAddStates(10, 12);
+                  jjCheckNAddStates(10, 12);
                  break;
               default : break;
            }
@ -267,37 +262,37 @@ private final int jjMoveNfa_3(int startState, int curPos)
            switch(jjstateSet[--i])
            {
               case 36:
-                  if ((0x97ffffff97ffffffL & l) != 0L)
+                  if ((0x97ffffff87ffffffL & l) != 0L)
                  {
                     if (kind > 21)
                        kind = 21;
                     jjCheckNAddTwoStates(25, 26);
                  }
-                  if (curChar == 92)
+                  else if (curChar == 92)
                     jjCheckNAddTwoStates(27, 27);
                  break;
               case 0:
-                  if ((0x97ffffff97ffffffL & l) != 0L)
+                  if ((0x97ffffff87ffffffL & l) != 0L)
                  {
                     if (kind > 18)
                        kind = 18;
                     jjCheckNAddStates(0, 4);
                  }
+                  else if (curChar == 92)
+                     jjCheckNAddStates(13, 15);
                  else if (curChar == 126)
                  {
                     if (kind > 19)
                        kind = 19;
                     jjstateSet[jjnewStateCnt++] = 20;
                  }
-                  if ((0x97ffffff97ffffffL & l) != 0L)
+                  if ((0x97ffffff87ffffffL & l) != 0L)
                  {
                     if (kind > 21)
                        kind = 21;
                     jjCheckNAddTwoStates(25, 26);
                  }
-                  if (curChar == 92)
-                     jjCheckNAddStates(13, 15);
-                  else if (curChar == 78)
+                  if (curChar == 78)
                     jjstateSet[jjnewStateCnt++] = 11;
                  else if (curChar == 124)
                     jjstateSet[jjnewStateCnt++] = 8;
@ -361,14 +356,14 @@ private final int jjMoveNfa_3(int startState, int curPos)
                  jjstateSet[jjnewStateCnt++] = 20;
                  break;
               case 24:
-                  if ((0x97ffffff97ffffffL & l) == 0L)
+                  if ((0x97ffffff87ffffffL & l) == 0L)
                     break;
                  if (kind > 21)
                     kind = 21;
                  jjCheckNAddTwoStates(25, 26);
                  break;
               case 25:
-                  if ((0x97ffffff97ffffffL & l) == 0L)
+                  if ((0x97ffffff87ffffffL & l) == 0L)
                     break;
                  if (kind > 21)
                     kind = 21;
@ -379,21 +374,19 @@ private final int jjMoveNfa_3(int startState, int curPos)
                     jjCheckNAddTwoStates(27, 27);
                  break;
               case 27:
-                  if ((0x6800000078000000L & l) == 0L)
-                     break;
                  if (kind > 21)
                     kind = 21;
                  jjCheckNAddTwoStates(25, 26);
                  break;
               case 28:
-                  if ((0x97ffffff97ffffffL & l) == 0L)
+                  if ((0x97ffffff87ffffffL & l) == 0L)
                     break;
                  if (kind > 18)
                     kind = 18;
                  jjCheckNAddStates(0, 4);
                  break;
               case 29:
-                  if ((0x97ffffff97ffffffL & l) == 0L)
+                  if ((0x97ffffff87ffffffL & l) == 0L)
                     break;
                  if (kind > 18)
                     kind = 18;
@ -404,14 +397,12 @@ private final int jjMoveNfa_3(int startState, int curPos)
                     jjCheckNAddTwoStates(31, 31);
                  break;
               case 31:
-                  if ((0x6800000078000000L & l) == 0L)
-                     break;
                  if (kind > 18)
                     kind = 18;
                  jjCheckNAddTwoStates(29, 30);
                  break;
               case 32:
-                  if ((0x97ffffff97ffffffL & l) != 0L)
+                  if ((0x97ffffff87ffffffL & l) != 0L)
                     jjCheckNAddStates(10, 12);
                  break;
               case 33:
@ -419,8 +410,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
                     jjCheckNAddTwoStates(34, 34);
                  break;
               case 34:
-                  if ((0x6800000078000000L & l) != 0L)
-                     jjCheckNAddStates(10, 12);
+                  jjCheckNAddStates(10, 12);
                  break;
               case 35:
                  if (curChar == 92)
@ -443,6 +433,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
            {
               case 36:
               case 25:
+               case 27:
                  if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
                     break;
                  if (kind > 21)
@ -482,6 +473,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
                  jjCheckNAddStates(0, 4);
                  break;
               case 29:
+               case 31:
                  if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
                     break;
                  if (kind > 18)
@ -489,6 +481,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
                  jjCheckNAddTwoStates(29, 30);
                  break;
               case 32:
+               case 34:
                  if (jjCanMove_0(hiByte, i1, i2, l1, l2))
                     jjCheckNAddStates(10, 12);
                  break;
--- a/src/test/org/apache/lucene/queryParser/TestQueryParser.java
+++ b/src/test/org/apache/lucene/queryParser/TestQueryParser.java
@ -522,6 +522,8 @@ public class TestQueryParser extends TestCase {
    //assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
    //assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/

+    assertQueryEquals("\\a", a, "a");
+    
    assertQueryEquals("a\\-b:c", a, "a-b:c");
    assertQueryEquals("a\\+b:c", a, "a+b:c");
    assertQueryEquals("a\\:b:c", a, "a:b:c");
@ -585,6 +587,15 @@ public class TestQueryParser extends TestCase {
        assertQueryEquals("XY\\u005", a, "XYZ");
        fail("ParseException expected, not thrown");
    } catch (ParseException expected) {}
+    
+    // Tests bug LUCENE-800
+    assertQueryEquals("(item:\\\\ item:ABCD\\\\)", a, "item:\\ item:ABCD\\");
+    assertQueryEquals("\\*", a, "*");
+    assertQueryEquals("\\\\", a, "\\");  // escaped backslash
+    try {
+      assertQueryEquals("\\", a, "\\");
+      fail("ParseException expected not thrown (backslash must be escaped)");
+    } catch (ParseException expected) {}
  }

  public void testQueryStringEscaping() throws Exception {
--- a/src/test/org/apache/lucene/search/TestWildcard.java
+++ b/src/test/org/apache/lucene/search/TestWildcard.java
@ -170,13 +170,13 @@ public class TestWildcard
    QueryParser qp = new QueryParser(field, new WhitespaceAnalyzer());
    qp.setAllowLeadingWildcard(true);
    String docs[] = {
-        "abcdefg1",
-        "hijklmn1",
-        "opqrstu1",
+        "\\ abcdefg1",
+        "\\79 hijklmn1",
+        "\\\\ opqrstu1",
    };
    // queries that should find all docs
    String matchAll[] = {
-        "*", "*1", "**1", "*?", "*?1", "?*1", "**", "***",
+        "*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*"
    };
    // queries that should find no docs
    String matchNone[] = {
@ -184,9 +184,9 @@ public class TestWildcard
    };
    // queries that should be parsed to prefix queries
    String matchOneDocPrefix[][] = {
-        {"a*", "ab*", "abc*"}, // these should find only doc 0 
-        {"h*", "hi*", "hij*"}, // these should find only doc 1
-        {"o*", "op*", "opq*"}, // these should find only doc 2
+        {"a*", "ab*", "abc*", }, // these should find only doc 0 
+        {"h*", "hi*", "hij*", "\\\\7*"}, // these should find only doc 1
+        {"o*", "op*", "opq*", "\\\\\\\\*"}, // these should find only doc 2
    };
    // queries that should be parsed to wildcard queries
    String matchOneDocWild[][] = {
@ -200,7 +200,7 @@ public class TestWildcard
    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer());
    for (int i = 0; i < docs.length; i++) {
      Document doc = new Document();
-      doc.add(new Field(field,docs[i],Store.NO,Index.UN_TOKENIZED));
+      doc.add(new Field(field,docs[i],Store.NO,Index.TOKENIZED));
      iw.addDocument(doc);
    }
    iw.close();