diff --git a/CHANGES.txt b/CHANGES.txt index cb67b1838f0..7794c2d2ca1 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,4 @@ -Lucene Change Log +Lucene Change Log $Id$ @@ -48,6 +48,11 @@ Bug fixes a FileNotFoundException like Lucene pre-2.1 (before this fix you got an NPE). (Mike McCandless) + 7. LUCENE-800: Removed backslash from the TERM_CHAR list in the queryparser, + because the backslash is the escape character. Also changed the ESCAPED_CHAR + list to contain all possible characters, because every character that + follows a backslash should be considered as escaped. (Michael Busch) + New features 1. LUCENE-759: Added two n-gram-producing TokenFilters. diff --git a/src/java/org/apache/lucene/queryParser/QueryParser.java b/src/java/org/apache/lucene/queryParser/QueryParser.java index 00b365c2fed..94cbbc931ab 100644 --- a/src/java/org/apache/lucene/queryParser/QueryParser.java +++ b/src/java/org/apache/lucene/queryParser/QueryParser.java @@ -794,7 +794,7 @@ public class QueryParser implements QueryParserConstants { StringBuffer sb = new StringBuffer(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); - // NOTE: keep this in sync with _ESCAPED_CHAR below! + // These characters are part of the query syntax and must be escaped if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?') { diff --git a/src/java/org/apache/lucene/queryParser/QueryParser.jj b/src/java/org/apache/lucene/queryParser/QueryParser.jj index e286c2da051..c1a29248434 100644 --- a/src/java/org/apache/lucene/queryParser/QueryParser.jj +++ b/src/java/org/apache/lucene/queryParser/QueryParser.jj @@ -818,7 +818,7 @@ public class QueryParser { StringBuffer sb = new StringBuffer(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); - // NOTE: keep this in sync with _ESCAPED_CHAR below! + // These characters are part of the query syntax and must be escaped if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?') { @@ -854,11 +854,10 @@ PARSER_END(QueryParser) <*> TOKEN : { <#_NUM_CHAR: ["0"-"9"] > -// NOTE: keep this in sync with escape(String) above! -| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^", - "[", "]", "\"", "{", "}", "~", "*", "?" ] > +// every character that follows a backslash is considered as an escaped character +| <#_ESCAPED_CHAR: "\\" ~[] > | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^", - "[", "]", "\"", "{", "}", "~", "*", "?" ] + "[", "]", "\"", "{", "}", "~", "*", "?", "\\" ] | <_ESCAPED_CHAR> ) > | <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) > | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") > diff --git a/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java b/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java index 5da5b2d123b..a769c59f88d 100644 --- a/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java +++ b/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java @@ -220,8 +220,6 @@ private final int jjMoveNfa_3(int startState, int curPos) jjCheckNAddTwoStates(25, 26); break; case 27: - if ((0x84002f0600000000L & l) == 0L) - break; if (kind > 21) kind = 21; jjCheckNAddTwoStates(25, 26); @@ -241,8 +239,6 @@ private final int jjMoveNfa_3(int startState, int curPos) jjCheckNAddTwoStates(29, 30); break; case 31: - if ((0x84002f0600000000L & l) == 0L) - break; if (kind > 18) kind = 18; jjCheckNAddTwoStates(29, 30); @@ -252,8 +248,7 @@ private final int jjMoveNfa_3(int startState, int curPos) jjCheckNAddStates(10, 12); break; case 34: - if ((0x84002f0600000000L & l) != 0L) - jjCheckNAddStates(10, 12); + jjCheckNAddStates(10, 12); break; default : break; } @@ -267,37 +262,37 @@ private final int jjMoveNfa_3(int startState, int curPos) switch(jjstateSet[--i]) { case 36: - if ((0x97ffffff97ffffffL & l) != 0L) + if ((0x97ffffff87ffffffL & l) != 0L) { if (kind > 21) kind = 21; jjCheckNAddTwoStates(25, 26); } - if (curChar == 92) + else if (curChar == 92) jjCheckNAddTwoStates(27, 27); break; case 0: - if ((0x97ffffff97ffffffL & l) != 0L) + if ((0x97ffffff87ffffffL & l) != 0L) { if (kind > 18) kind = 18; jjCheckNAddStates(0, 4); } + else if (curChar == 92) + jjCheckNAddStates(13, 15); else if (curChar == 126) { if (kind > 19) kind = 19; jjstateSet[jjnewStateCnt++] = 20; } - if ((0x97ffffff97ffffffL & l) != 0L) + if ((0x97ffffff87ffffffL & l) != 0L) { if (kind > 21) kind = 21; jjCheckNAddTwoStates(25, 26); } - if (curChar == 92) - jjCheckNAddStates(13, 15); - else if (curChar == 78) + if (curChar == 78) jjstateSet[jjnewStateCnt++] = 11; else if (curChar == 124) jjstateSet[jjnewStateCnt++] = 8; @@ -361,14 +356,14 @@ private final int jjMoveNfa_3(int startState, int curPos) jjstateSet[jjnewStateCnt++] = 20; break; case 24: - if ((0x97ffffff97ffffffL & l) == 0L) + if ((0x97ffffff87ffffffL & l) == 0L) break; if (kind > 21) kind = 21; jjCheckNAddTwoStates(25, 26); break; case 25: - if ((0x97ffffff97ffffffL & l) == 0L) + if ((0x97ffffff87ffffffL & l) == 0L) break; if (kind > 21) kind = 21; @@ -379,21 +374,19 @@ private final int jjMoveNfa_3(int startState, int curPos) jjCheckNAddTwoStates(27, 27); break; case 27: - if ((0x6800000078000000L & l) == 0L) - break; if (kind > 21) kind = 21; jjCheckNAddTwoStates(25, 26); break; case 28: - if ((0x97ffffff97ffffffL & l) == 0L) + if ((0x97ffffff87ffffffL & l) == 0L) break; if (kind > 18) kind = 18; jjCheckNAddStates(0, 4); break; case 29: - if ((0x97ffffff97ffffffL & l) == 0L) + if ((0x97ffffff87ffffffL & l) == 0L) break; if (kind > 18) kind = 18; @@ -404,14 +397,12 @@ private final int jjMoveNfa_3(int startState, int curPos) jjCheckNAddTwoStates(31, 31); break; case 31: - if ((0x6800000078000000L & l) == 0L) - break; if (kind > 18) kind = 18; jjCheckNAddTwoStates(29, 30); break; case 32: - if ((0x97ffffff97ffffffL & l) != 0L) + if ((0x97ffffff87ffffffL & l) != 0L) jjCheckNAddStates(10, 12); break; case 33: @@ -419,8 +410,7 @@ private final int jjMoveNfa_3(int startState, int curPos) jjCheckNAddTwoStates(34, 34); break; case 34: - if ((0x6800000078000000L & l) != 0L) - jjCheckNAddStates(10, 12); + jjCheckNAddStates(10, 12); break; case 35: if (curChar == 92) @@ -443,6 +433,7 @@ private final int jjMoveNfa_3(int startState, int curPos) { case 36: case 25: + case 27: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; if (kind > 21) @@ -482,6 +473,7 @@ private final int jjMoveNfa_3(int startState, int curPos) jjCheckNAddStates(0, 4); break; case 29: + case 31: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; if (kind > 18) @@ -489,6 +481,7 @@ private final int jjMoveNfa_3(int startState, int curPos) jjCheckNAddTwoStates(29, 30); break; case 32: + case 34: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) jjCheckNAddStates(10, 12); break; diff --git a/src/test/org/apache/lucene/queryParser/TestQueryParser.java b/src/test/org/apache/lucene/queryParser/TestQueryParser.java index f889eee102f..aa4ecec5215 100644 --- a/src/test/org/apache/lucene/queryParser/TestQueryParser.java +++ b/src/test/org/apache/lucene/queryParser/TestQueryParser.java @@ -522,6 +522,8 @@ public class TestQueryParser extends TestCase { //assertQueryEquals("foo \\|| bar", a, "foo \\|| bar"); //assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/ + assertQueryEquals("\\a", a, "a"); + assertQueryEquals("a\\-b:c", a, "a-b:c"); assertQueryEquals("a\\+b:c", a, "a+b:c"); assertQueryEquals("a\\:b:c", a, "a:b:c"); @@ -585,6 +587,15 @@ public class TestQueryParser extends TestCase { assertQueryEquals("XY\\u005", a, "XYZ"); fail("ParseException expected, not thrown"); } catch (ParseException expected) {} + + // Tests bug LUCENE-800 + assertQueryEquals("(item:\\\\ item:ABCD\\\\)", a, "item:\\ item:ABCD\\"); + assertQueryEquals("\\*", a, "*"); + assertQueryEquals("\\\\", a, "\\"); // escaped backslash + try { + assertQueryEquals("\\", a, "\\"); + fail("ParseException expected not thrown (backslash must be escaped)"); + } catch (ParseException expected) {} } public void testQueryStringEscaping() throws Exception { diff --git a/src/test/org/apache/lucene/search/TestWildcard.java b/src/test/org/apache/lucene/search/TestWildcard.java index f44900afe68..a454d05ad5b 100644 --- a/src/test/org/apache/lucene/search/TestWildcard.java +++ b/src/test/org/apache/lucene/search/TestWildcard.java @@ -170,13 +170,13 @@ public class TestWildcard QueryParser qp = new QueryParser(field, new WhitespaceAnalyzer()); qp.setAllowLeadingWildcard(true); String docs[] = { - "abcdefg1", - "hijklmn1", - "opqrstu1", + "\\ abcdefg1", + "\\79 hijklmn1", + "\\\\ opqrstu1", }; // queries that should find all docs String matchAll[] = { - "*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", + "*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*" }; // queries that should find no docs String matchNone[] = { @@ -184,9 +184,9 @@ public class TestWildcard }; // queries that should be parsed to prefix queries String matchOneDocPrefix[][] = { - {"a*", "ab*", "abc*"}, // these should find only doc 0 - {"h*", "hi*", "hij*"}, // these should find only doc 1 - {"o*", "op*", "opq*"}, // these should find only doc 2 + {"a*", "ab*", "abc*", }, // these should find only doc 0 + {"h*", "hi*", "hij*", "\\\\7*"}, // these should find only doc 1 + {"o*", "op*", "opq*", "\\\\\\\\*"}, // these should find only doc 2 }; // queries that should be parsed to wildcard queries String matchOneDocWild[][] = { @@ -200,7 +200,7 @@ public class TestWildcard IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer()); for (int i = 0; i < docs.length; i++) { Document doc = new Document(); - doc.add(new Field(field,docs[i],Store.NO,Index.UN_TOKENIZED)); + doc.add(new Field(field,docs[i],Store.NO,Index.TOKENIZED)); iw.addDocument(doc); } iw.close();