diff --git a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java index 6f7e01e70..853537d3b 100644 --- a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java +++ b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java @@ -124,83 +124,17 @@ public class SearchTokenizer { || isOtherDelimsForWord(character); } - /** - * - * searchPhrase = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark - *

- * qchar-no-AMP-DQUOTE = qchar-unescaped / escape ( escape / quotation-mark ) - *

- * qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / - * ":" / "@" / "/" / "?" / "$" / "'" / "=" - *

- * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - *

- * escape = "\" / "%5C" ; reverse solidus U+005C - *

- * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG - * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F ) - * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" ) - *

- * other-delims = "!" / "(" / ")" / "*" / "+" / "," / ";" - *

- * quotation-mark = DQUOTE / "%22" - *

- * ALPHA = %x41-5A / %x61-7A - *
- * DIGIT = %x30-39 - *
- * DQUOTE = %x22 - *
- * - * Checks if given character is allowed for a search phrase. - * ATTENTION: Escaping and percent encoding is not be validated here (and can not be validated on - * a single character).
- * Hence for the {@link #PHRASE_ESCAPE_CHAR} and the {@link #QUOTATION_MARK} characters this method will - * return FALSE.
- * Furthermore percent encoded characters are also not validated (and can not be validated on - * a single character).
- * Hence for the % character this method assumeS that it was percent encoded and is now decoded - * and will return TRUE.
+ /** + * The check for allowed characters in a SearchPhrase assumes that + * the whole phrase is already percent decoded. + * Hence, all characters are allowed besides the double quote ("). * * @param character which is checked * @return true if character is allowed for a phrase */ static boolean isAllowedPhrase(final char character) { - // the '%' is allowed because it is assumed that it was percent encoded and is now decoded - return isQCharUnescaped(character) - || character == '%' - || Character.isUnicodeIdentifierStart(character); - } - - /** - * qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "=" - * @param character which is checked - * @return true if character is allowed - */ - private static boolean isQCharUnescaped(final char character) { - return isUnreserved(character) - || isOtherDelims(character) - || character == ':' - || character == '@' - || character == '/' - || character == '$' - || character == '\'' - || character == '='; - } - - /** - * other-delims = "!" / "(" / ")" / "*" / "+" / "," / ";" - * @param character which is checked - * @return true if character is allowed - */ - private static boolean isOtherDelims(final char character) { - return character == '!' - || character == '(' - || character == ')' - || character == '*' - || character == '+' - || character == ',' - || character == ';'; + // The phrase is already percent-decoded, so the double quote is the only excluded character + return character != '"'; } /** @@ -212,7 +146,6 @@ public class SearchTokenizer { return character == '!' || character == '*' || character == '+' - || character == ',' || character == ':' || character == '@' || character == '/' @@ -234,7 +167,7 @@ public class SearchTokenizer { || character == '<' || character == '`'; } - + /** * unreserved = ALPHA / DIGIT / "-" / "." 
/ "_" / "~" * @param character which is checked @@ -355,11 +288,11 @@ public class SearchTokenizer { } /** - * - * As per the updated abnf + * + * As per the updated abnf * https://github.com/oasis-tcs/odata-abnf/blob/master/abnf/odata-abnf-construction-rules.txt#L332-L356. * searchWord = 1*( ALPHA / DIGIT / COMMA / "." / "-" / pct-encoded ) - * This includes Unicode characters of categories + * This includes Unicode characters of categories * L or N using UTF-8 and percent-encoding. */ private class SearchWordState extends LiteralState { diff --git a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java index d8c6a7cc7..cea6d3012 100644 --- a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java +++ b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -107,6 +107,13 @@ public class SearchTokenizerTest { assertQuery("abc or \"xyz\"").resultsIn(WORD, WORD, PHRASE); } + @Test + public void parsePhrase_decoded() throws Exception { + assertQuery("\"a & b\"").resultsIn(PHRASE); + assertQuery("\" ! # $ % & ' ( ) * + , / : ; = ? @ [ ] \"").resultsIn(PHRASE); + assertQuery("\" - . 
< > ^ _ ` { | } ~ \"").resultsIn(PHRASE); + } + @Test public void parseNot() throws Exception { assertQuery("NOT").resultsIn(NOT); @@ -401,4 +408,4 @@ public class SearchTokenizerTest { } } } -} \ No newline at end of file +}