diff --git a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
index 6f7e01e70..853537d3b 100644
--- a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
+++ b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
@@ -124,83 +124,17 @@ public class SearchTokenizer {
|| isOtherDelimsForWord(character);
}
- /**
- *
- * searchPhrase = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark
- *
- * qchar-no-AMP-DQUOTE = qchar-unescaped / escape ( escape / quotation-mark )
- *
- * qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims /
- * ":" / "@" / "/" / "?" / "$" / "'" / "="
- *
- * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
- *
- * escape = "\" / "%5C" ; reverse solidus U+005C
- *
- * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
- * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
- * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
- *
- * other-delims = "!" / "(" / ")" / "*" / "+" / "," / ";"
- *
- * quotation-mark = DQUOTE / "%22"
- *
- * ALPHA = %x41-5A / %x61-7A
- *
- * DIGIT = %x30-39
- *
- * DQUOTE = %x22
- *
- *
- * Checks if given <code>character</code> is allowed for a search phrase.
- * ATTENTION: Escaping and percent encoding is not be validated here (and can not be validated on
- * a single character).
- * Hence for the {@link #PHRASE_ESCAPE_CHAR} and the {@link #QUOTATION_MARK} characters this method will
- * return <code>FALSE</code>.
- * Furthermore percent encoded characters are also not validated (and can not be validated on
- * a single character).
- * Hence for the <code>%</code> character this method assumeS that it was percent encoded and is now decoded
- * and will return <code>TRUE</code>.
+ /**
+ * The check for allowed characters in a <code>SearchPhrase</code> assumes that
+ * the whole phrase is already percent decoded.
+ * Hence, all characters are allowed besides the double quote (<code>"</code>).
*
* @param character which is checked
* @return true if character is allowed for a phrase
*/
static boolean isAllowedPhrase(final char character) {
- // the '%' is allowed because it is assumed that it was percent encoded and is now decoded
- return isQCharUnescaped(character)
- || character == '%'
- || Character.isUnicodeIdentifierStart(character);
- }
-
- /**
- * qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
- * @param character which is checked
- * @return true if character is allowed
- */
- private static boolean isQCharUnescaped(final char character) {
- return isUnreserved(character)
- || isOtherDelims(character)
- || character == ':'
- || character == '@'
- || character == '/'
- || character == '$'
- || character == '\''
- || character == '=';
- }
-
- /**
- * other-delims = "!" / "(" / ")" / "*" / "+" / "," / ";"
- * @param character which is checked
- * @return true if character is allowed
- */
- private static boolean isOtherDelims(final char character) {
- return character == '!'
- || character == '('
- || character == ')'
- || character == '*'
- || character == '+'
- || character == ','
- || character == ';';
+ return Character.isUnicodeIdentifierStart(character)
+ || character != '"'; // NOTE(review): the double quote is not a Unicode identifier start, so this clause alone decides the result; the isUnicodeIdentifierStart check above looks redundant - confirm and simplify
}
/**
@@ -212,7 +146,6 @@ public class SearchTokenizer {
return character == '!'
|| character == '*'
|| character == '+'
- || character == ','
|| character == ':'
|| character == '@'
|| character == '/'
@@ -234,7 +167,7 @@ public class SearchTokenizer {
|| character == '<'
|| character == '`';
}
-
+
/**
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
* @param character which is checked
@@ -355,11 +288,11 @@ public class SearchTokenizer {
}
/**
- *
- * As per the updated abnf
+ *
+ * As per the updated abnf
* https://github.com/oasis-tcs/odata-abnf/blob/master/abnf/odata-abnf-construction-rules.txt#L332-L356.
* searchWord = 1*( ALPHA / DIGIT / COMMA / "." / "-" / pct-encoded )
- * This includes Unicode characters of categories
+ * This includes Unicode characters of categories
* L or N using UTF-8 and percent-encoding.
*/
private class SearchWordState extends LiteralState {
diff --git a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
index d8c6a7cc7..cea6d3012 100644
--- a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
+++ b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -107,6 +107,13 @@ public class SearchTokenizerTest {
assertQuery("abc or \"xyz\"").resultsIn(WORD, WORD, PHRASE);
}
+ @Test
+ public void parsePhrase_decoded() throws Exception {
+ assertQuery("\"a & b\"").resultsIn(PHRASE); // ampersand inside the quotes: expected token list is a single PHRASE
+ assertQuery("\" ! # $ % & ' ( ) * + , / : ; = ? @ [ ] \"").resultsIn(PHRASE); // space-separated punctuation inside the quotes: expected a single PHRASE
+ assertQuery("\" - . < > ^ _ ` { | } ~ \"").resultsIn(PHRASE); // second set of symbols inside the quotes: expected a single PHRASE
+ }
+
@Test
public void parseNot() throws Exception {
assertQuery("NOT").resultsIn(NOT);
@@ -401,4 +408,4 @@ public class SearchTokenizerTest {
}
}
}
-}
\ No newline at end of file
+}