diff --git a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java index 2146438a0..546135d31 100644 --- a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java +++ b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java @@ -123,14 +123,7 @@ public class SearchTokenizer { } static boolean isAllowedWord(final char character) { - // TODO mibo: add missing allowed characters - int type = Character.getType(character); - return (type == Character.LETTER_NUMBER - || type == Character.LOWERCASE_LETTER - || type == Character.MODIFIER_LETTER - || type == Character.OTHER_LETTER - || type == Character.TITLECASE_LETTER - || type == Character.UPPERCASE_LETTER); + return Character.isUnicodeIdentifierStart(character); } /** @@ -168,25 +161,17 @@ public class SearchTokenizer { * return FALSE.
* Furthermore percent encoded characters are also not validated (and can not be validated on * a single character).
- * Hence for the % character this method will return FALSE.
+ * Hence for the % character this method assumes that it was percent encoded and is now decoded + * and will return TRUE.
* * @param character which is checked * @return true if character is allowed for a phrase */ static boolean isAllowedPhrase(final char character) { - // FIXME mibo: check missing - return isQCharUnescaped(character);// || isEscaped(character); + // the '%' is allowed because it is assumed that it was percent encoded and is now decoded + return isQCharUnescaped(character) || character == '%'; } -// /** -// * escape = "\" / "%5C" ; reverse solidus U+005C -// * @param character which is checked -// * @return true if character is allowed -// */ -// private static boolean isEscaped(char character) { -// // TODO: mibo(151130): is checked in SearchPhraseState -// return false; -// } /** * qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "=" @@ -195,7 +180,6 @@ public class SearchTokenizer { */ private static boolean isQCharUnescaped(char character) { return isUnreserved(character) -// || isPctEncodedUnescaped(character) || isOtherDelims(character) || character == ':' || character == '@' @@ -220,36 +204,6 @@ public class SearchTokenizer { || character == ';'; } -// /** -// * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG -// * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F ) -// * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" ) -// * -// * HEXDIG = DIGIT / A-to-F -// * -// * @param character which is checked -// * @return true if character is allowed -// */ -// private static boolean isPctEncodedUnescaped(char character) { -// String hex = Integer.toHexString(character); -// char aschar[] = hex.toCharArray(); -// if(aschar[0] == '%') { -// if(aschar[1] == '2') { -// return aschar[2] != '2' && isHexDigit(aschar[2]); -// } else if(aschar[1] == '5') { -// return aschar[2] != 'C' && isHexDigit(aschar[2]); -// } else if(isHexDigit(aschar[1])) { -// return isHexDigit(aschar[2]); -// } -// } -// return false; -// } - -// private static boolean 
isHexDigit(char character) { -// return 'A' <= character && character <= 'F' // case A..F -// || '0' <= character && character <= '9'; // case 0..9 -// } - /** * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" * @param character which is checked diff --git a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java index 46c929061..61cd28bff 100644 --- a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java +++ b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java @@ -250,6 +250,8 @@ public class SearchTokenizerTest { @Test public void characterInPhrase() throws Exception { assertQuery("\"123\" OR \"ALPHA-._~\"").resultsIn(PHRASE, OR, PHRASE); + assertQuery("\"100%Olingo\"").resultsIn(new Validator.Tuple(PHRASE, "\"100%Olingo\"")); + assertQuery("\"100'Olingo\"").resultsIn(new Validator.Tuple(PHRASE, "\"100'Olingo\"")); //escaped characters assertQuery("\"\\\"123\" OR \"\\\\abc\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"123\""), new Validator.Tuple(OR), new Validator.Tuple(PHRASE, "\"\\abc\"")); diff --git a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java index 3c0200370..27fa2c197 100644 --- a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java +++ b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java @@ -5462,6 +5462,10 @@ public class TestFullResourcePath { testUri.run("ESTwoKeyNav", "$search=abc AND (def OR ghi)"); testUri.run("ESTwoKeyNav", "$search=abc AND (def ghi)"); + // percent encoded characters + testUri.run("ESTwoKeyNav", "$search=%41%42%43"); + 
testUri.run("ESTwoKeyNav", "$search=\"100%25\""); + // escaped characters testUri.run("ESTwoKeyNav", "$search=\"abc\""); testUri.run("ESTwoKeyNav", "$search=\"a\\\"bc\""); @@ -5474,6 +5478,8 @@ public class TestFullResourcePath { .isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION); testUri.runEx("ESTwoKeyNav", "$search=%22a%5Cbc%22") .isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION); + testUri.runEx("ESTwoKeyNav", "$search=not%27allowed") + .isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION); } @Test