diff --git a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
index 2146438a0..546135d31 100644
--- a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
+++ b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
@@ -123,14 +123,7 @@ public class SearchTokenizer {
}
static boolean isAllowedWord(final char character) {
- // TODO mibo: add missing allowed characters
- int type = Character.getType(character);
- return (type == Character.LETTER_NUMBER
- || type == Character.LOWERCASE_LETTER
- || type == Character.MODIFIER_LETTER
- || type == Character.OTHER_LETTER
- || type == Character.TITLECASE_LETTER
- || type == Character.UPPERCASE_LETTER);
+ return Character.isUnicodeIdentifierStart(character);
}
/**
@@ -168,25 +161,17 @@ public class SearchTokenizer {
* return <code>FALSE</code>.
* Furthermore percent encoded characters are also not validated (and can not be validated on
* a single character).
- * Hence for the <code>%</code> character this method will return <code>FALSE</code>.
+ * Hence for the <code>%</code> character this method assumes that it was percent encoded and is now decoded
+ * and will return <code>TRUE</code>.
*
* @param character which is checked
* @return true if character is allowed for a phrase
*/
static boolean isAllowedPhrase(final char character) {
- // FIXME mibo: check missing
- return isQCharUnescaped(character);// || isEscaped(character);
+ // the '%' is allowed because it is assumed that it was percent encoded and is now decoded
+ return isQCharUnescaped(character) || character == '%';
}
-// /**
-// * escape = "\" / "%5C" ; reverse solidus U+005C
-// * @param character which is checked
-// * @return true if character is allowed
-// */
-// private static boolean isEscaped(char character) {
-// // TODO: mibo(151130): is checked in SearchPhraseState
-// return false;
-// }
/**
* qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
@@ -195,7 +180,6 @@ public class SearchTokenizer {
*/
private static boolean isQCharUnescaped(char character) {
return isUnreserved(character)
-// || isPctEncodedUnescaped(character)
|| isOtherDelims(character)
|| character == ':'
|| character == '@'
@@ -220,36 +204,6 @@ public class SearchTokenizer {
|| character == ';';
}
-// /**
-// * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
-// * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
-// * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
-// *
-// * HEXDIG = DIGIT / A-to-F
-// *
-// * @param character which is checked
-// * @return true if character is allowed
-// */
-// private static boolean isPctEncodedUnescaped(char character) {
-// String hex = Integer.toHexString(character);
-// char aschar[] = hex.toCharArray();
-// if(aschar[0] == '%') {
-// if(aschar[1] == '2') {
-// return aschar[2] != '2' && isHexDigit(aschar[2]);
-// } else if(aschar[1] == '5') {
-// return aschar[2] != 'C' && isHexDigit(aschar[2]);
-// } else if(isHexDigit(aschar[1])) {
-// return isHexDigit(aschar[2]);
-// }
-// }
-// return false;
-// }
-
-// private static boolean isHexDigit(char character) {
-// return 'A' <= character && character <= 'F' // case A..F
-// || '0' <= character && character <= '9'; // case 0..9
-// }
-
/**
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
* @param character which is checked
diff --git a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
index 46c929061..61cd28bff 100644
--- a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
+++ b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
@@ -250,6 +250,8 @@ public class SearchTokenizerTest {
@Test
public void characterInPhrase() throws Exception {
assertQuery("\"123\" OR \"ALPHA-._~\"").resultsIn(PHRASE, OR, PHRASE);
+ assertQuery("\"100%Olingo\"").resultsIn(new Validator.Tuple(PHRASE, "\"100%Olingo\""));
+ assertQuery("\"100'Olingo\"").resultsIn(new Validator.Tuple(PHRASE, "\"100'Olingo\""));
//escaped characters
assertQuery("\"\\\"123\" OR \"\\\\abc\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"123\""),
new Validator.Tuple(OR), new Validator.Tuple(PHRASE, "\"\\abc\""));
diff --git a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java
index 3c0200370..27fa2c197 100644
--- a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java
+++ b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java
@@ -5462,6 +5462,10 @@ public class TestFullResourcePath {
testUri.run("ESTwoKeyNav", "$search=abc AND (def OR ghi)");
testUri.run("ESTwoKeyNav", "$search=abc AND (def ghi)");
+ // percent encoded characters
+ testUri.run("ESTwoKeyNav", "$search=%41%42%43");
+ testUri.run("ESTwoKeyNav", "$search=\"100%25\"");
+
// escaped characters
testUri.run("ESTwoKeyNav", "$search=\"abc\"");
testUri.run("ESTwoKeyNav", "$search=\"a\\\"bc\"");
@@ -5474,6 +5478,8 @@ public class TestFullResourcePath {
.isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
testUri.runEx("ESTwoKeyNav", "$search=%22a%5Cbc%22")
.isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
+ testUri.runEx("ESTwoKeyNav", "$search=not%27allowed")
+ .isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
}
@Test