From a9eac6cb0a46bd1b8e252f5afd9e17260c27c4eb Mon Sep 17 00:00:00 2001 From: ramya vasanth Date: Wed, 8 May 2019 11:03:32 +0530 Subject: [PATCH] [OLINGO-1356] not supporting alpha numeric characters --- .../uri/parser/search/SearchTokenizer.java | 19 +++++++++++++++++-- .../parser/search/SearchTokenizerTest.java | 11 ++++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java index 5d7c94824..42c0c2a10 100644 --- a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java +++ b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java @@ -53,6 +53,9 @@ public class SearchTokenizer { protected static final char CHAR_R = 'R'; protected static final char CHAR_CLOSE = ')'; protected static final char CHAR_OPEN = '('; + protected static final char CHAR_COMMA = ','; + protected static final char CHAR_DOT = '.'; + protected static final char CHAR_HYPEN = '-'; public State() {} @@ -120,7 +123,7 @@ public class SearchTokenizer { static boolean isAllowedWord(final char character) { return Character.isUnicodeIdentifierStart(character); } - + /** * * searchPhrase = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark @@ -317,6 +320,14 @@ public class SearchTokenizer { } } + /** + * Literal state for a search word token (Token.WORD), as per the updated ABNF: + * https://github.com/oasis-tcs/odata-abnf/blob/master/abnf/odata-abnf-construction-rules.txt#L332-L356. + * searchWord = 1*( ALPHA / DIGIT / COMMA / "." / "-" / pct-encoded ) + * In the ABNF this includes Unicode characters of categories L or N using UTF-8 and percent-encoding. + * NOTE(review): nextChar accepts isAllowedWord characters plus ASCII '0'-'9', ',', '.' and '-'; + * no '%' handling is visible in this hunk and the tests expect "%2F" to be rejected; please confirm.
+ */ private class SearchWordState extends LiteralState { public SearchWordState(final char c) throws SearchTokenizerException { super(Token.WORD, c); @@ -336,7 +347,11 @@ public class SearchTokenizer { @Override public State nextChar(final char c) throws SearchTokenizerException { - if (isAllowedWord(c)) { + if (isAllowedWord(c) || + ('0' <= c && c <= '9') || + (c == CHAR_COMMA) || + (c == CHAR_DOT) || + (c == CHAR_HYPEN)) { return allowed(c); } else if (c == CHAR_CLOSE) { finish(); diff --git a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java index 0448ca8d7..23866e375 100644 --- a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java +++ b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java @@ -53,8 +53,17 @@ public class SearchTokenizerTest { assertQuery("A").resultsIn(word("A")); assertQuery("AN").resultsIn(word("AN")); assertQuery("O").resultsIn(word("O")); + assertQuery("notAw0rd").resultsIn(word("notAw0rd")); + assertQuery("not,").resultsIn(word("not,")); + assertQuery("not.").resultsIn(word("not.")); + assertQuery("B-B").resultsIn(word("B-B")); + assertQuery("Dž").resultsIn(word("Dž")); // invalid - assertQuery("notAw0rd").resultsIn(SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER); + assertQuery("%2F").resultsIn(SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER); + assertQuery("%3A").resultsIn(SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER); + assertQuery("not%5B").resultsIn(SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER); + assertQuery("not%7B").resultsIn(SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER); + assertQuery("not%6A").resultsIn(SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER); } private Validator.Tuple word(final String literal) {