[OLINGO-568] Search grammar rewritten

This commit is contained in:
Christian Holzer 2015-11-24 17:02:17 +01:00
parent 8674a1f299
commit 6dd0a0f3e5
6 changed files with 157 additions and 157 deletions

View File

@ -18,6 +18,9 @@
*/
package org.apache.olingo.server.core.uri.parser.search;
import java.util.Iterator;
import java.util.List;
import org.apache.olingo.server.api.uri.queryoption.SearchOption;
import org.apache.olingo.server.api.uri.queryoption.search.SearchBinaryOperatorKind;
import org.apache.olingo.server.api.uri.queryoption.search.SearchExpression;
@ -25,8 +28,15 @@ import org.apache.olingo.server.api.uri.queryoption.search.SearchTerm;
import org.apache.olingo.server.core.uri.parser.search.SearchQueryToken.Token;
import org.apache.olingo.server.core.uri.queryoption.SearchOptionImpl;
import java.util.Iterator;
import java.util.List;
/*
* Rewritten grammar
*
* SearchExpr ::= ExprOR
* ExprOR ::= ExprAnd ('OR' ExprAnd)*
* ExprAnd ::= Term ('AND'? Term)*
* Term ::= ('NOT')? (Word | Phrase)
* | '(' ExprOR ')'
*/
public class SearchParser {
@ -54,60 +64,58 @@ public class SearchParser {
if (token == null) {
throw new SearchParserException("No search String", SearchParserException.MessageKeys.NO_EXPRESSION_FOUND);
}
SearchExpression se = processSearchExpression(null);
SearchExpression searchExpression = processSearchExpression();
if(!isEof()) {
throw new SearchParserException("Token left after end of search query parsing.",
SearchParserException.MessageKeys.INVALID_END_OF_QUERY_TOKEN_LEFT, token.getToken().name());
SearchParserException.MessageKeys.INVALID_END_OF_QUERY, getTokenAsString());
}
return se;
return searchExpression;
}
private SearchExpression processSearchExpression(SearchExpression left) throws SearchParserException {
if (isEof()) {
return left;
/**
 * Entry rule of the search grammar; delegates directly to {@code processExprOr()}.
 *
 * @return the expression returned by {@code processExprOr()}
 * @throws SearchParserException propagated unchanged from {@code processExprOr()}
 */
private SearchExpression processSearchExpression() throws SearchParserException {
return processExprOr();
}
private SearchExpression processExprOr() throws SearchParserException {
SearchExpression left = processExprAnd();
while(isToken(Token.OR)) {
nextToken(); // Match OR
final SearchExpression right = processExprAnd();
left = new SearchBinaryImpl(left, SearchBinaryOperatorKind.OR, right);
}
if (left == null && (isToken(SearchQueryToken.Token.AND) || isToken(SearchQueryToken.Token.OR))) {
throw new SearchParserException(token.getToken() + " needs a left operand.",
SearchParserException.MessageKeys.INVALID_BINARY_OPERATOR_POSITION, token.getToken().toString());
}
return left;
}
SearchExpression expression = left;
if (isToken(SearchQueryToken.Token.OPEN)) {
processOpen();
expression = processSearchExpression(left);
if (expression == null) {
throw new SearchParserException("Brackets must contain an expression.",
SearchParserException.MessageKeys.NO_EXPRESSION_FOUND);
/**
 * Parses a sequence of terms joined by AND, where the AND keyword itself is optional.
 * <p>
 * After the first term, parsing continues for as long as the current token is either
 * {@code AND} or something {@code isTerm()} accepts. Each further term is combined with
 * everything parsed so far, so the resulting tree nests to the left.
 *
 * @return the single term if nothing follows it, otherwise a left-nested chain of AND nodes
 * @throws SearchParserException propagated from {@code processTerm()}
 */
private SearchExpression processExprAnd() throws SearchParserException {
  SearchExpression result = processTerm();
  for (;;) {
    final boolean explicitAnd = isToken(Token.AND);
    if (!explicitAnd && !isTerm()) {
      // Neither an AND keyword nor the start of another term: this AND chain is finished.
      return result;
    }
    if (explicitAnd) {
      nextToken(); // Match AND
    }
    result = new SearchBinaryImpl(result, SearchBinaryOperatorKind.AND, processTerm());
  }
}
private SearchExpression processTerm() throws SearchParserException {
if(isToken(SearchQueryToken.Token.OPEN)) {
nextToken(); // Match OPEN
final SearchExpression expr = processExprOr();
processClose();
} else if (isTerm()) {
expression = processTerm();
return expr;
} else {
// ('NOT')? (Word | Phrase)
if (isToken(SearchQueryToken.Token.NOT)) {
return processNot();
}
return processWordOrPhrase();
}
if (isToken(SearchQueryToken.Token.AND) || isToken(SearchQueryToken.Token.OPEN) || isTerm()) {
expression = processAnd(expression);
} else if (isToken(SearchQueryToken.Token.OR)) {
expression = processOr(expression);
} else if (isEof()) {
return expression;
}
return expression;
}
private boolean isTerm() {
return isToken(SearchQueryToken.Token.NOT)
|| isToken(SearchQueryToken.Token.PHRASE)
|| isToken(SearchQueryToken.Token.WORD);
}
private boolean isEof() {
return token == null;
}
private boolean isToken(SearchQueryToken.Token toCheckToken) {
return token != null && token.getToken() == toCheckToken;
}
private void processClose() throws SearchParserException {
@ -119,73 +127,15 @@ public class SearchParser {
}
}
private void processOpen() {
nextToken();
}
private SearchExpression processAnd(SearchExpression left) throws SearchParserException {
if (isToken(SearchQueryToken.Token.AND)) {
nextToken();
}
SearchExpression se = left;
if (isTerm()) {
se = processTerm();
if(isTerm()) {
se = processAnd(se);
}
se = new SearchBinaryImpl(left, SearchBinaryOperatorKind.AND, se);
return processSearchExpression(se);
} else {
if (isToken(SearchQueryToken.Token.AND) || isToken(SearchQueryToken.Token.OR)) {
throw new SearchParserException("Operators must not be followed by an AND or an OR",
SearchParserException.MessageKeys.INVALID_OPERATOR_AFTER_AND, token.getToken().name());
} else if(isEof()) {
throw new SearchParserException("Missing search expression after AND (found end of search query)",
SearchParserException.MessageKeys.INVALID_END_OF_QUERY, Token.AND.name());
}
se = processSearchExpression(se);
return new SearchBinaryImpl(left, SearchBinaryOperatorKind.AND, se);
}
}
public SearchExpression processOr(SearchExpression left) throws SearchParserException {
if (isToken(SearchQueryToken.Token.OR)) {
nextToken();
}
if(isEof()) {
throw new SearchParserException("Missing search expression after OR (found end of search query)",
SearchParserException.MessageKeys.INVALID_END_OF_QUERY, Token.OR.name());
}
SearchExpression se = processSearchExpression(left);
return new SearchBinaryImpl(left, SearchBinaryOperatorKind.OR, se);
}
private SearchExpression processNot() throws SearchParserException {
nextToken();
if (isToken(Token.WORD) || isToken(Token.PHRASE)) {
return new SearchUnaryImpl(processWordOrPhrase());
}
if(isEof()) {
throw new SearchParserException("NOT must be followed by a term.",
SearchParserException.MessageKeys.INVALID_NOT_OPERAND, "EOF");
}
throw new SearchParserException("NOT must be followed by a term not a " + token.getToken(),
SearchParserException.MessageKeys.INVALID_NOT_OPERAND, token.getToken().toString());
}
private void nextToken() {
if (tokens.hasNext()) {
token = tokens.next();
} else {
token = null;
}
}
private SearchExpression processTerm() throws SearchParserException {
if (isToken(SearchQueryToken.Token.NOT)) {
return processNot();
}
return processWordOrPhrase();
final String tokenAsString = getTokenAsString();
throw new SearchParserException("NOT must be followed by a term not a " + tokenAsString,
SearchParserException.MessageKeys.INVALID_NOT_OPERAND, tokenAsString);
}
private SearchTerm processWordOrPhrase() throws SearchParserException {
@ -194,9 +144,11 @@ public class SearchParser {
} else if (isToken(Token.WORD)) {
return processWord();
}
throw new SearchParserException("Expected PHRASE||WORD found: " + token.getToken(),
final String tokenName = getTokenAsString();
throw new SearchParserException("Expected PHRASE||WORD found: " + tokenName,
SearchParserException.MessageKeys.EXPECTED_DIFFERENT_TOKEN,
Token.PHRASE.name() + "" + Token.WORD.name(), token.getToken().toString());
Token.PHRASE.name() + "" + Token.WORD.name(), tokenName);
}
private SearchTerm processWord() {
@ -210,4 +162,31 @@ public class SearchParser {
nextToken();
return new SearchTermImpl(literal.substring(1,literal.length()-1));
}
/**
 * Tells whether the current token is one of NOT, PHRASE, WORD or OPEN.
 *
 * @return {@code true} for one of those four token kinds; {@code false} otherwise,
 *         including when there is no current token
 */
private boolean isTerm() {
  if (token == null) {
    return false;
  }
  final SearchQueryToken.Token kind = token.getToken();
  return kind == SearchQueryToken.Token.NOT
      || kind == SearchQueryToken.Token.PHRASE
      || kind == SearchQueryToken.Token.WORD
      || kind == SearchQueryToken.Token.OPEN;
}
/**
 * Tells whether there is no current token.
 *
 * @return {@code true} if {@code token} is {@code null}
 */
private boolean isEof() {
return token == null;
}
/**
 * Compares the kind of the current token with the given kind.
 *
 * @param toCheckToken the token kind to test for
 * @return {@code true} if a current token exists and its kind is {@code toCheckToken};
 *         {@code false} if it differs or there is no current token
 */
private boolean isToken(SearchQueryToken.Token toCheckToken) {
  if (token == null) {
    return false;
  }
  return toCheckToken == token.getToken();
}
/**
 * Advances to the next token of the {@code tokens} iterator.
 * Once the iterator is exhausted, {@code token} is set to {@code null}.
 */
private void nextToken() {
  token = tokens.hasNext() ? tokens.next() : null;
}
/**
 * Produces a printable name for the current token, for use in error messages.
 *
 * @return the enum name of the current token's kind, or the literal {@code "<EOF>"}
 *         when there is no current token
 */
private String getTokenAsString() {
  if (token != null) {
    return token.getToken().name();
  }
  return "<EOF>";
}
}

View File

@ -28,22 +28,14 @@ public class SearchParserException extends UriParserSyntaxException {
NO_EXPRESSION_FOUND,
/** parameter: message */
TOKENIZER_EXCEPTION,
/** parameter: tokenCharacter */
INVALID_TOKEN_CHARACTER_FOUND,
/** parameter: operatorkind */
INVALID_BINARY_OPERATOR_POSITION,
/** parameter: operatorkind */
INVALID_NOT_OPERAND,
/** parameters: - */
MISSING_CLOSE,
/** parameters: expectedToken actualToken */
EXPECTED_DIFFERENT_TOKEN,
/** parameters: actualToken */
INVALID_END_OF_QUERY,
/** parameters: left_over_token */
INVALID_END_OF_QUERY_TOKEN_LEFT,
/** parameter: operatorkind */
INVALID_OPERATOR_AFTER_AND;
/** parameter: actual token */
INVALID_END_OF_QUERY;
@Override
public String getKey() {

View File

@ -37,16 +37,12 @@ UriParserSyntaxException.SYSTEM_QUERY_OPTION_LEVELS_NOT_ALLOWED_HERE=The system
UriParserSyntaxException.SYNTAX=The URI is malformed.
UriParserSyntaxException.DUPLICATED_ALIAS=Duplicated alias. An alias '%1$s' was already specified!.
SearchParserException.TOKENIZER_EXCEPTION=Exception during tokenizer creation with message '%1$s'.
SearchParserException.INVALID_TOKEN_CHARACTER_FOUND=Invalid token character with value '%1$s' found.
SearchParserException.INVALID_BINARY_OPERATOR_POSITION=Invalid binary operator position for kind '%1$s' found.
SearchParserException.INVALID_NOT_OPERAND=Invalid not operand for kind '%1$s' found.
SearchParserException.EXPECTED_DIFFERENT_TOKEN=Expected token '%1$s' but found '%2$s'.
SearchParserException.NO_EXPRESSION_FOUND=No expression found.
SearchParserException.INVALID_OPERATOR_AFTER_AND=Invalid operator after AND found of kind '%1$s'.
SearchParserException.INVALID_END_OF_QUERY=Invalid end of search query after '%1$s' (query must end with a search phrase or word).
SearchParserException.INVALID_END_OF_QUERY_TOKEN_LEFT=Invalid end of search query. Found not processed token '%1$s' at the end.
SearchParserException.TOKENIZER_EXCEPTION=Exception during tokenizer creation with message '%1$s'.
SearchParserException.INVALID_NOT_OPERAND=Invalid not operand for kind '%1$s' found.
SearchParserException.MISSING_CLOSE=Missing closing bracket after an opening bracket.
SearchParserException.EXPECTED_DIFFERENT_TOKEN=Expected token '%1$s' but found '%2$s'.
SearchParserException.INVALID_END_OF_QUERY=Invalid end of search query after '%1$s' (query must end with a search phrase or word).
SearchTokenizerException.FORBIDDEN_CHARACTER=Not allowed character '%1$s' found for token '%2$s'.
SearchTokenizerException.NOT_EXPECTED_TOKEN=Not expected token '%1$s' found.

View File

@ -25,6 +25,8 @@ import org.junit.Test;
public class SearchParserAndTokenizerTest {
private static final String EOF = "<EOF>";
@Test
public void basicParsing() throws Exception {
assertQuery("\"99\"").resultsIn("'99'");
@ -32,7 +34,12 @@ public class SearchParserAndTokenizerTest {
assertQuery("a AND b").resultsIn("{'a' AND 'b'}");
assertQuery("a AND b AND c").resultsIn("{{'a' AND 'b'} AND 'c'}");
assertQuery("a OR b").resultsIn("{'a' OR 'b'}");
assertQuery("a OR b OR c").resultsIn("{'a' OR {'b' OR 'c'}}");
assertQuery("a OR b OR c").resultsIn("{{'a' OR 'b'} OR 'c'}");
assertQuery("NOT a NOT b").resultsIn("{{NOT 'a'} AND {NOT 'b'}}");
assertQuery("NOT a AND NOT b").resultsIn("{{NOT 'a'} AND {NOT 'b'}}");
assertQuery("NOT a OR NOT b").resultsIn("{{NOT 'a'} OR {NOT 'b'}}");
assertQuery("NOT a OR NOT b NOT C").resultsIn("{{NOT 'a'} OR {{NOT 'b'} AND {NOT 'C'}}}");
}
@Test
@ -51,34 +58,44 @@ public class SearchParserAndTokenizerTest {
public void parenthesesParsing() throws Exception {
assertQuery("a AND (b OR c)").resultsIn("{'a' AND {'b' OR 'c'}}");
assertQuery("(a OR b) AND NOT c").resultsIn("{{'a' OR 'b'} AND {NOT 'c'}}");
assertQuery("(a OR B) AND (c OR d AND NOT e OR (f))")
.resultsIn("{{'a' OR 'B'} AND {{'c' OR {'d' AND {NOT 'e'}}} OR 'f'}}");
assertQuery("(a OR B) (c OR d NOT e OR (f))")
.resultsIn("{{'a' OR 'B'} AND {{'c' OR {'d' AND {NOT 'e'}}} OR 'f'}}");
assertQuery("((((a))))").resultsIn("'a'");
assertQuery("((((a)))) ((((a))))").resultsIn("{'a' AND 'a'}");
assertQuery("((((a)))) OR ((((a))))").resultsIn("{'a' OR 'a'}");
assertQuery("((((((a)))) ((((c))) OR (((C)))) ((((a))))))").resultsIn("{{'a' AND {'c' OR 'C'}} AND 'a'}");
assertQuery("((((\"a\")))) OR ((((\"a\"))))").resultsIn("{'a' OR 'a'}");
}
@Test
public void parseImplicitAnd() throws Exception {
assertQuery("a b").resultsIn("{'a' AND 'b'}");
assertQuery("a b c").resultsIn("{'a' AND {'b' AND 'c'}}");
assertQuery("a and b").resultsIn("{'a' AND {'and' AND 'b'}}");
assertQuery("a b c").resultsIn("{{'a' AND 'b'} AND 'c'}");
assertQuery("a and b").resultsIn("{{'a' AND 'and'} AND 'b'}");
assertQuery("hey ANDy warhol").resultsIn("{{'hey' AND 'ANDy'} AND 'warhol'}");
assertQuery("a b OR c").resultsIn("{{'a' AND 'b'} OR 'c'}");
assertQuery("a \"bc123\" OR c").resultsIn("{{'a' AND 'bc123'} OR 'c'}");
assertQuery("(a OR x) bc c").resultsIn("{{'a' OR 'x'} AND {'bc' AND 'c'}}");
assertQuery("one ((a OR x) bc c)").resultsIn("{'one' AND {{'a' OR 'x'} AND {'bc' AND 'c'}}}");
assertQuery("(a OR x) bc c").resultsIn("{{{'a' OR 'x'} AND 'bc'} AND 'c'}");
assertQuery("one ((a OR x) bc c)").resultsIn("{'one' AND {{{'a' OR 'x'} AND 'bc'} AND 'c'}}");
}
@Test
public void invalidSearchQuery() throws Exception {
assertQuery("99").resultsIn(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
assertQuery("NOT").resultsIn(SearchParserException.MessageKeys.INVALID_NOT_OPERAND);
assertQuery("AND").resultsIn(SearchParserException.MessageKeys.INVALID_BINARY_OPERATOR_POSITION);
assertQuery("OR").resultsIn(SearchParserException.MessageKeys.INVALID_BINARY_OPERATOR_POSITION);
assertQuery("AND").resultsInExpectedTerm(SearchQueryToken.Token.AND.name());
assertQuery("OR").resultsInExpectedTerm(SearchQueryToken.Token.OR.name());
assertQuery("NOT a AND").resultsIn(SearchParserException.MessageKeys.INVALID_END_OF_QUERY);
assertQuery("NOT a OR").resultsIn(SearchParserException.MessageKeys.INVALID_END_OF_QUERY);
assertQuery("a AND").resultsIn(SearchParserException.MessageKeys.INVALID_END_OF_QUERY);
assertQuery("a OR").resultsIn(SearchParserException.MessageKeys.INVALID_END_OF_QUERY);
assertQuery("NOT a AND").resultsInExpectedTerm(EOF);
assertQuery("NOT a OR").resultsInExpectedTerm(EOF);
assertQuery("a AND").resultsInExpectedTerm(EOF);
assertQuery("a OR").resultsInExpectedTerm(EOF);
assertQuery("a OR b)").resultsIn(SearchParserException.MessageKeys.INVALID_END_OF_QUERY_TOKEN_LEFT);
assertQuery("a NOT b)").resultsIn(SearchParserException.MessageKeys.INVALID_END_OF_QUERY_TOKEN_LEFT);
assertQuery("a AND b)").resultsIn(SearchParserException.MessageKeys.INVALID_END_OF_QUERY_TOKEN_LEFT);
assertQuery("a OR b)").resultsIn(SearchParserException.MessageKeys.INVALID_END_OF_QUERY);
assertQuery("a NOT b)").resultsIn(SearchParserException.MessageKeys.INVALID_END_OF_QUERY);
assertQuery("a AND b)").resultsIn(SearchParserException.MessageKeys.INVALID_END_OF_QUERY);
assertQuery("(a OR b").resultsIn(SearchParserException.MessageKeys.MISSING_CLOSE);
assertQuery("(a NOT b").resultsIn(SearchParserException.MessageKeys.MISSING_CLOSE);
@ -86,6 +103,9 @@ public class SearchParserAndTokenizerTest {
assertQuery("((a AND b OR c)").resultsIn(SearchParserException.MessageKeys.MISSING_CLOSE);
assertQuery("a AND (b OR c").resultsIn(SearchParserException.MessageKeys.MISSING_CLOSE);
assertQuery("(a AND ((b OR c)").resultsIn(SearchParserException.MessageKeys.MISSING_CLOSE);
assertQuery("NOT NOT a").resultsIn(SearchParserException.MessageKeys.INVALID_NOT_OPERAND);
assertQuery("NOT (a)").resultsIn(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
}
private static Validator assertQuery(String searchQuery) {
@ -126,6 +146,15 @@ public class SearchParserAndTokenizerTest {
Assert.fail("SearchParserException with message key " + key.getKey() + " was not thrown.");
}
/**
 * Asserts that parsing the query fails because a WORD or PHRASE was expected
 * but a different token was found.
 * <p>
 * The test fails if parsing succeeds, so a query that unexpectedly becomes valid
 * cannot slip through unnoticed.
 *
 * @param actualToken name of the token the parser is expected to report as found
 * @throws SearchTokenizerException if the query cannot be tokenized
 */
public void resultsInExpectedTerm(final String actualToken) throws SearchTokenizerException {
  try {
    final SearchExpression parsed = getSearchExpression();
    // Reaching this line means no exception was thrown; AssertionError is not caught below.
    Assert.fail("Expected SearchParserException with message key "
        + SearchParserException.MessageKeys.EXPECTED_DIFFERENT_TOKEN.getKey()
        + " but the query was parsed to: " + parsed);
  } catch (SearchParserException e) {
    Assert.assertEquals(SearchParserException.MessageKeys.EXPECTED_DIFFERENT_TOKEN, e.getMessageKey());
    Assert.assertEquals("Expected PHRASE||WORD found: " + actualToken, e.getMessage());
  }
}
private void resultsIn(String expectedSearchExpression) throws SearchTokenizerException, SearchParserException {
final SearchExpression searchExpression = getSearchExpression();
Assert.assertEquals(expectedSearchExpression, searchExpression.toString());

View File

@ -184,31 +184,36 @@ public class SearchParserTest extends SearchParser {
@Test
public void doubleAnd() throws Exception {
runEx(SearchParserException.MessageKeys.INVALID_OPERATOR_AFTER_AND, Token.WORD, Token.AND, Token.AND, Token.WORD);
runEx(SearchParserException.MessageKeys.EXPECTED_DIFFERENT_TOKEN, Token.WORD, Token.AND, Token.AND, Token.WORD);
}
@Test
public void invalidQueryEnds() {
runEx(MessageKeys.INVALID_END_OF_QUERY, Token.WORD, Token.AND);
runEx(MessageKeys.INVALID_END_OF_QUERY, Token.WORD, Token.OR);
runEx(MessageKeys.INVALID_END_OF_QUERY, Token.NOT, Token.WORD, Token.OR);
runEx(MessageKeys.INVALID_END_OF_QUERY, Token.NOT, Token.WORD, Token.AND);
runEx(MessageKeys.INVALID_END_OF_QUERY_TOKEN_LEFT, Token.WORD, Token.AND, Token.WORD, Token.CLOSE);
runEx(MessageKeys.EXPECTED_DIFFERENT_TOKEN, Token.WORD, Token.AND);
runEx(MessageKeys.EXPECTED_DIFFERENT_TOKEN, Token.WORD, Token.OR);
runEx(MessageKeys.EXPECTED_DIFFERENT_TOKEN, Token.NOT, Token.WORD, Token.OR);
runEx(MessageKeys.EXPECTED_DIFFERENT_TOKEN, Token.NOT, Token.WORD, Token.AND);
runEx(MessageKeys.INVALID_END_OF_QUERY, Token.WORD, Token.AND, Token.WORD, Token.CLOSE);
}
@Test
public void invalidQueryStarts() throws Exception {
run(Token.WORD, Token.AND, Token.WORD, Token.AND, Token.WORD);
}
@Test
public void singleAnd() {
runEx(SearchParserException.MessageKeys.INVALID_BINARY_OPERATOR_POSITION, Token.AND);
runEx(SearchParserException.MessageKeys.EXPECTED_DIFFERENT_TOKEN, Token.AND);
}
@Test
public void singleOpenBracket() {
runEx(SearchParserException.MessageKeys.NO_EXPRESSION_FOUND, Token.OPEN);
runEx(SearchParserException.MessageKeys.EXPECTED_DIFFERENT_TOKEN, Token.OPEN);
}
@Test
public void emptyBrackets() {
runEx(SearchParserException.MessageKeys.NO_EXPRESSION_FOUND, Token.OPEN, Token.CLOSE);
runEx(SearchParserException.MessageKeys.EXPECTED_DIFFERENT_TOKEN, Token.OPEN, Token.CLOSE);
}
@Test

View File

@ -368,7 +368,6 @@ public class SearchTokenizerTest {
this.searchQuery = searchQuery;
}
@SuppressWarnings("unused")
private Validator enableLogging() {
log = true;
return this;