[OLINGO-568] Added SearchTokenizerException

This commit is contained in:
mibo 2015-11-10 20:10:02 +01:00
parent f64abe136b
commit 3eef0bf605
4 changed files with 82 additions and 43 deletions

View File

@ -27,7 +27,11 @@ public class SearchParser {
public SearchOption parse(String path, String value) { public SearchOption parse(String path, String value) {
SearchTokenizer tokenizer = new SearchTokenizer(); SearchTokenizer tokenizer = new SearchTokenizer();
List<SearchQueryToken> tokens = tokenizer.tokenize(value); try {
List<SearchQueryToken> tokens = tokenizer.tokenize(value);
} catch (SearchTokenizerException e) {
return null;
}
return new SearchOptionImpl(); return new SearchOptionImpl();
} }
} }

View File

@ -58,14 +58,14 @@ public class SearchTokenizer {
token = t; token = t;
} }
protected abstract State nextChar(char c); protected abstract State nextChar(char c) throws SearchTokenizerException;
public State allowed(char c) { public State allowed(char c) {
return this; return this;
} }
public State forbidden(char c) { public State forbidden(char c) throws SearchTokenizerException {
throw new IllegalStateException(this.getClass().getName() + "->" + c); throw new SearchTokenizerException("Forbidden character for " + this.getClass().getName() + "->" + c);
} }
public State finish() { public State finish() {
@ -105,7 +105,7 @@ public class SearchTokenizer {
* @return true if character is allowed for a phrase * @return true if character is allowed for a phrase
*/ */
static boolean isAllowedPhrase(final char character) { static boolean isAllowedPhrase(final char character) {
// FIXME mibo: check missing and '\'' // FIXME mibo: check missing
return isAllowedChar(character) return isAllowedChar(character)
|| character == '-' || character == '-'
|| character == '.' || character == '.'
@ -115,6 +115,7 @@ public class SearchTokenizer {
|| character == '@' || character == '@'
|| character == '/' || character == '/'
|| character == '$' || character == '$'
|| character == '\''
|| character == '='; || character == '=';
} }
@ -142,7 +143,7 @@ public class SearchTokenizer {
public LiteralState(Token t) { public LiteralState(Token t) {
super(t); super(t);
} }
public LiteralState(Token t, char c) { public LiteralState(Token t, char c) throws SearchTokenizerException {
super(t); super(t);
init(c); init(c);
} }
@ -159,9 +160,9 @@ public class SearchTokenizer {
return literal.toString(); return literal.toString();
} }
public State init(char c) { public State init(char c) throws SearchTokenizerException {
if(isFinished()) { if(isFinished()) {
throw new IllegalStateException(toString() + " is already finished."); throw new SearchTokenizerException(toString() + " is already finished.");
} }
literal.append(c); literal.append(c);
return this; return this;
@ -176,7 +177,7 @@ public class SearchTokenizer {
super(null, initLiteral); super(null, initLiteral);
} }
@Override @Override
public State nextChar(char c) { public State nextChar(char c) throws SearchTokenizerException {
if (c == CHAR_OPEN) { if (c == CHAR_OPEN) {
return new OpenState(); return new OpenState();
} else if (isWhitespace(c)) { } else if (isWhitespace(c)) {
@ -189,7 +190,7 @@ public class SearchTokenizer {
} }
@Override @Override
public State init(char c) { public State init(char c) throws SearchTokenizerException {
return nextChar(c); return nextChar(c);
} }
} }
@ -199,7 +200,7 @@ public class SearchTokenizer {
super(Token.TERM); super(Token.TERM);
} }
@Override @Override
public State nextChar(char c) { public State nextChar(char c) throws SearchTokenizerException {
if(c == CHAR_N) { if(c == CHAR_N) {
return new NotState(c); return new NotState(c);
} else if (c == QUOTATION_MARK) { } else if (c == QUOTATION_MARK) {
@ -207,16 +208,16 @@ public class SearchTokenizer {
} else if (isAllowedChar(c)) { } else if (isAllowedChar(c)) {
return new SearchWordState(c); return new SearchWordState(c);
} }
throw new IllegalStateException(this.getClass().getName() + "->" + c); return forbidden(c);
} }
@Override @Override
public State init(char c) { public State init(char c) throws SearchTokenizerException {
return nextChar(c); return nextChar(c);
} }
} }
private class SearchWordState extends LiteralState { private class SearchWordState extends LiteralState {
public SearchWordState(char c) { public SearchWordState(char c) throws SearchTokenizerException {
super(Token.WORD, c); super(Token.WORD, c);
} }
public SearchWordState(State toConsume) { public SearchWordState(State toConsume) {
@ -224,7 +225,7 @@ public class SearchTokenizer {
} }
@Override @Override
public State nextChar(char c) { public State nextChar(char c) throws SearchTokenizerException {
if (isAllowedChar(c)) { if (isAllowedChar(c)) {
return allowed(c); return allowed(c);
} else if (c == CHAR_CLOSE) { } else if (c == CHAR_CLOSE) {
@ -244,7 +245,7 @@ public class SearchTokenizer {
} }
private class SearchPhraseState extends LiteralState { private class SearchPhraseState extends LiteralState {
public SearchPhraseState(char c) { public SearchPhraseState(char c) throws SearchTokenizerException {
super(Token.PHRASE, c); super(Token.PHRASE, c);
if(c != QUOTATION_MARK) { if(c != QUOTATION_MARK) {
forbidden(c); forbidden(c);
@ -252,19 +253,17 @@ public class SearchTokenizer {
} }
@Override @Override
public State nextChar(char c) { public State nextChar(char c) throws SearchTokenizerException {
if(isFinished()) { if (isAllowedPhrase(c)) {
return new SearchExpressionState().init(c); return allowed(c);
} else if (isAllowedPhrase(c)) { } else if (isWhitespace(c)) {
return allowed(c); return allowed(c);
} else if (c == QUOTATION_MARK) { } else if (c == QUOTATION_MARK) {
finish(); finish();
allowed(c); allowed(c);
return new SearchExpressionState(); return new SearchExpressionState();
} else if (isWhitespace(c)) { } else if(isFinished()) {
return allowed(c); return new SearchExpressionState().init(c);
} else if (c == CHAR_CLOSE) {
return allowed(c);
} }
return forbidden(c); return forbidden(c);
} }
@ -276,7 +275,7 @@ public class SearchTokenizer {
finish(); finish();
} }
@Override @Override
public State nextChar(char c) { public State nextChar(char c) throws SearchTokenizerException {
finish(); finish();
if (isWhitespace(c)) { if (isWhitespace(c)) {
return forbidden(c); return forbidden(c);
@ -292,13 +291,13 @@ public class SearchTokenizer {
} }
@Override @Override
public State nextChar(char c) { public State nextChar(char c) throws SearchTokenizerException {
return new SearchExpressionState().init(c); return new SearchExpressionState().init(c);
} }
} }
private class NotState extends LiteralState { private class NotState extends LiteralState {
public NotState(char c) { public NotState(char c) throws SearchTokenizerException {
super(Token.NOT, c); super(Token.NOT, c);
if(c != CHAR_N) { if(c != CHAR_N) {
forbidden(c); forbidden(c);
@ -306,11 +305,11 @@ public class SearchTokenizer {
} }
@Override @Override
public State nextChar(char c) { public State nextChar(char c) {
if (getLiteral().length() == 1 && c == CHAR_O) { if (literal.length() == 1 && c == CHAR_O) {
return allowed(c); return allowed(c);
} else if (getLiteral().length() == 2 && c == CHAR_T) { } else if (literal.length() == 2 && c == CHAR_T) {
return allowed(c); return allowed(c);
} else if(getLiteral().length() == 3 && isWhitespace(c)) { } else if(literal.length() == 3 && isWhitespace(c)) {
finish(); finish();
return new BeforeSearchExpressionRwsState(); return new BeforeSearchExpressionRwsState();
} else { } else {
@ -326,7 +325,7 @@ public class SearchTokenizer {
super(null); super(null);
} }
@Override @Override
public State nextChar(char c) { public State nextChar(char c) throws SearchTokenizerException {
if (isWhitespace(c)) { if (isWhitespace(c)) {
return allowed(c); return allowed(c);
} else { } else {
@ -342,7 +341,7 @@ public class SearchTokenizer {
super(null); super(null);
} }
@Override @Override
public State nextChar(char c) { public State nextChar(char c) throws SearchTokenizerException {
if (!noneRws && isWhitespace(c)) { if (!noneRws && isWhitespace(c)) {
return allowed(c); return allowed(c);
} else if (c == CHAR_O) { } else if (c == CHAR_O) {
@ -374,7 +373,7 @@ public class SearchTokenizer {
} }
// TODO (mibo): add (new) parse exception // TODO (mibo): add (new) parse exception
public List<SearchQueryToken> tokenize(String searchQuery) { public List<SearchQueryToken> tokenize(String searchQuery) throws SearchTokenizerException {
char[] chars = searchQuery.toCharArray(); char[] chars = searchQuery.toCharArray();
State state = new SearchExpressionState(); State state = new SearchExpressionState();

View File

@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.olingo.server.core.uri.parser.search;
/**
 * Checked exception thrown by the search-query tokenizer when the input
 * contains a character that is not allowed in the current tokenizer state
 * (e.g. a forbidden character inside a word or phrase).
 */
public class SearchTokenizerException extends Exception {

  // Exception is Serializable; pin the serial form explicitly.
  private static final long serialVersionUID = 1L;

  /**
   * Creates the exception with a human-readable description of the
   * tokenizer failure.
   *
   * @param message description of the invalid input / tokenizer state
   */
  public SearchTokenizerException(String message) {
    super(message);
  }

  /**
   * Creates the exception with a description and the underlying cause,
   * so callers wrapping lower-level failures do not lose the stack trace.
   *
   * @param message description of the invalid input / tokenizer state
   * @param cause   the exception that triggered this failure
   */
  public SearchTokenizerException(String message, Throwable cause) {
    super(message, cause);
  }
}

View File

@ -150,7 +150,7 @@ public class SearchTokenizerTest {
} }
@Test @Test
public void parseImplicitAnd() { public void parseImplicitAnd() throws SearchTokenizerException {
SearchValidator.init("a b").addExpected(WORD, AND, WORD).validate(); SearchValidator.init("a b").addExpected(WORD, AND, WORD).validate();
SearchValidator.init("a b OR c").addExpected(WORD, AND, WORD, OR, WORD).validate(); SearchValidator.init("a b OR c").addExpected(WORD, AND, WORD, OR, WORD).validate();
SearchValidator.init("a bc OR c").addExpected(WORD, AND, WORD, OR, WORD).validate(); SearchValidator.init("a bc OR c").addExpected(WORD, AND, WORD, OR, WORD).validate();
@ -305,7 +305,7 @@ public class SearchTokenizerTest {
} }
@Test @Test
public void moreMixedTests() { public void moreMixedTests() throws SearchTokenizerException {
validate("abc"); validate("abc");
validate("NOT abc"); validate("NOT abc");
@ -340,20 +340,23 @@ public class SearchTokenizerTest {
} }
@Test @Test
public void parseInvalid() { public void parseInvalid() throws SearchTokenizerException {
SearchValidator.init("abc AND OR something").validate(); SearchValidator.init("abc AND OR something").validate();
SearchValidator.init("abc AND \"something\" )").validate();
//
SearchValidator.init("( abc AND) OR something").validate(SearchTokenizerException.class);
} }
public boolean validate(String query) { public void validate(String query) throws SearchTokenizerException {
return new SearchValidator(query).validate(); new SearchValidator(query).validate();
} }
public boolean validate(String query, SearchQueryToken.Token ... tokens) { public void validate(String query, SearchQueryToken.Token ... tokens) throws SearchTokenizerException {
SearchValidator sv = new SearchValidator(query); SearchValidator sv = new SearchValidator(query);
for (SearchQueryToken.Token token : tokens) { for (SearchQueryToken.Token token : tokens) {
sv.addExpected(token); sv.addExpected(token);
} }
return sv.validate(); sv.validate();
} }
private static class SearchValidator { private static class SearchValidator {
@ -393,7 +396,17 @@ public class SearchTokenizerTest {
} }
return this; return this;
} }
private boolean validate() { private void validate(Class<? extends Exception> exception) throws SearchTokenizerException {
try {
new SearchTokenizer().tokenize(searchQuery);
} catch (Exception e) {
Assert.assertEquals(exception, e.getClass());
return;
}
Assert.fail("Expected exception " + exception.getClass().getSimpleName() + " was not thrown.");
}
private void validate() throws SearchTokenizerException {
SearchTokenizer tokenizer = new SearchTokenizer(); SearchTokenizer tokenizer = new SearchTokenizer();
List<SearchQueryToken> result = tokenizer.tokenize(searchQuery); List<SearchQueryToken> result = tokenizer.tokenize(searchQuery);
Assert.assertNotNull(result); Assert.assertNotNull(result);
@ -412,8 +425,6 @@ public class SearchTokenizerTest {
} }
} }
} }
return true;
} }
} }