[OLINGO-568] Added support for escape of escape and quote characters

This commit is contained in:
Michael Bolz 2015-11-30 15:15:00 +01:00
parent 6dd0a0f3e5
commit e5ac590794
4 changed files with 134 additions and 78 deletions

View File

@ -37,6 +37,13 @@ import java.util.List;
* searchWord = 1*ALPHA ; Actually: any character from the Unicode categories L or Nl, * searchWord = 1*ALPHA ; Actually: any character from the Unicode categories L or Nl,
* ; but not the words AND, OR, and NOT * ; but not the words AND, OR, and NOT
* </code> * </code>
*
* <b>ATTENTION:</b> For a <code>searchPhrase</code> the percent encoding is not supported by the
* <code>SearchTokenizer</code>.<br/>
* This decision was made because the <code>org.apache.olingo.server.core.uri.parser.Parser</code>
* already handles each query option as a <code>percent decoded</code> string in its <code>parseUri</code> method (see
* line <i>177ff</i>: <code>for (RawUri.QueryOption option : uri.queryOptionListDecoded)</code>).
*
*/ */
public class SearchTokenizer { public class SearchTokenizer {
@ -45,6 +52,7 @@ public class SearchTokenizer {
private boolean finished = false; private boolean finished = false;
protected static final char QUOTATION_MARK = '\"'; protected static final char QUOTATION_MARK = '\"';
protected static final char PHRASE_ESCAPE_CHAR = '\\';
protected static final char CHAR_N = 'N'; protected static final char CHAR_N = 'N';
protected static final char CHAR_O = 'O'; protected static final char CHAR_O = 'O';
protected static final char CHAR_T = 'T'; protected static final char CHAR_T = 'T';
@ -126,45 +134,59 @@ public class SearchTokenizer {
} }
/** /**
* searchPhrase = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark * <code>
* * <b>searchPhrase</b> = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark
* qchar-no-AMP-DQUOTE = qchar-unescaped / escape ( escape / quotation-mark ) * <br/><br/>
* * <b>qchar-no-AMP-DQUOTE</b> = qchar-unescaped / escape ( escape / quotation-mark )
* qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "=" * <br/><br/>
* * <b>qchar-unescaped</b> = unreserved / pct-encoded-unescaped / other-delims /
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" * ":" / "@" / "/" / "?" / "$" / "'" / "="
* * <br/><br/>
* escape = "\" / "%5C" ; reverse solidus U+005C * <b>unreserved</b> = ALPHA / DIGIT / "-" / "." / "_" / "~"
* * <br/><br/>
* pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG * <b>escape</b> = "\" / "%5C" ; reverse solidus U+005C
* <br/><br/>
* <b>pct-encoded-unescaped</b> = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
* / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F ) * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
* / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" ) * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
* <br/><br/>
* <b>other-delims</b> = "!" / "(" / ")" / "*" / "+" / "," / ";"
* <br/><br/>
* <b>quotation-mark</b> = DQUOTE / "%22"
* <br/><br/>
* <b>ALPHA</b> = %x41-5A / %x61-7A
* <br/>
* <b>DIGIT</b> = %x30-39
* <br/>
* <b>DQUOTE</b> = %x22
* </code>
* *
* other-delims = "!" / "(" / ")" / "*" / "+" / "," / ";" * Checks if given <code>character</code> is allowed for a search phrase.
* * <b>ATTENTION:</b> Escaping and percent encoding are not validated here (and cannot be validated on
* quotation-mark = DQUOTE / "%22" * a single character).<br/>
* * Hence for the {@link #PHRASE_ESCAPE_CHAR} and the {@link #QUOTATION_MARK} characters this method will
* ALPHA = %x41-5A / %x61-7A * return <code>FALSE</code>.<br/>
* DIGIT = %x30-39 * <b>Furthermore</b> percent encoded characters are also not validated (and can not be validated on
* DQUOTE = %x22 * a single character).<br/>
* Hence for the <code>%</code> character this method will return <code>FALSE</code>.<br/>
* *
* @param character which is checked * @param character which is checked
* @return true if character is allowed for a phrase * @return true if character is allowed for a phrase
*/ */
static boolean isAllowedPhrase(final char character) { static boolean isAllowedPhrase(final char character) {
// FIXME mibo: check missing // FIXME mibo: check missing
return isQCharUnescaped(character) || isEscaped(character); return isQCharUnescaped(character);// || isEscaped(character);
} }
/** // /**
* escape = "\" / "%5C" ; reverse solidus U+005C // * escape = "\" / "%5C" ; reverse solidus U+005C
* @param character which is checked // * @param character which is checked
* @return true if character is allowed // * @return true if character is allowed
*/ // */
private static boolean isEscaped(char character) { // private static boolean isEscaped(char character) {
// TODO: mibo(151117): check how to implement // // TODO: mibo(151130): is checked in SearchPhraseState
return false; // return false;
} // }
/** /**
* qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "=" * qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
@ -173,14 +195,14 @@ public class SearchTokenizer {
*/ */
private static boolean isQCharUnescaped(char character) { private static boolean isQCharUnescaped(char character) {
return isUnreserved(character) return isUnreserved(character)
|| isPctEncodedUnescaped(character) // || isPctEncodedUnescaped(character)
|| isOtherDelims(character) || isOtherDelims(character)
|| character == ':' || character == ':'
|| character == '@' || character == '@'
|| character == '/' || character == '/'
|| character == '$' || character == '$'
|| character == '\'' || character == '\''
|| character == '='; || character == '=';
} }
/** /**
@ -190,43 +212,43 @@ public class SearchTokenizer {
*/ */
private static boolean isOtherDelims(char character) { private static boolean isOtherDelims(char character) {
return character == '!' return character == '!'
|| character == '(' || character == '('
|| character == ')' || character == ')'
|| character == '*' || character == '*'
|| character == '+' || character == '+'
|| character == ',' || character == ','
|| character == ';'; || character == ';';
} }
/** // /**
* pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG // * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
* / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F ) // * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
* / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" ) // * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
* // *
* HEXDIG = DIGIT / A-to-F // * HEXDIG = DIGIT / A-to-F
* // *
* @param character which is checked // * @param character which is checked
* @return true if character is allowed // * @return true if character is allowed
*/ // */
private static boolean isPctEncodedUnescaped(char character) { // private static boolean isPctEncodedUnescaped(char character) {
String hex = Integer.toHexString(character); // String hex = Integer.toHexString(character);
char aschar[] = hex.toCharArray(); // char aschar[] = hex.toCharArray();
if(aschar[0] == '%') { // if(aschar[0] == '%') {
if(aschar[1] == '2') { // if(aschar[1] == '2') {
return aschar[2] != '2' && isHexDigit(aschar[2]); // return aschar[2] != '2' && isHexDigit(aschar[2]);
} else if(aschar[1] == '5') { // } else if(aschar[1] == '5') {
return aschar[2] != 'C' && isHexDigit(aschar[2]); // return aschar[2] != 'C' && isHexDigit(aschar[2]);
} else if(isHexDigit(aschar[1])) { // } else if(isHexDigit(aschar[1])) {
return isHexDigit(aschar[2]); // return isHexDigit(aschar[2]);
} // }
} // }
return false; // return false;
} // }
private static boolean isHexDigit(char character) { // private static boolean isHexDigit(char character) {
return 'A' <= character && character <= 'F' // case A..F // return 'A' <= character && character <= 'F' // case A..F
|| '0' <= character && character <= '9'; // case 0..9 // || '0' <= character && character <= '9'; // case 0..9
} // }
/** /**
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
@ -235,10 +257,10 @@ public class SearchTokenizer {
*/ */
private static boolean isUnreserved(char character) { private static boolean isUnreserved(char character) {
return isAlphaOrDigit(character) return isAlphaOrDigit(character)
|| character == '-' || character == '-'
|| character == '.' || character == '.'
|| character == '_' || character == '_'
|| character == '~'; || character == '~';
} }
/** /**
@ -256,8 +278,6 @@ public class SearchTokenizer {
// BWS = *( SP / HTAB / "%20" / "%09" ) ; "bad" whitespace // BWS = *( SP / HTAB / "%20" / "%09" ) ; "bad" whitespace
// RWS = 1*( SP / HTAB / "%20" / "%09" ) ; "required" whitespace // RWS = 1*( SP / HTAB / "%20" / "%09" ) ; "required" whitespace
static boolean isWhitespace(final char character) { static boolean isWhitespace(final char character) {
// ( SP / HTAB / "%20" / "%09" )
// TODO mibo: add missing whitespaces
return character == ' ' || character == '\t'; return character == ' ' || character == '\t';
} }
@ -400,6 +420,7 @@ public class SearchTokenizer {
private class SearchPhraseState extends LiteralState { private class SearchPhraseState extends LiteralState {
private boolean closed = false; private boolean closed = false;
private boolean escaped = false;
public SearchPhraseState(char c) throws SearchTokenizerException { public SearchPhraseState(char c) throws SearchTokenizerException {
super(Token.PHRASE, c); super(Token.PHRASE, c);
if (c != QUOTATION_MARK) { if (c != QUOTATION_MARK) {
@ -416,6 +437,16 @@ public class SearchTokenizer {
} else if (isWhitespace(c)) { } else if (isWhitespace(c)) {
return new RwsState(); return new RwsState();
} }
} else if(escaped) {
escaped = false;
if(c == QUOTATION_MARK || c == PHRASE_ESCAPE_CHAR) {
return allowed(c);
} else {
return forbidden(c);
}
} else if(c == PHRASE_ESCAPE_CHAR) {
escaped = true;
return this;
} else if (isAllowedPhrase(c)) { } else if (isAllowedPhrase(c)) {
return allowed(c); return allowed(c);
} else if (isWhitespace(c)) { } else if (isWhitespace(c)) {

View File

@ -250,6 +250,14 @@ public class SearchTokenizerTest {
@Test @Test
public void characterInPhrase() throws Exception { public void characterInPhrase() throws Exception {
assertQuery("\"123\" OR \"ALPHA-._~\"").resultsIn(PHRASE, OR, PHRASE); assertQuery("\"123\" OR \"ALPHA-._~\"").resultsIn(PHRASE, OR, PHRASE);
//escaped characters
assertQuery("\"\\\"123\" OR \"\\\\abc\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"123\""),
new Validator.Tuple(OR), new Validator.Tuple(PHRASE, "\"\\abc\""));
assertQuery("\"\\\"1\\\\23\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"1\\23\""));
// exceptions
assertQuery("\"\\\"1\\\\").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE);
assertQuery("\"1\\\"").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE);
assertQuery("\"1\\23\"").resultsIn(SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER);
} }
@Test @Test

View File

@ -45,6 +45,7 @@ import org.apache.olingo.server.core.uri.parser.UriParserException;
import org.apache.olingo.server.core.uri.parser.UriParserSemanticException; import org.apache.olingo.server.core.uri.parser.UriParserSemanticException;
import org.apache.olingo.server.core.uri.parser.UriParserSemanticException.MessageKeys; import org.apache.olingo.server.core.uri.parser.UriParserSemanticException.MessageKeys;
import org.apache.olingo.server.core.uri.parser.UriParserSyntaxException; import org.apache.olingo.server.core.uri.parser.UriParserSyntaxException;
import org.apache.olingo.server.core.uri.parser.search.SearchParserException;
import org.apache.olingo.server.core.uri.testutil.FilterValidator; import org.apache.olingo.server.core.uri.testutil.FilterValidator;
import org.apache.olingo.server.core.uri.testutil.TestUriValidator; import org.apache.olingo.server.core.uri.testutil.TestUriValidator;
import org.apache.olingo.server.core.uri.validator.UriValidationException; import org.apache.olingo.server.core.uri.validator.UriValidationException;
@ -5428,9 +5429,7 @@ public class TestFullResourcePath {
} }
@Test @Test
@Ignore("$search currently not implemented")
public void testSearch() throws Exception { public void testSearch() throws Exception {
testUri.run("ESTwoKeyNav", "$search=abc"); testUri.run("ESTwoKeyNav", "$search=abc");
testUri.run("ESTwoKeyNav", "$search=NOT abc"); testUri.run("ESTwoKeyNav", "$search=NOT abc");
@ -5462,6 +5461,19 @@ public class TestFullResourcePath {
testUri.run("ESTwoKeyNav", "$search=(abc AND def) ghi "); testUri.run("ESTwoKeyNav", "$search=(abc AND def) ghi ");
testUri.run("ESTwoKeyNav", "$search=abc AND (def OR ghi)"); testUri.run("ESTwoKeyNav", "$search=abc AND (def OR ghi)");
testUri.run("ESTwoKeyNav", "$search=abc AND (def ghi)"); testUri.run("ESTwoKeyNav", "$search=abc AND (def ghi)");
// escaped characters
testUri.run("ESTwoKeyNav", "$search=\"abc\"");
testUri.run("ESTwoKeyNav", "$search=\"a\\\"bc\"");
testUri.run("ESTwoKeyNav", "$search=%22abc%22");
testUri.run("ESTwoKeyNav", "$search=%22a%5C%22bc%22");
testUri.run("ESTwoKeyNav", "$search=%22a%5C%5Cbc%22");
// wrong escaped characters
testUri.runEx("ESTwoKeyNav", "$search=%22a%22bc%22")
.isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
testUri.runEx("ESTwoKeyNav", "$search=%22a%5Cbc%22")
.isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
} }
@Test @Test

View File

@ -176,6 +176,11 @@ public class TestUriValidator implements TestValidator {
} }
} }
public TestUriValidator isExceptionMessage(final ODataLibraryException.MessageKey messageKey) {
assertEquals(messageKey, exception.getMessageKey());
return this;
}
public TestUriValidator isExSyntax(final UriParserSyntaxException.MessageKeys messageKey) { public TestUriValidator isExSyntax(final UriParserSyntaxException.MessageKeys messageKey) {
assertEquals(UriParserSyntaxException.class, exception.getClass()); assertEquals(UriParserSyntaxException.class, exception.getClass());
assertEquals(messageKey, exception.getMessageKey()); assertEquals(messageKey, exception.getMessageKey());