[OLINGO-568] Added support for escape of escape and quote characters

This commit is contained in:
Michael Bolz 2015-11-30 15:15:00 +01:00
parent 6dd0a0f3e5
commit e5ac590794
4 changed files with 134 additions and 78 deletions

View File

@ -37,6 +37,13 @@ import java.util.List;
* searchWord = 1*ALPHA ; Actually: any character from the Unicode categories L or Nl,
* ; but not the words AND, OR, and NOT
* </code>
*
* <b>ATTENTION:</b> For a <code>searchPhrase</code> the percent encoding is not supported by the
* <code>SearchTokenizer</code>.<br/>
* This decision was made because the <code>org.apache.olingo.server.core.uri.parser.Parser</code>
* already handles each query option as a <code>percent decoded</code> string in its <code>parseUri</code>
* method (see line <i>177ff</i>: <code>for (RawUri.QueryOption option : uri.queryOptionListDecoded)</code>).
*
*/
public class SearchTokenizer {
@ -45,6 +52,7 @@ public class SearchTokenizer {
private boolean finished = false;
protected static final char QUOTATION_MARK = '\"';
protected static final char PHRASE_ESCAPE_CHAR = '\\';
protected static final char CHAR_N = 'N';
protected static final char CHAR_O = 'O';
protected static final char CHAR_T = 'T';
@ -126,45 +134,59 @@ public class SearchTokenizer {
}
/**
* searchPhrase = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark
*
* qchar-no-AMP-DQUOTE = qchar-unescaped / escape ( escape / quotation-mark )
*
* qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
*
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
*
* escape = "\" / "%5C" ; reverse solidus U+005C
*
* pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
* <code>
* <b>searchPhrase</b> = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark
* <br/><br/>
* <b>qchar-no-AMP-DQUOTE</b> = qchar-unescaped / escape ( escape / quotation-mark )
* <br/><br/>
* <b>qchar-unescaped</b> = unreserved / pct-encoded-unescaped / other-delims /
* ":" / "@" / "/" / "?" / "$" / "'" / "="
* <br/><br/>
* <b>unreserved</b> = ALPHA / DIGIT / "-" / "." / "_" / "~"
* <br/><br/>
* <b>escape</b> = "\" / "%5C" ; reverse solidus U+005C
* <br/><br/>
* <b>pct-encoded-unescaped</b> = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
* / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
* / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
* <br/><br/>
* <b>other-delims</b> = "!" / "(" / ")" / "*" / "+" / "," / ";"
* <br/><br/>
* <b>quotation-mark</b> = DQUOTE / "%22"
* <br/><br/>
* <b>ALPHA</b> = %x41-5A / %x61-7A
* <br/>
* <b>DIGIT</b> = %x30-39
* <br/>
* <b>DQUOTE</b> = %x22
* </code>
*
* other-delims = "!" / "(" / ")" / "*" / "+" / "," / ";"
*
* quotation-mark = DQUOTE / "%22"
*
* ALPHA = %x41-5A / %x61-7A
* DIGIT = %x30-39
* DQUOTE = %x22
* Checks if given <code>character</code> is allowed for a search phrase.
* <b>ATTENTION:</b> Escaping and percent encoding are not validated here (and cannot be validated on
* a single character).<br/>
* Hence for the {@link #PHRASE_ESCAPE_CHAR} and the {@link #QUOTATION_MARK} characters this method will
* return <code>FALSE</code>.<br/>
* <b>Furthermore</b> percent encoded characters are also not validated (and can not be validated on
* a single character).<br/>
* Hence for the <code>%</code> character this method will return <code>FALSE</code>.<br/>
*
* @param character which is checked
* @return true if character is allowed for a phrase
*/
static boolean isAllowedPhrase(final char character) {
  // Only the unescaped character set can be decided from a single character.
  // Escape sequences (escape char followed by escape char or quotation mark) need
  // state and are tracked by SearchPhraseState via its 'escaped' flag.
  return isQCharUnescaped(character);
}
/**
* escape = "\" / "%5C" ; reverse solidus U+005C
* <p>
* Placeholder: the actual check is not implemented (see the TODO in the body), so this
* method currently reports <code>false</code> for every character.
*
* @param character which is checked
* @return always <code>false</code> in the current implementation
*/
private static boolean isEscaped(char character) {
// TODO: mibo(151117): check how to implement
return false;
}
// /**
// * escape = "\" / "%5C" ; reverse solidus U+005C
// * @param character which is checked
// * @return true if character is allowed
// */
// private static boolean isEscaped(char character) {
// // TODO: mibo(151130): is checked in SearchPhraseState
// return false;
// }
/**
* qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
@ -173,14 +195,14 @@ public class SearchTokenizer {
*/
private static boolean isQCharUnescaped(char character) {
return isUnreserved(character)
|| isPctEncodedUnescaped(character)
|| isOtherDelims(character)
|| character == ':'
|| character == '@'
|| character == '/'
|| character == '$'
|| character == '\''
|| character == '=';
// || isPctEncodedUnescaped(character)
|| isOtherDelims(character)
|| character == ':'
|| character == '@'
|| character == '/'
|| character == '$'
|| character == '\''
|| character == '=';
}
/**
@ -190,43 +212,43 @@ public class SearchTokenizer {
*/
private static boolean isOtherDelims(char character) {
return character == '!'
|| character == '('
|| character == ')'
|| character == '*'
|| character == '+'
|| character == ','
|| character == ';';
|| character == '('
|| character == ')'
|| character == '*'
|| character == '+'
|| character == ','
|| character == ';';
}
/**
 * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
 * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
 * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
 *
 * HEXDIG = DIGIT / A-to-F
 *
 * <p>A percent-encoded value is a three character sequence and can therefore not be
 * recognised from a single character. The previous body converted the character with
 * <code>Integer.toHexString</code> and then looked for a leading <code>'%'</code>; the
 * hexadecimal representation never contains that character, so every branch was dead and
 * the method always returned <code>false</code>. That result is now returned directly.
 *
 * @param character which is checked
 * @return always <code>false</code>
 */
private static boolean isPctEncodedUnescaped(char character) {
  return false;
}
// /**
// * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
// * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
// * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
// *
// * HEXDIG = DIGIT / A-to-F
// *
// * @param character which is checked
// * @return true if character is allowed
// */
// private static boolean isPctEncodedUnescaped(char character) {
// String hex = Integer.toHexString(character);
// char aschar[] = hex.toCharArray();
// if(aschar[0] == '%') {
// if(aschar[1] == '2') {
// return aschar[2] != '2' && isHexDigit(aschar[2]);
// } else if(aschar[1] == '5') {
// return aschar[2] != 'C' && isHexDigit(aschar[2]);
// } else if(isHexDigit(aschar[1])) {
// return isHexDigit(aschar[2]);
// }
// }
// return false;
// }
private static boolean isHexDigit(char character) {
  // Decimal digits first, then the upper-case letters A..F (lower case is not accepted).
  if (character >= '0' && character <= '9') {
    return true;
  }
  return character >= 'A' && character <= 'F';
}
// private static boolean isHexDigit(char character) {
// return 'A' <= character && character <= 'F' // case A..F
// || '0' <= character && character <= '9'; // case 0..9
// }
/**
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
@ -235,10 +257,10 @@ public class SearchTokenizer {
*/
private static boolean isUnreserved(char character) {
return isAlphaOrDigit(character)
|| character == '-'
|| character == '.'
|| character == '_'
|| character == '~';
|| character == '-'
|| character == '.'
|| character == '_'
|| character == '~';
}
/**
@ -256,8 +278,6 @@ public class SearchTokenizer {
// BWS = *( SP / HTAB / "%20" / "%09" ) ; "bad" whitespace
// RWS = 1*( SP / HTAB / "%20" / "%09" ) ; "required" whitespace
static boolean isWhitespace(final char character) {
  // Only the literal SP and HTAB characters are matched; the percent-encoded
  // forms "%20" / "%09" from the grammar are not recognised by this check.
  switch (character) {
    case ' ':
    case '\t':
      return true;
    default:
      return false;
  }
}
@ -400,6 +420,7 @@ public class SearchTokenizer {
private class SearchPhraseState extends LiteralState {
private boolean closed = false;
private boolean escaped = false;
public SearchPhraseState(char c) throws SearchTokenizerException {
super(Token.PHRASE, c);
if (c != QUOTATION_MARK) {
@ -416,6 +437,16 @@ public class SearchTokenizer {
} else if (isWhitespace(c)) {
return new RwsState();
}
} else if(escaped) {
escaped = false;
if(c == QUOTATION_MARK || c == PHRASE_ESCAPE_CHAR) {
return allowed(c);
} else {
return forbidden(c);
}
} else if(c == PHRASE_ESCAPE_CHAR) {
escaped = true;
return this;
} else if (isAllowedPhrase(c)) {
return allowed(c);
} else if (isWhitespace(c)) {

View File

@ -250,6 +250,14 @@ public class SearchTokenizerTest {
// Verifies which characters are accepted inside a quoted search phrase, including
// the escape sequences for the quotation mark and the escape character itself.
@Test
public void characterInPhrase() throws Exception {
// plain phrases built from digits, letters and the unreserved punctuation - . _ ~
assertQuery("\"123\" OR \"ALPHA-._~\"").resultsIn(PHRASE, OR, PHRASE);
//escaped characters
// raw query: "\"123" OR "\\abc" -> the escape character is dropped from the phrase literal,
// expected literals (including the surrounding quotes): ""123" and "\abc"
assertQuery("\"\\\"123\" OR \"\\\\abc\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"123\""),
new Validator.Tuple(OR), new Validator.Tuple(PHRASE, "\"\\abc\""));
// raw query: "\"1\\23" -> expected literal: ""1\23"
assertQuery("\"\\\"1\\\\23\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"1\\23\""));
// exceptions
// raw query: "\"1\\ -> input ends without a closing quotation mark
assertQuery("\"\\\"1\\\\").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE);
// raw query: "1\" -> the final quotation mark is escaped, so the phrase is never closed
assertQuery("\"1\\\"").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE);
// raw query: "1\23" -> escape character followed by something other than escape/quote
assertQuery("\"1\\23\"").resultsIn(SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER);
}
@Test

View File

@ -45,6 +45,7 @@ import org.apache.olingo.server.core.uri.parser.UriParserException;
import org.apache.olingo.server.core.uri.parser.UriParserSemanticException;
import org.apache.olingo.server.core.uri.parser.UriParserSemanticException.MessageKeys;
import org.apache.olingo.server.core.uri.parser.UriParserSyntaxException;
import org.apache.olingo.server.core.uri.parser.search.SearchParserException;
import org.apache.olingo.server.core.uri.testutil.FilterValidator;
import org.apache.olingo.server.core.uri.testutil.TestUriValidator;
import org.apache.olingo.server.core.uri.validator.UriValidationException;
@ -5428,9 +5429,7 @@ public class TestFullResourcePath {
}
@Test
@Ignore("$search currently not implemented")
public void testSearch() throws Exception {
testUri.run("ESTwoKeyNav", "$search=abc");
testUri.run("ESTwoKeyNav", "$search=NOT abc");
@ -5462,6 +5461,19 @@ public class TestFullResourcePath {
testUri.run("ESTwoKeyNav", "$search=(abc AND def) ghi ");
testUri.run("ESTwoKeyNav", "$search=abc AND (def OR ghi)");
testUri.run("ESTwoKeyNav", "$search=abc AND (def ghi)");
// escaped characters
testUri.run("ESTwoKeyNav", "$search=\"abc\"");
testUri.run("ESTwoKeyNav", "$search=\"a\\\"bc\"");
testUri.run("ESTwoKeyNav", "$search=%22abc%22");
testUri.run("ESTwoKeyNav", "$search=%22a%5C%22bc%22");
testUri.run("ESTwoKeyNav", "$search=%22a%5C%5Cbc%22");
// wrong escaped characters
testUri.runEx("ESTwoKeyNav", "$search=%22a%22bc%22")
.isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
testUri.runEx("ESTwoKeyNav", "$search=%22a%5Cbc%22")
.isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
}
@Test

View File

@ -176,6 +176,11 @@ public class TestUriValidator implements TestValidator {
}
}
/**
 * Asserts that the message key of the stored exception equals the expected one.
 * Unlike {@code isExSyntax}, the concrete class of the exception is not checked.
 *
 * @param messageKey expected message key
 * @return this validator, to allow further chained checks
 */
public TestUriValidator isExceptionMessage(final ODataLibraryException.MessageKey messageKey) {
  final Object actualKey = exception.getMessageKey();
  assertEquals(messageKey, actualKey);
  return this;
}
public TestUriValidator isExSyntax(final UriParserSyntaxException.MessageKeys messageKey) {
assertEquals(UriParserSyntaxException.class, exception.getClass());
assertEquals(messageKey, exception.getMessageKey());