[OLINGO-568] Added support for escape of escape and quote characters
This commit is contained in:
parent
6dd0a0f3e5
commit
e5ac590794
|
@ -37,6 +37,13 @@ import java.util.List;
|
|||
* searchWord = 1*ALPHA ; Actually: any character from the Unicode categories L or Nl,
|
||||
* ; but not the words AND, OR, and NOT
|
||||
* </code>
|
||||
*
|
||||
* <b>ATTENTION:</b> For a <code>searchPhrase</code> the percent encoding is not supported by the
|
||||
* <code>SearchTokenizer</code>.<br/>
|
||||
* This decision was made because the <code>org.apache.olingo.server.core.uri.parser.Parser</code>
|
||||
* already handles each query option as a <code>percent decoded</code> string in its <code>parseUri</code> method (see
|
||||
* line <i>177ff</i>: <code>for (RawUri.QueryOption option : uri.queryOptionListDecoded)</code>).
|
||||
*
|
||||
*/
|
||||
public class SearchTokenizer {
|
||||
|
||||
|
@ -45,6 +52,7 @@ public class SearchTokenizer {
|
|||
private boolean finished = false;
|
||||
|
||||
protected static final char QUOTATION_MARK = '\"';
|
||||
protected static final char PHRASE_ESCAPE_CHAR = '\\';
|
||||
protected static final char CHAR_N = 'N';
|
||||
protected static final char CHAR_O = 'O';
|
||||
protected static final char CHAR_T = 'T';
|
||||
|
@ -126,45 +134,59 @@ public class SearchTokenizer {
|
|||
}
|
||||
|
||||
/**
|
||||
* searchPhrase = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark
|
||||
*
|
||||
* qchar-no-AMP-DQUOTE = qchar-unescaped / escape ( escape / quotation-mark )
|
||||
*
|
||||
* qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
|
||||
*
|
||||
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
||||
*
|
||||
* escape = "\" / "%5C" ; reverse solidus U+005C
|
||||
*
|
||||
* pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
|
||||
* <code>
|
||||
* <b>searchPhrase</b> = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark
|
||||
* <br/><br/>
|
||||
* <b>qchar-no-AMP-DQUOTE</b> = qchar-unescaped / escape ( escape / quotation-mark )
|
||||
* <br/><br/>
|
||||
* <b>qchar-unescaped</b> = unreserved / pct-encoded-unescaped / other-delims /
|
||||
* ":" / "@" / "/" / "?" / "$" / "'" / "="
|
||||
* <br/><br/>
|
||||
* <b>unreserved</b> = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
||||
* <br/><br/>
|
||||
* <b>escape</b> = "\" / "%5C" ; reverse solidus U+005C
|
||||
* <br/><br/>
|
||||
* <b>pct-encoded-unescaped</b> = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
|
||||
* / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
|
||||
* / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
|
||||
* <br/><br/>
|
||||
* <b>other-delims</b> = "!" / "(" / ")" / "*" / "+" / "," / ";"
|
||||
* <br/><br/>
|
||||
* <b>quotation-mark</b> = DQUOTE / "%22"
|
||||
* <br/><br/>
|
||||
* <b>ALPHA</b> = %x41-5A / %x61-7A
|
||||
* <br/>
|
||||
* <b>DIGIT</b> = %x30-39
|
||||
* <br/>
|
||||
* <b>DQUOTE</b> = %x22
|
||||
* </code>
|
||||
*
|
||||
* other-delims = "!" / "(" / ")" / "*" / "+" / "," / ";"
|
||||
*
|
||||
* quotation-mark = DQUOTE / "%22"
|
||||
*
|
||||
* ALPHA = %x41-5A / %x61-7A
|
||||
* DIGIT = %x30-39
|
||||
* DQUOTE = %x22
|
||||
* Checks if given <code>character</code> is allowed for a search phrase.
|
||||
* <b>ATTENTION:</b> Escaping and percent encoding are not validated here (and cannot be validated on
|
||||
* a single character).<br/>
|
||||
* Hence for the {@link #PHRASE_ESCAPE_CHAR} and the {@link #QUOTATION_MARK} characters this method will
|
||||
* return <code>FALSE</code>.<br/>
|
||||
* <b>Furthermore</b> percent encoded characters are also not validated (and can not be validated on
|
||||
* a single character).<br/>
|
||||
* Hence for the <code>%</code> character this method will return <code>FALSE</code>.<br/>
|
||||
*
|
||||
* @param character which is checked
|
||||
* @return true if character is allowed for a phrase
|
||||
*/
|
||||
static boolean isAllowedPhrase(final char character) {
|
||||
// FIXME mibo: check missing
|
||||
return isQCharUnescaped(character) || isEscaped(character);
|
||||
return isQCharUnescaped(character);// || isEscaped(character);
|
||||
}
|
||||
|
||||
/**
|
||||
* escape = "\" / "%5C" ; reverse solidus U+005C
|
||||
* @param character which is checked
|
||||
* @return true if character is allowed
|
||||
*/
|
||||
private static boolean isEscaped(char character) {
|
||||
// TODO: mibo(151117): check how to implement
|
||||
return false;
|
||||
}
|
||||
// /**
|
||||
// * escape = "\" / "%5C" ; reverse solidus U+005C
|
||||
// * @param character which is checked
|
||||
// * @return true if character is allowed
|
||||
// */
|
||||
// private static boolean isEscaped(char character) {
|
||||
// // TODO: mibo(151130): is checked in SearchPhraseState
|
||||
// return false;
|
||||
// }
|
||||
|
||||
/**
|
||||
* qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
|
||||
|
@ -173,14 +195,14 @@ public class SearchTokenizer {
|
|||
*/
|
||||
private static boolean isQCharUnescaped(char character) {
|
||||
return isUnreserved(character)
|
||||
|| isPctEncodedUnescaped(character)
|
||||
|| isOtherDelims(character)
|
||||
|| character == ':'
|
||||
|| character == '@'
|
||||
|| character == '/'
|
||||
|| character == '$'
|
||||
|| character == '\''
|
||||
|| character == '=';
|
||||
// || isPctEncodedUnescaped(character)
|
||||
|| isOtherDelims(character)
|
||||
|| character == ':'
|
||||
|| character == '@'
|
||||
|| character == '/'
|
||||
|| character == '$'
|
||||
|| character == '\''
|
||||
|| character == '=';
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -190,43 +212,43 @@ public class SearchTokenizer {
|
|||
*/
|
||||
private static boolean isOtherDelims(char character) {
|
||||
return character == '!'
|
||||
|| character == '('
|
||||
|| character == ')'
|
||||
|| character == '*'
|
||||
|| character == '+'
|
||||
|| character == ','
|
||||
|| character == ';';
|
||||
|| character == '('
|
||||
|| character == ')'
|
||||
|| character == '*'
|
||||
|| character == '+'
|
||||
|| character == ','
|
||||
|| character == ';';
|
||||
}
|
||||
|
||||
/**
|
||||
* pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
|
||||
* / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
|
||||
* / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
|
||||
*
|
||||
* HEXDIG = DIGIT / A-to-F
|
||||
*
|
||||
* @param character which is checked
|
||||
* @return true if character is allowed
|
||||
*/
|
||||
private static boolean isPctEncodedUnescaped(char character) {
|
||||
String hex = Integer.toHexString(character);
|
||||
char aschar[] = hex.toCharArray();
|
||||
if(aschar[0] == '%') {
|
||||
if(aschar[1] == '2') {
|
||||
return aschar[2] != '2' && isHexDigit(aschar[2]);
|
||||
} else if(aschar[1] == '5') {
|
||||
return aschar[2] != 'C' && isHexDigit(aschar[2]);
|
||||
} else if(isHexDigit(aschar[1])) {
|
||||
return isHexDigit(aschar[2]);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// /**
|
||||
// * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
|
||||
// * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
|
||||
// * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
|
||||
// *
|
||||
// * HEXDIG = DIGIT / A-to-F
|
||||
// *
|
||||
// * @param character which is checked
|
||||
// * @return true if character is allowed
|
||||
// */
|
||||
// private static boolean isPctEncodedUnescaped(char character) {
|
||||
// String hex = Integer.toHexString(character);
|
||||
// char aschar[] = hex.toCharArray();
|
||||
// if(aschar[0] == '%') {
|
||||
// if(aschar[1] == '2') {
|
||||
// return aschar[2] != '2' && isHexDigit(aschar[2]);
|
||||
// } else if(aschar[1] == '5') {
|
||||
// return aschar[2] != 'C' && isHexDigit(aschar[2]);
|
||||
// } else if(isHexDigit(aschar[1])) {
|
||||
// return isHexDigit(aschar[2]);
|
||||
// }
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
|
||||
private static boolean isHexDigit(char character) {
|
||||
return 'A' <= character && character <= 'F' // case A..F
|
||||
|| '0' <= character && character <= '9'; // case 0..9
|
||||
}
|
||||
// private static boolean isHexDigit(char character) {
|
||||
// return 'A' <= character && character <= 'F' // case A..F
|
||||
// || '0' <= character && character <= '9'; // case 0..9
|
||||
// }
|
||||
|
||||
/**
|
||||
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
||||
|
@ -235,10 +257,10 @@ public class SearchTokenizer {
|
|||
*/
|
||||
private static boolean isUnreserved(char character) {
|
||||
return isAlphaOrDigit(character)
|
||||
|| character == '-'
|
||||
|| character == '.'
|
||||
|| character == '_'
|
||||
|| character == '~';
|
||||
|| character == '-'
|
||||
|| character == '.'
|
||||
|| character == '_'
|
||||
|| character == '~';
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -256,8 +278,6 @@ public class SearchTokenizer {
|
|||
// BWS = *( SP / HTAB / "%20" / "%09" ) ; "bad" whitespace
|
||||
// RWS = 1*( SP / HTAB / "%20" / "%09" ) ; "required" whitespace
|
||||
/**
* Checks whether the given character is a literal space or a literal horizontal tab.
* Only these two characters are compared; the percent-encoded forms listed in the
* BWS / RWS rules ("%20", "%09") are not recognized by this method.
*
* @param character which is checked
* @return true if character is ' ' or '\t', otherwise false
*/
static boolean isWhitespace(final char character) {
|
||||
// Grammar alternatives for a single whitespace: ( SP / HTAB / "%20" / "%09" )
|
||||
// TODO mibo: add the missing whitespace alternatives (only SP and HTAB are checked below)
|
||||
return character == ' ' || character == '\t';
|
||||
}
|
||||
|
||||
|
@ -400,6 +420,7 @@ public class SearchTokenizer {
|
|||
|
||||
private class SearchPhraseState extends LiteralState {
|
||||
private boolean closed = false;
|
||||
private boolean escaped = false;
|
||||
public SearchPhraseState(char c) throws SearchTokenizerException {
|
||||
super(Token.PHRASE, c);
|
||||
if (c != QUOTATION_MARK) {
|
||||
|
@ -416,6 +437,16 @@ public class SearchTokenizer {
|
|||
} else if (isWhitespace(c)) {
|
||||
return new RwsState();
|
||||
}
|
||||
} else if(escaped) {
|
||||
escaped = false;
|
||||
if(c == QUOTATION_MARK || c == PHRASE_ESCAPE_CHAR) {
|
||||
return allowed(c);
|
||||
} else {
|
||||
return forbidden(c);
|
||||
}
|
||||
} else if(c == PHRASE_ESCAPE_CHAR) {
|
||||
escaped = true;
|
||||
return this;
|
||||
} else if (isAllowedPhrase(c)) {
|
||||
return allowed(c);
|
||||
} else if (isWhitespace(c)) {
|
||||
|
|
|
@ -250,6 +250,14 @@ public class SearchTokenizerTest {
|
|||
/**
* Checks tokenization of characters inside quoted search phrases, including
* escaped quotation marks and escaped escape characters, and the error keys
* expected for malformed escape sequences.
*/
@Test
|
||||
public void characterInPhrase() throws Exception {
|
||||
assertQuery("\"123\" OR \"ALPHA-._~\"").resultsIn(PHRASE, OR, PHRASE);
|
||||
// escaped characters: the expected phrase literals below contain the escaped
// quotation mark / backslash itself, without the preceding escape character
|
||||
assertQuery("\"\\\"123\" OR \"\\\\abc\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"123\""),
|
||||
new Validator.Tuple(OR), new Validator.Tuple(PHRASE, "\"\\abc\""));
|
||||
assertQuery("\"\\\"1\\\\23\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"1\\23\""));
|
||||
// exceptions: the first two inputs have no unescaped closing quotation mark
// (input ends after an escaped backslash / the final quotation mark is escaped)
// and expect INVALID_TOKEN_STATE; the third has an escape character followed
// by '2' and expects FORBIDDEN_CHARACTER
|
||||
assertQuery("\"\\\"1\\\\").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE);
|
||||
assertQuery("\"1\\\"").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE);
|
||||
assertQuery("\"1\\23\"").resultsIn(SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -45,6 +45,7 @@ import org.apache.olingo.server.core.uri.parser.UriParserException;
|
|||
import org.apache.olingo.server.core.uri.parser.UriParserSemanticException;
|
||||
import org.apache.olingo.server.core.uri.parser.UriParserSemanticException.MessageKeys;
|
||||
import org.apache.olingo.server.core.uri.parser.UriParserSyntaxException;
|
||||
import org.apache.olingo.server.core.uri.parser.search.SearchParserException;
|
||||
import org.apache.olingo.server.core.uri.testutil.FilterValidator;
|
||||
import org.apache.olingo.server.core.uri.testutil.TestUriValidator;
|
||||
import org.apache.olingo.server.core.uri.validator.UriValidationException;
|
||||
|
@ -5428,9 +5429,7 @@ public class TestFullResourcePath {
|
|||
}
|
||||
|
||||
@Test
|
||||
@Ignore("$search currently not implemented")
|
||||
public void testSearch() throws Exception {
|
||||
|
||||
testUri.run("ESTwoKeyNav", "$search=abc");
|
||||
testUri.run("ESTwoKeyNav", "$search=NOT abc");
|
||||
|
||||
|
@ -5462,6 +5461,19 @@ public class TestFullResourcePath {
|
|||
testUri.run("ESTwoKeyNav", "$search=(abc AND def) ghi ");
|
||||
testUri.run("ESTwoKeyNav", "$search=abc AND (def OR ghi)");
|
||||
testUri.run("ESTwoKeyNav", "$search=abc AND (def ghi)");
|
||||
|
||||
// escaped characters
|
||||
testUri.run("ESTwoKeyNav", "$search=\"abc\"");
|
||||
testUri.run("ESTwoKeyNav", "$search=\"a\\\"bc\"");
|
||||
testUri.run("ESTwoKeyNav", "$search=%22abc%22");
|
||||
testUri.run("ESTwoKeyNav", "$search=%22a%5C%22bc%22");
|
||||
testUri.run("ESTwoKeyNav", "$search=%22a%5C%5Cbc%22");
|
||||
|
||||
// wrong escaped characters
|
||||
testUri.runEx("ESTwoKeyNav", "$search=%22a%22bc%22")
|
||||
.isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
|
||||
testUri.runEx("ESTwoKeyNav", "$search=%22a%5Cbc%22")
|
||||
.isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -176,6 +176,11 @@ public class TestUriValidator implements TestValidator {
|
|||
}
|
||||
}
|
||||
|
||||
/**
* Asserts that the message key of <code>exception</code> equals the given message key.
* The class of the exception is not checked by this method.
*
* @param messageKey the expected message key
* @return this validator
*/
public TestUriValidator isExceptionMessage(final ODataLibraryException.MessageKey messageKey) {
|
||||
assertEquals(messageKey, exception.getMessageKey());
|
||||
return this;
|
||||
}
|
||||
|
||||
public TestUriValidator isExSyntax(final UriParserSyntaxException.MessageKeys messageKey) {
|
||||
assertEquals(UriParserSyntaxException.class, exception.getClass());
|
||||
assertEquals(messageKey, exception.getMessageKey());
|
||||
|
|
Loading…
Reference in New Issue