mirror of
https://github.com/apache/olingo-odata4.git
synced 2025-02-06 18:18:55 +00:00
[OLINGO-568] Added support for escape of escape and quote characters
This commit is contained in:
parent
6dd0a0f3e5
commit
e5ac590794
@ -37,6 +37,13 @@ import java.util.List;
|
|||||||
* searchWord = 1*ALPHA ; Actually: any character from the Unicode categories L or Nl,
|
* searchWord = 1*ALPHA ; Actually: any character from the Unicode categories L or Nl,
|
||||||
* ; but not the words AND, OR, and NOT
|
* ; but not the words AND, OR, and NOT
|
||||||
* </code>
|
* </code>
|
||||||
|
*
|
||||||
|
* <b>ATTENTION:</b> For a <code>searchPhrase</code> the percent encoding is not supported by the
|
||||||
|
* <code>SearchTokenizer</code>.<br/>
|
||||||
|
* This decision was made because the <code>org.apache.olingo.server.core.uri.parser.Parser</code>
|
||||||
|
* already handles each query option as a <code>percent decoded</code> string in its <code>parseUri</code> method (see
|
||||||
|
* line <i>177ff</i>: <code>for (RawUri.QueryOption option : uri.queryOptionListDecoded)</code>).
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
public class SearchTokenizer {
|
public class SearchTokenizer {
|
||||||
|
|
||||||
@ -45,6 +52,7 @@ public class SearchTokenizer {
|
|||||||
private boolean finished = false;
|
private boolean finished = false;
|
||||||
|
|
||||||
protected static final char QUOTATION_MARK = '\"';
|
protected static final char QUOTATION_MARK = '\"';
|
||||||
|
protected static final char PHRASE_ESCAPE_CHAR = '\\';
|
||||||
protected static final char CHAR_N = 'N';
|
protected static final char CHAR_N = 'N';
|
||||||
protected static final char CHAR_O = 'O';
|
protected static final char CHAR_O = 'O';
|
||||||
protected static final char CHAR_T = 'T';
|
protected static final char CHAR_T = 'T';
|
||||||
@ -126,45 +134,59 @@ public class SearchTokenizer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* searchPhrase = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark
|
* <code>
|
||||||
*
|
* <b>searchPhrase</b> = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark
|
||||||
* qchar-no-AMP-DQUOTE = qchar-unescaped / escape ( escape / quotation-mark )
|
* <br/><br/>
|
||||||
*
|
* <b>qchar-no-AMP-DQUOTE</b> = qchar-unescaped / escape ( escape / quotation-mark )
|
||||||
* qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
|
* <br/><br/>
|
||||||
*
|
* <b>qchar-unescaped</b> = unreserved / pct-encoded-unescaped / other-delims /
|
||||||
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
* ":" / "@" / "/" / "?" / "$" / "'" / "="
|
||||||
*
|
* <br/><br/>
|
||||||
* escape = "\" / "%5C" ; reverse solidus U+005C
|
* <b>unreserved</b> = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
||||||
*
|
* <br/><br/>
|
||||||
* pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
|
* <b>escape</b> = "\" / "%5C" ; reverse solidus U+005C
|
||||||
|
* <br/><br/>
|
||||||
|
* <b>pct-encoded-unescaped</b> = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
|
||||||
* / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
|
* / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
|
||||||
* / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
|
* / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
|
||||||
|
* <br/><br/>
|
||||||
|
* <b>other-delims</b> = "!" / "(" / ")" / "*" / "+" / "," / ";"
|
||||||
|
* <br/><br/>
|
||||||
|
* <b>quotation-mark</b> = DQUOTE / "%22"
|
||||||
|
* <br/><br/>
|
||||||
|
* <b>ALPHA</b> = %x41-5A / %x61-7A
|
||||||
|
* <br/>
|
||||||
|
* <b>DIGIT</b> = %x30-39
|
||||||
|
* <br/>
|
||||||
|
* <b>DQUOTE</b> = %x22
|
||||||
|
* </code>
|
||||||
*
|
*
|
||||||
* other-delims = "!" / "(" / ")" / "*" / "+" / "," / ";"
|
* Checks if given <code>character</code> is allowed for a search phrase.
|
||||||
*
|
* <b>ATTENTION:</b> Escaping and percent encoding are not validated here (and cannot be validated on
|
||||||
* quotation-mark = DQUOTE / "%22"
|
* a single character).<br/>
|
||||||
*
|
* Hence for the {@link #PHRASE_ESCAPE_CHAR} and the {@link #QUOTATION_MARK} characters this method will
|
||||||
* ALPHA = %x41-5A / %x61-7A
|
* return <code>FALSE</code>.<br/>
|
||||||
* DIGIT = %x30-39
|
* <b>Furthermore</b> percent encoded characters are also not validated (and can not be validated on
|
||||||
* DQUOTE = %x22
|
* a single character).<br/>
|
||||||
|
* Hence for the <code>%</code> character this method will return <code>FALSE</code>.<br/>
|
||||||
*
|
*
|
||||||
* @param character which is checked
|
* @param character which is checked
|
||||||
* @return true if character is allowed for a phrase
|
* @return true if character is allowed for a phrase
|
||||||
*/
|
*/
|
||||||
static boolean isAllowedPhrase(final char character) {
|
static boolean isAllowedPhrase(final char character) {
|
||||||
// FIXME mibo: check missing
|
// FIXME mibo: check missing
|
||||||
return isQCharUnescaped(character) || isEscaped(character);
|
return isQCharUnescaped(character);// || isEscaped(character);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// /**
|
||||||
* escape = "\" / "%5C" ; reverse solidus U+005C
|
// * escape = "\" / "%5C" ; reverse solidus U+005C
|
||||||
* @param character which is checked
|
// * @param character which is checked
|
||||||
* @return true if character is allowed
|
// * @return true if character is allowed
|
||||||
*/
|
// */
|
||||||
private static boolean isEscaped(char character) {
|
// private static boolean isEscaped(char character) {
|
||||||
// TODO: mibo(151117): check how to implement
|
// // TODO: mibo(151130): is checked in SearchPhraseState
|
||||||
return false;
|
// return false;
|
||||||
}
|
// }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
|
* qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
|
||||||
@ -173,14 +195,14 @@ public class SearchTokenizer {
|
|||||||
*/
|
*/
|
||||||
private static boolean isQCharUnescaped(char character) {
|
private static boolean isQCharUnescaped(char character) {
|
||||||
return isUnreserved(character)
|
return isUnreserved(character)
|
||||||
|| isPctEncodedUnescaped(character)
|
// || isPctEncodedUnescaped(character)
|
||||||
|| isOtherDelims(character)
|
|| isOtherDelims(character)
|
||||||
|| character == ':'
|
|| character == ':'
|
||||||
|| character == '@'
|
|| character == '@'
|
||||||
|| character == '/'
|
|| character == '/'
|
||||||
|| character == '$'
|
|| character == '$'
|
||||||
|| character == '\''
|
|| character == '\''
|
||||||
|| character == '=';
|
|| character == '=';
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -190,43 +212,43 @@ public class SearchTokenizer {
|
|||||||
*/
|
*/
|
||||||
private static boolean isOtherDelims(char character) {
|
private static boolean isOtherDelims(char character) {
|
||||||
return character == '!'
|
return character == '!'
|
||||||
|| character == '('
|
|| character == '('
|
||||||
|| character == ')'
|
|| character == ')'
|
||||||
|| character == '*'
|
|| character == '*'
|
||||||
|| character == '+'
|
|| character == '+'
|
||||||
|| character == ','
|
|| character == ','
|
||||||
|| character == ';';
|
|| character == ';';
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// /**
|
||||||
* pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
|
// * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
|
||||||
* / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
|
// * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
|
||||||
* / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
|
// * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
|
||||||
*
|
// *
|
||||||
* HEXDIG = DIGIT / A-to-F
|
// * HEXDIG = DIGIT / A-to-F
|
||||||
*
|
// *
|
||||||
* @param character which is checked
|
// * @param character which is checked
|
||||||
* @return true if character is allowed
|
// * @return true if character is allowed
|
||||||
*/
|
// */
|
||||||
private static boolean isPctEncodedUnescaped(char character) {
|
// private static boolean isPctEncodedUnescaped(char character) {
|
||||||
String hex = Integer.toHexString(character);
|
// String hex = Integer.toHexString(character);
|
||||||
char aschar[] = hex.toCharArray();
|
// char aschar[] = hex.toCharArray();
|
||||||
if(aschar[0] == '%') {
|
// if(aschar[0] == '%') {
|
||||||
if(aschar[1] == '2') {
|
// if(aschar[1] == '2') {
|
||||||
return aschar[2] != '2' && isHexDigit(aschar[2]);
|
// return aschar[2] != '2' && isHexDigit(aschar[2]);
|
||||||
} else if(aschar[1] == '5') {
|
// } else if(aschar[1] == '5') {
|
||||||
return aschar[2] != 'C' && isHexDigit(aschar[2]);
|
// return aschar[2] != 'C' && isHexDigit(aschar[2]);
|
||||||
} else if(isHexDigit(aschar[1])) {
|
// } else if(isHexDigit(aschar[1])) {
|
||||||
return isHexDigit(aschar[2]);
|
// return isHexDigit(aschar[2]);
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
return false;
|
// return false;
|
||||||
}
|
// }
|
||||||
|
|
||||||
private static boolean isHexDigit(char character) {
|
// private static boolean isHexDigit(char character) {
|
||||||
return 'A' <= character && character <= 'F' // case A..F
|
// return 'A' <= character && character <= 'F' // case A..F
|
||||||
|| '0' <= character && character <= '9'; // case 0..9
|
// || '0' <= character && character <= '9'; // case 0..9
|
||||||
}
|
// }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
||||||
@ -235,10 +257,10 @@ public class SearchTokenizer {
|
|||||||
*/
|
*/
|
||||||
private static boolean isUnreserved(char character) {
|
private static boolean isUnreserved(char character) {
|
||||||
return isAlphaOrDigit(character)
|
return isAlphaOrDigit(character)
|
||||||
|| character == '-'
|
|| character == '-'
|
||||||
|| character == '.'
|
|| character == '.'
|
||||||
|| character == '_'
|
|| character == '_'
|
||||||
|| character == '~';
|
|| character == '~';
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -256,8 +278,6 @@ public class SearchTokenizer {
|
|||||||
// BWS = *( SP / HTAB / "%20" / "%09" ) ; "bad" whitespace
|
// BWS = *( SP / HTAB / "%20" / "%09" ) ; "bad" whitespace
|
||||||
// RWS = 1*( SP / HTAB / "%20" / "%09" ) ; "required" whitespace
|
// RWS = 1*( SP / HTAB / "%20" / "%09" ) ; "required" whitespace
|
||||||
static boolean isWhitespace(final char character) {
|
static boolean isWhitespace(final char character) {
|
||||||
// ( SP / HTAB / "%20" / "%09" )
|
|
||||||
// TODO mibo: add missing whitespaces
|
|
||||||
return character == ' ' || character == '\t';
|
return character == ' ' || character == '\t';
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -400,6 +420,7 @@ public class SearchTokenizer {
|
|||||||
|
|
||||||
private class SearchPhraseState extends LiteralState {
|
private class SearchPhraseState extends LiteralState {
|
||||||
private boolean closed = false;
|
private boolean closed = false;
|
||||||
|
private boolean escaped = false;
|
||||||
public SearchPhraseState(char c) throws SearchTokenizerException {
|
public SearchPhraseState(char c) throws SearchTokenizerException {
|
||||||
super(Token.PHRASE, c);
|
super(Token.PHRASE, c);
|
||||||
if (c != QUOTATION_MARK) {
|
if (c != QUOTATION_MARK) {
|
||||||
@ -416,6 +437,16 @@ public class SearchTokenizer {
|
|||||||
} else if (isWhitespace(c)) {
|
} else if (isWhitespace(c)) {
|
||||||
return new RwsState();
|
return new RwsState();
|
||||||
}
|
}
|
||||||
|
} else if(escaped) {
|
||||||
|
escaped = false;
|
||||||
|
if(c == QUOTATION_MARK || c == PHRASE_ESCAPE_CHAR) {
|
||||||
|
return allowed(c);
|
||||||
|
} else {
|
||||||
|
return forbidden(c);
|
||||||
|
}
|
||||||
|
} else if(c == PHRASE_ESCAPE_CHAR) {
|
||||||
|
escaped = true;
|
||||||
|
return this;
|
||||||
} else if (isAllowedPhrase(c)) {
|
} else if (isAllowedPhrase(c)) {
|
||||||
return allowed(c);
|
return allowed(c);
|
||||||
} else if (isWhitespace(c)) {
|
} else if (isWhitespace(c)) {
|
||||||
|
@ -250,6 +250,14 @@ public class SearchTokenizerTest {
|
|||||||
@Test
|
@Test
|
||||||
public void characterInPhrase() throws Exception {
|
public void characterInPhrase() throws Exception {
|
||||||
assertQuery("\"123\" OR \"ALPHA-._~\"").resultsIn(PHRASE, OR, PHRASE);
|
assertQuery("\"123\" OR \"ALPHA-._~\"").resultsIn(PHRASE, OR, PHRASE);
|
||||||
|
//escaped characters
|
||||||
|
assertQuery("\"\\\"123\" OR \"\\\\abc\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"123\""),
|
||||||
|
new Validator.Tuple(OR), new Validator.Tuple(PHRASE, "\"\\abc\""));
|
||||||
|
assertQuery("\"\\\"1\\\\23\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"1\\23\""));
|
||||||
|
// exceptions
|
||||||
|
assertQuery("\"\\\"1\\\\").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE);
|
||||||
|
assertQuery("\"1\\\"").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE);
|
||||||
|
assertQuery("\"1\\23\"").resultsIn(SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -45,6 +45,7 @@ import org.apache.olingo.server.core.uri.parser.UriParserException;
|
|||||||
import org.apache.olingo.server.core.uri.parser.UriParserSemanticException;
|
import org.apache.olingo.server.core.uri.parser.UriParserSemanticException;
|
||||||
import org.apache.olingo.server.core.uri.parser.UriParserSemanticException.MessageKeys;
|
import org.apache.olingo.server.core.uri.parser.UriParserSemanticException.MessageKeys;
|
||||||
import org.apache.olingo.server.core.uri.parser.UriParserSyntaxException;
|
import org.apache.olingo.server.core.uri.parser.UriParserSyntaxException;
|
||||||
|
import org.apache.olingo.server.core.uri.parser.search.SearchParserException;
|
||||||
import org.apache.olingo.server.core.uri.testutil.FilterValidator;
|
import org.apache.olingo.server.core.uri.testutil.FilterValidator;
|
||||||
import org.apache.olingo.server.core.uri.testutil.TestUriValidator;
|
import org.apache.olingo.server.core.uri.testutil.TestUriValidator;
|
||||||
import org.apache.olingo.server.core.uri.validator.UriValidationException;
|
import org.apache.olingo.server.core.uri.validator.UriValidationException;
|
||||||
@ -5428,9 +5429,7 @@ public class TestFullResourcePath {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@Ignore("$search currently not implemented")
|
|
||||||
public void testSearch() throws Exception {
|
public void testSearch() throws Exception {
|
||||||
|
|
||||||
testUri.run("ESTwoKeyNav", "$search=abc");
|
testUri.run("ESTwoKeyNav", "$search=abc");
|
||||||
testUri.run("ESTwoKeyNav", "$search=NOT abc");
|
testUri.run("ESTwoKeyNav", "$search=NOT abc");
|
||||||
|
|
||||||
@ -5462,6 +5461,19 @@ public class TestFullResourcePath {
|
|||||||
testUri.run("ESTwoKeyNav", "$search=(abc AND def) ghi ");
|
testUri.run("ESTwoKeyNav", "$search=(abc AND def) ghi ");
|
||||||
testUri.run("ESTwoKeyNav", "$search=abc AND (def OR ghi)");
|
testUri.run("ESTwoKeyNav", "$search=abc AND (def OR ghi)");
|
||||||
testUri.run("ESTwoKeyNav", "$search=abc AND (def ghi)");
|
testUri.run("ESTwoKeyNav", "$search=abc AND (def ghi)");
|
||||||
|
|
||||||
|
// escaped characters
|
||||||
|
testUri.run("ESTwoKeyNav", "$search=\"abc\"");
|
||||||
|
testUri.run("ESTwoKeyNav", "$search=\"a\\\"bc\"");
|
||||||
|
testUri.run("ESTwoKeyNav", "$search=%22abc%22");
|
||||||
|
testUri.run("ESTwoKeyNav", "$search=%22a%5C%22bc%22");
|
||||||
|
testUri.run("ESTwoKeyNav", "$search=%22a%5C%5Cbc%22");
|
||||||
|
|
||||||
|
// wrong escaped characters
|
||||||
|
testUri.runEx("ESTwoKeyNav", "$search=%22a%22bc%22")
|
||||||
|
.isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
|
||||||
|
testUri.runEx("ESTwoKeyNav", "$search=%22a%5Cbc%22")
|
||||||
|
.isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -176,6 +176,11 @@ public class TestUriValidator implements TestValidator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public TestUriValidator isExceptionMessage(final ODataLibraryException.MessageKey messageKey) {
|
||||||
|
assertEquals(messageKey, exception.getMessageKey());
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public TestUriValidator isExSyntax(final UriParserSyntaxException.MessageKeys messageKey) {
|
public TestUriValidator isExSyntax(final UriParserSyntaxException.MessageKeys messageKey) {
|
||||||
assertEquals(UriParserSyntaxException.class, exception.getClass());
|
assertEquals(UriParserSyntaxException.class, exception.getClass());
|
||||||
assertEquals(messageKey, exception.getMessageKey());
|
assertEquals(messageKey, exception.getMessageKey());
|
||||||
|
Loading…
x
Reference in New Issue
Block a user