[OLINGO-1571] Fixed special chars for search
This commit is contained in:
parent
eb1a2d80f6
commit
683e471dc2
|
@ -124,83 +124,17 @@ public class SearchTokenizer {
|
||||||
|| isOtherDelimsForWord(character);
|
|| isOtherDelimsForWord(character);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <code>
|
* The check for allowed characters in a <code>SearchPhrase</code> assumes that
|
||||||
* <b>searchPhrase</b> = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark
|
* the whole phrase is already percent decoded.
|
||||||
* <br/><br/>
|
* Hence, all characters are allowed except the double quote (<code>"</code>).
|
||||||
* <b>qchar-no-AMP-DQUOTE</b> = qchar-unescaped / escape ( escape / quotation-mark )
|
|
||||||
* <br/><br/>
|
|
||||||
* <b>qchar-unescaped</b> = unreserved / pct-encoded-unescaped / other-delims /
|
|
||||||
* ":" / "@" / "/" / "?" / "$" / "'" / "="
|
|
||||||
* <br/><br/>
|
|
||||||
* <b>unreserved</b> = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
|
||||||
* <br/><br/>
|
|
||||||
* <b>escape</b> = "\" / "%5C" ; reverse solidus U+005C
|
|
||||||
* <br/><br/>
|
|
||||||
* <b>pct-encoded-unescaped</b> = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
|
|
||||||
* / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
|
|
||||||
* / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
|
|
||||||
* <br/><br/>
|
|
||||||
* <b>other-delims</b> = "!" / "(" / ")" / "*" / "+" / "," / ";"
|
|
||||||
* <br/><br/>
|
|
||||||
* <b>quotation-mark</b> = DQUOTE / "%22"
|
|
||||||
* <br/><br/>
|
|
||||||
* <b>ALPHA</b> = %x41-5A / %x61-7A
|
|
||||||
* <br/>
|
|
||||||
* <b>DIGIT</b> = %x30-39
|
|
||||||
* <br/>
|
|
||||||
* <b>DQUOTE</b> = %x22
|
|
||||||
* </code>
|
|
||||||
*
|
|
||||||
* Checks if given <code>character</code> is allowed for a search phrase.
|
|
||||||
* <b>ATTENTION:</b> Escaping and percent encoding are not validated here (and cannot be validated on
|
|
||||||
* a single character).<br/>
|
|
||||||
* Hence for the {@link #PHRASE_ESCAPE_CHAR} and the {@link #QUOTATION_MARK} characters this method will
|
|
||||||
* return <code>FALSE</code>.<br/>
|
|
||||||
* <b>Furthermore</b> percent encoded characters are also not validated (and can not be validated on
|
|
||||||
* a single character).<br/>
|
|
||||||
* Hence for the <code>%</code> character this method assumes that it was percent encoded and is now decoded
|
|
||||||
* and will return <code>TRUE</code>.<br/>
|
|
||||||
*
|
*
|
||||||
* @param character which is checked
|
* @param character which is checked
|
||||||
* @return true if character is allowed for a phrase
|
* @return true if character is allowed for a phrase
|
||||||
*/
|
*/
|
||||||
static boolean isAllowedPhrase(final char character) {
|
static boolean isAllowedPhrase(final char character) {
|
||||||
// the '%' is allowed because it is assumed that it was percent encoded and is now decoded
|
return Character.isUnicodeIdentifierStart(character)
|
||||||
return isQCharUnescaped(character)
|
|| character != '"';
|
||||||
|| character == '%'
|
|
||||||
|| Character.isUnicodeIdentifierStart(character);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
|
|
||||||
* @param character which is checked
|
|
||||||
* @return true if character is allowed
|
|
||||||
*/
|
|
||||||
private static boolean isQCharUnescaped(final char character) {
|
|
||||||
return isUnreserved(character)
|
|
||||||
|| isOtherDelims(character)
|
|
||||||
|| character == ':'
|
|
||||||
|| character == '@'
|
|
||||||
|| character == '/'
|
|
||||||
|| character == '$'
|
|
||||||
|| character == '\''
|
|
||||||
|| character == '=';
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* other-delims = "!" / "(" / ")" / "*" / "+" / "," / ";"
|
|
||||||
* @param character which is checked
|
|
||||||
* @return true if character is allowed
|
|
||||||
*/
|
|
||||||
private static boolean isOtherDelims(final char character) {
|
|
||||||
return character == '!'
|
|
||||||
|| character == '('
|
|
||||||
|| character == ')'
|
|
||||||
|| character == '*'
|
|
||||||
|| character == '+'
|
|
||||||
|| character == ','
|
|
||||||
|| character == ';';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -212,7 +146,6 @@ public class SearchTokenizer {
|
||||||
return character == '!'
|
return character == '!'
|
||||||
|| character == '*'
|
|| character == '*'
|
||||||
|| character == '+'
|
|| character == '+'
|
||||||
|| character == ','
|
|
||||||
|| character == ':'
|
|| character == ':'
|
||||||
|| character == '@'
|
|| character == '@'
|
||||||
|| character == '/'
|
|| character == '/'
|
||||||
|
@ -234,7 +167,7 @@ public class SearchTokenizer {
|
||||||
|| character == '<'
|
|| character == '<'
|
||||||
|| character == '`';
|
|| character == '`';
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
||||||
* @param character which is checked
|
* @param character which is checked
|
||||||
|
@ -355,11 +288,11 @@ public class SearchTokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* As per the updated abnf
|
* As per the updated abnf
|
||||||
* https://github.com/oasis-tcs/odata-abnf/blob/master/abnf/odata-abnf-construction-rules.txt#L332-L356.
|
* https://github.com/oasis-tcs/odata-abnf/blob/master/abnf/odata-abnf-construction-rules.txt#L332-L356.
|
||||||
* searchWord = 1*( ALPHA / DIGIT / COMMA / "." / "-" / pct-encoded )
|
* searchWord = 1*( ALPHA / DIGIT / COMMA / "." / "-" / pct-encoded )
|
||||||
* This includes Unicode characters of categories
|
* This includes Unicode characters of categories
|
||||||
* L or N using UTF-8 and percent-encoding.
|
* L or N using UTF-8 and percent-encoding.
|
||||||
*/
|
*/
|
||||||
private class SearchWordState extends LiteralState {
|
private class SearchWordState extends LiteralState {
|
||||||
|
|
|
@ -6,9 +6,9 @@
|
||||||
* to you under the Apache License, Version 2.0 (the
|
* to you under the Apache License, Version 2.0 (the
|
||||||
* "License"); you may not use this file except in compliance
|
* "License"); you may not use this file except in compliance
|
||||||
* with the License. You may obtain a copy of the License at
|
* with the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing,
|
* Unless required by applicable law or agreed to in writing,
|
||||||
* software distributed under the License is distributed on an
|
* software distributed under the License is distributed on an
|
||||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
@ -107,6 +107,13 @@ public class SearchTokenizerTest {
|
||||||
assertQuery("abc or \"xyz\"").resultsIn(WORD, WORD, PHRASE);
|
assertQuery("abc or \"xyz\"").resultsIn(WORD, WORD, PHRASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parsePhrase_decoded() throws Exception {
|
||||||
|
assertQuery("\"a & b\"").resultsIn(PHRASE);
|
||||||
|
assertQuery("\" ! # $ % & ' ( ) * + , / : ; = ? @ [ ] \"").resultsIn(PHRASE);
|
||||||
|
assertQuery("\" - . < > ^ _ ` { | } ~ \"").resultsIn(PHRASE);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void parseNot() throws Exception {
|
public void parseNot() throws Exception {
|
||||||
assertQuery("NOT").resultsIn(NOT);
|
assertQuery("NOT").resultsIn(NOT);
|
||||||
|
@ -401,4 +408,4 @@ public class SearchTokenizerTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue