From 0fc1e2c2f7b82c5dbb83edd76355b0a928b87b81 Mon Sep 17 00:00:00 2001 From: sabi0 <2sabio@gmail.com> Date: Mon, 8 Jan 2024 22:18:37 +0100 Subject: [PATCH] Code cleanups in EscapeQuerySyntaxImpl (#12973) --- .../queryparser/classic/QueryParserBase.java | 17 ++-- .../parser/EscapeQuerySyntaxImpl.java | 99 ++++++++++--------- 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java index f8d0d539e49..e7d8c55d723 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java @@ -246,8 +246,8 @@ public abstract class QueryParserBase extends QueryBuilder * Sets the boolean operator of the QueryParser. In default mode (OR_OPERATOR) terms * without any modifiers are considered optional: for example capital of Hungary is * equal to capital OR of OR Hungary.
- * In AND_OPERATOR mode terms are considered to be in conjunction: the above - * mentioned query is parsed as capital AND of AND Hungary + * In AND_OPERATOR mode terms are considered to be in conjunction: the + * above-mentioned query is parsed as capital AND of AND Hungary */ public void setDefaultOperator(Operator op) { this.operator = op; @@ -378,7 +378,7 @@ public abstract class QueryParserBase extends QueryBuilder // If this term is introduced by OR, make the preceding term optional, // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) // notice if the input is a OR b, first term is parsed as required; without - // this modification a OR b would parsed as +a OR b + // this modification a OR b would be parsed as +a OR b BooleanClause c = clauses.get(clauses.size() - 1); if (!c.isProhibited()) clauses.set(clauses.size() - 1, new BooleanClause(c.getQuery(), Occur.SHOULD)); @@ -659,7 +659,7 @@ public abstract class QueryParserBase extends QueryBuilder * disallow */ protected Query getBooleanQuery(List clauses) throws ParseException { - if (clauses.size() == 0) { + if (clauses.isEmpty()) { return null; // all clause words were filtered away by the analyzer. } BooleanQuery.Builder query = newBooleanQuery(); @@ -902,8 +902,7 @@ public abstract class QueryParserBase extends QueryBuilder * Returns a String where the escape char has been removed, or kept only once if there was a * double escape. * - *

Supports escaped unicode characters, e. g. translates \\u0041 to A - * . + *

Supports escaped Unicode characters, e.g. translates {@code \u005Cu0041} to {@code A}. */ String discardEscapeChar(String input) throws ParseException { // Create char array to hold unescaped char sequence @@ -919,7 +918,7 @@ public abstract class QueryParserBase extends QueryBuilder boolean lastCharWasEscapeChar = false; // The multiplier the current unicode digit must be multiplied with. - // E. g. the first digit must be multiplied with 16^3, the second with 16^2... + // E.g. the first digit must be multiplied with 16^3, the second with 16^2... int codePointMultiplier = 0; // Used to calculate the codepoint of the escaped unicode character @@ -955,7 +954,7 @@ public abstract class QueryParserBase extends QueryBuilder } if (codePointMultiplier > 0) { - throw new ParseException("Truncated unicode escape sequence."); + throw new ParseException("Truncated Unicode escape sequence."); } if (lastCharWasEscapeChar) { @@ -966,7 +965,7 @@ public abstract class QueryParserBase extends QueryBuilder } /** Returns the numeric value of the hexadecimal character */ - static final int hexToInt(char c) throws ParseException { + static int hexToInt(char c) throws ParseException { if ('0' <= c && c <= '9') { return c - '0'; } else if ('a' <= c && c <= 'f') { diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/EscapeQuerySyntaxImpl.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/EscapeQuerySyntaxImpl.java index ea534875528..d8bb456301a 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/EscapeQuerySyntaxImpl.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/EscapeQuerySyntaxImpl.java @@ -40,20 +40,20 @@ public class EscapeQuerySyntaxImpl implements EscapeQuerySyntax { "AND", "OR", "NOT", "TO", "WITHIN", "SENTENCE", "PARAGRAPH", "INORDER" }; - private static final CharSequence escapeChar(CharSequence str, Locale locale) { - if (str == null || str.length() == 0) return str; + private static CharSequence escapeChar(CharSequence str, Locale locale) { + if (str == null || str.isEmpty()) return str; CharSequence buffer = str; - // regular escapable Char for terms - for (int i = 0; i < escapableTermChars.length; i++) { - buffer = replaceIgnoreCase(buffer, escapableTermChars[i].toLowerCase(locale), "\\", locale); + // regular escapable char for terms + for (String escapableTermChar : escapableTermChars) { + buffer = escapeIgnoringCase(buffer, escapableTermChar.toLowerCase(locale), "\\", locale); } - // First Character of a term as more escaping chars - for (int i = 0; i < escapableTermExtraFirstChars.length; i++) { - if (buffer.charAt(0) == escapableTermExtraFirstChars[i].charAt(0)) { - buffer = "\\" + buffer.charAt(0) + buffer.subSequence(1, buffer.length()); + // first char of a term as more escaping chars + for (String escapableTermExtraFirstChar : escapableTermExtraFirstChars) { + if (buffer.charAt(0) == escapableTermExtraFirstChar.charAt(0)) { + buffer = "\\" + buffer; break; } } @@ -61,84 +61,88 @@ public class EscapeQuerySyntaxImpl implements EscapeQuerySyntax { return buffer; } - private final CharSequence escapeQuoted(CharSequence str, Locale locale) { - if (str == null || str.length() == 0) return str; + private static CharSequence escapeQuoted(CharSequence str, Locale locale) { + if (str == null || str.isEmpty()) return str; CharSequence buffer = str; - for (int i = 0; i < escapableQuotedChars.length; i++) { - buffer = replaceIgnoreCase(buffer, escapableTermChars[i].toLowerCase(locale), "\\", locale); + for (String escapableQuotedChar : escapableQuotedChars) { + buffer = escapeIgnoringCase(buffer, escapableQuotedChar.toLowerCase(locale), "\\", locale); } return buffer; } - private static final CharSequence escapeTerm(CharSequence term, Locale locale) { - if (term == null) return term; + private static CharSequence escapeTerm(CharSequence term, Locale locale) { + if (term == null || term.isEmpty()) return term; - // Escape single Chars + // escape single chars term = escapeChar(term, locale); term = escapeWhiteChar(term, locale); - // Escape Parser Words - for (int i = 0; i < escapableWordTokens.length; i++) { - if (escapableWordTokens[i].equalsIgnoreCase(term.toString())) return "\\" + term; + // escape parser words + for (String escapableWordToken : escapableWordTokens) { + if (escapableWordToken.equalsIgnoreCase(term.toString())) return "\\" + term; } return term; } /** - * replace with ignore case + * Prepend every case-insensitive occurrence of the {@code sequence1} in the {@code string} with + * the {@code escapeChar}. When the {@code sequence1} is empty, every character in the {@code + * string} is escaped. * - * @param string string to get replaced + * @param string string to apply escaping to * @param sequence1 the old character sequence in lowercase - * @param escapeChar the new character to prefix sequence1 in return string. - * @return the new String + * @param escapeChar the escape character to prefix sequence1 in the returned string + * @return CharSequence with every occurrence of {@code sequence1} prepended with {@code + * escapeChar} */ - private static CharSequence replaceIgnoreCase( + private static CharSequence escapeIgnoringCase( CharSequence string, CharSequence sequence1, CharSequence escapeChar, Locale locale) { if (escapeChar == null || sequence1 == null || string == null) throw new NullPointerException(); - // empty string case int count = string.length(); int sequence1Length = sequence1.length(); + + // empty search string - escape every character if (sequence1Length == 0) { - StringBuilder result = new StringBuilder((count + 1) * escapeChar.length()); - result.append(escapeChar); + StringBuilder result = new StringBuilder(count * (1 + escapeChar.length())); for (int i = 0; i < count; i++) { - result.append(string.charAt(i)); result.append(escapeChar); + result.append(string.charAt(i)); } - return result.toString(); + return result; } // normal case + String lowercase = string.toString().toLowerCase(locale); StringBuilder result = new StringBuilder(); char first = sequence1.charAt(0); int start = 0, copyStart = 0, firstIndex; while (start < count) { - if ((firstIndex = string.toString().toLowerCase(locale).indexOf(first, start)) == -1) break; + if ((firstIndex = lowercase.indexOf(first, start)) == -1) break; boolean found = true; if (sequence1.length() > 1) { if (firstIndex + sequence1Length > count) break; for (int i = 1; i < sequence1Length; i++) { - if (string.toString().toLowerCase(locale).charAt(firstIndex + i) != sequence1.charAt(i)) { + if (lowercase.charAt(firstIndex + i) != sequence1.charAt(i)) { found = false; break; } } } if (found) { - result.append(string.toString().substring(copyStart, firstIndex)); + result.append(string, copyStart, firstIndex); result.append(escapeChar); - result.append(string.toString().substring(firstIndex, firstIndex + sequence1Length)); + result.append(string, firstIndex, firstIndex + sequence1Length); copyStart = start = firstIndex + sequence1Length; } else { start = firstIndex + 1; } } - if (result.length() == 0 && copyStart == 0) return string; - result.append(string.toString().substring(copyStart)); - return result.toString(); + if (result.isEmpty() && copyStart == 0) return string; + result.append(string, copyStart, string.length()); + return result; } /** @@ -148,25 +152,23 @@ public class EscapeQuerySyntaxImpl implements EscapeQuerySyntax { * @param locale locale to be used when performing string compares * @return the new String */ - private static final CharSequence escapeWhiteChar(CharSequence str, Locale locale) { - if (str == null || str.length() == 0) return str; + private static CharSequence escapeWhiteChar(CharSequence str, Locale locale) { + if (str == null || str.isEmpty()) return str; CharSequence buffer = str; - for (int i = 0; i < escapableWhiteChars.length; i++) { - buffer = replaceIgnoreCase(buffer, escapableWhiteChars[i].toLowerCase(locale), "\\", locale); + for (String escapableWhiteChar : escapableWhiteChars) { + buffer = escapeIgnoringCase(buffer, escapableWhiteChar.toLowerCase(locale), "\\", locale); } return buffer; } @Override public CharSequence escape(CharSequence text, Locale locale, Type type) { - if (text == null || text.length() == 0) return text; + if (text == null || text.isEmpty()) return text; - // escape wildcards and the escape char (this has to be perform before - // anything else) - // since we need to preserve the UnescapedCharSequence and escape the - // original escape chars + // escape wildcards and the escape char (this has to be performed before anything else) + // since we need to preserve the UnescapedCharSequence and escape the original escape chars if (text instanceof UnescapedCharSequence) { text = ((UnescapedCharSequence) text).toStringEscaped(wildcardChars); } else { @@ -184,7 +186,7 @@ public class EscapeQuerySyntaxImpl implements EscapeQuerySyntax { * Returns a String where the escape char has been removed, or kept only once if there was a * double escape. * - *

Supports escaped unicode characters, e. g. translates A to A. + *

Supports escaped Unicode characters, e.g. translates {@code \u005Cu0041} to {@code A}. */ public static UnescapedCharSequence discardEscapeChar(CharSequence input) throws ParseException { // Create char array to hold unescaped char sequence @@ -201,8 +203,7 @@ public class EscapeQuerySyntaxImpl implements EscapeQuerySyntax { boolean lastCharWasEscapeChar = false; // The multiplier the current unicode digit must be multiplied with. - // E. g. the first digit must be multiplied with 16^3, the second with - // 16^2... + // E.g. the first digit must be multiplied with 16^3, the second with 16^2... int codePointMultiplier = 0; // Used to calculate the codepoint of the escaped unicode character @@ -252,7 +253,7 @@ public class EscapeQuerySyntaxImpl implements EscapeQuerySyntax { } /** Returns the numeric value of the hexadecimal character */ - private static final int hexToInt(char c) throws ParseException { + private static int hexToInt(char c) throws ParseException { if ('0' <= c && c <= '9') { return c - '0'; } else if ('a' <= c && c <= 'f') {