From 72dea4919ebc79721167d451e7c7afa022aeee05 Mon Sep 17 00:00:00 2001 From: Bruno Roustant Date: Fri, 10 Jan 2020 16:52:49 +0100 Subject: [PATCH] SOLR-6613: TextField.analyzeMultiTerm does not throw an exception when Analyzer returns no terms. (Bruno Roustant) --- solr/CHANGES.txt | 2 + .../solr/parser/SolrQueryParserBase.java | 5 +- .../org/apache/solr/schema/TextField.java | 16 +++++- .../solr/search/SimpleQParserPlugin.java | 43 +++++++++------- .../org/apache/solr/schema/TestTextField.java | 50 +++++++++++++++++++ 5 files changed, 95 insertions(+), 21 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/schema/TestTextField.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index bb78a2b2645..0838026354a 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -371,6 +371,8 @@ Bug Fixes affects splits triggered by the autoscale framework, which use async mode. (Megan Carey, Andy Vuong, Bilal Waheed, Ilan Ginzburg, yonik) +* SOLR-6613: TextField.analyzeMultiTerm does not throw an exception when Analyzer returns no terms. (Bruno Roustant) + Other Changes --------------------- diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index a03675d334b..5095ce2a0bf 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -44,6 +44,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.QueryBuilder; import org.apache.lucene.util.Version; import org.apache.lucene.util.automaton.Automata; @@ -995,8 +996,8 @@ public abstract class SolrQueryParserBase extends QueryBuilder { SchemaField sf = schema.getFieldOrNull((field)); if (sf == null || ! (fieldType instanceof TextField)) return part; - String out = TextField.analyzeMultiTerm(field, part, ((TextField)fieldType).getMultiTermAnalyzer()).utf8ToString(); - return out; + BytesRef out = TextField.analyzeMultiTerm(field, part, ((TextField)fieldType).getMultiTermAnalyzer()); + return out == null ? part : out.utf8ToString(); } diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index 0d44eb7293a..3bad0f21fd4 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -165,6 +165,16 @@ public class TextField extends FieldType { return new SolrRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive); } + /** + * Analyzes a text part using the provided {@link Analyzer} for a multi-term query. + *

+ * Expects a single token to be used as multi-term term. This single token might also be filtered out + * so zero token is supported and null is returned in this case. + * + * @return The multi-term term bytes; or null if there is no multi-term terms. + * @throws SolrException If the {@link Analyzer} tokenizes more than one token; + * or if an underlying {@link IOException} occurs. + */ public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) { if (part == null || analyzerIn == null) return null; @@ -173,8 +183,10 @@ public class TextField extends FieldType { TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); - if (!source.incrementToken()) - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part); + if (!source.incrementToken()) { + // Accept no tokens because it may have been filtered out by a StopFilter for example. + return null; + } BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef()); if (source.incrementToken()) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part); diff --git a/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java index 47e88ce6171..5096308cc17 100644 --- a/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java @@ -24,6 +24,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SimpleParams; import org.apache.solr.common.params.SolrParams; @@ -186,25 +187,29 @@ public class SimpleQParserPlugin extends QParserPlugin { for (Map.Entry entry : weights.entrySet()) { String field = entry.getKey(); FieldType type = schema.getFieldType(field); - Query prefix; + Query prefix = null; if (type instanceof TextField) { // If the field type is a TextField then use the multi term analyzer. Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer(); - String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString(); - SchemaField sf = schema.getField(field); - prefix = sf.getType().getPrefixQuery(qParser, sf, term); + BytesRef termBytes = TextField.analyzeMultiTerm(field, text, analyzer); + if (termBytes != null) { + String term = termBytes.utf8ToString(); + SchemaField sf = schema.getField(field); + prefix = sf.getType().getPrefixQuery(qParser, sf, term); + } } else { // If the type is *not* a TextField don't do any analysis. SchemaField sf = schema.getField(field); prefix = type.getPrefixQuery(qParser, sf, text); } - - float boost = entry.getValue(); - if (boost != 1f) { - prefix = new BoostQuery(prefix, boost); + if (prefix != null) { + float boost = entry.getValue(); + if (boost != 1f) { + prefix = new BoostQuery(prefix, boost); + } + bq.add(prefix, BooleanClause.Occur.SHOULD); } - bq.add(prefix, BooleanClause.Occur.SHOULD); } return simplify(bq.build()); @@ -217,23 +222,27 @@ public class SimpleQParserPlugin extends QParserPlugin { for (Map.Entry entry : weights.entrySet()) { String field = entry.getKey(); FieldType type = schema.getFieldType(field); - Query fuzzy; + Query fuzzy = null; if (type instanceof TextField) { // If the field type is a TextField then use the multi term analyzer. Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer(); - String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString(); - fuzzy = new FuzzyQuery(new Term(entry.getKey(), term), fuzziness); + BytesRef termBytes = TextField.analyzeMultiTerm(field, text, analyzer); + if (termBytes != null) { + String term = termBytes.utf8ToString(); + fuzzy = new FuzzyQuery(new Term(entry.getKey(), term), fuzziness); + } } else { // If the type is *not* a TextField don't do any analysis. fuzzy = new FuzzyQuery(new Term(entry.getKey(), text), fuzziness); } - - float boost = entry.getValue(); - if (boost != 1f) { - fuzzy = new BoostQuery(fuzzy, boost); + if (fuzzy != null) { + float boost = entry.getValue(); + if (boost != 1f) { + fuzzy = new BoostQuery(fuzzy, boost); + } + bq.add(fuzzy, BooleanClause.Occur.SHOULD); } - bq.add(fuzzy, BooleanClause.Occur.SHOULD); } return simplify(bq.build()); diff --git a/solr/core/src/test/org/apache/solr/schema/TestTextField.java b/solr/core/src/test/org/apache/solr/schema/TestTextField.java new file mode 100644 index 00000000000..9409908180a --- /dev/null +++ b/solr/core/src/test/org/apache/solr/schema/TestTextField.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.schema; + +import org.apache.lucene.analysis.core.StopAnalyzer; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.analysis.en.EnglishAnalyzer; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrException; +import org.junit.Test; + +/** + * Tests directly {@link org.apache.solr.schema.TextField} methods. + */ +public class TestTextField extends SolrTestCaseJ4 { + + @Test + public void testAnalyzeMultiTerm() { + // No terms provided by the StopFilter (stop word) for the multi-term part. + // This is supported. Check TextField.analyzeMultiTerm returns null (and does not throw an exception). + BytesRef termBytes = TextField.analyzeMultiTerm("field", "the", new StopAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET)); + assertNull(termBytes); + + // One term provided by the WhitespaceTokenizer for the multi-term part. + // This is the regular case. Check TextField.analyzeMultiTerm returns it (and does not throw an exception). + termBytes = TextField.analyzeMultiTerm("field", "Sol", new WhitespaceAnalyzer()); + assertEquals("Sol", termBytes.utf8ToString()); + + // Two terms provided by the WhitespaceTokenizer for the multi-term part. + // This is not allowed. Expect an exception. + SolrException exception = expectThrows(SolrException.class, () -> TextField.analyzeMultiTerm("field", "term1 term2", new WhitespaceAnalyzer())); + assertEquals("Unexpected error code", SolrException.ErrorCode.BAD_REQUEST.code, exception.code()); + } +} \ No newline at end of file