diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index bb78a2b2645..0838026354a 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -371,6 +371,8 @@ Bug Fixes
affects splits triggered by the autoscale framework, which use async mode.
(Megan Carey, Andy Vuong, Bilal Waheed, Ilan Ginzburg, yonik)
+* SOLR-6613: TextField.analyzeMultiTerm no longer throws an exception when the Analyzer returns no terms; it returns null instead. (Bruno Roustant)
+
Other Changes
---------------------
diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
index a03675d334b..5095ce2a0bf 100644
--- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
+++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
@@ -44,6 +44,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.QueryBuilder;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.Automata;
@@ -995,8 +996,8 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
SchemaField sf = schema.getFieldOrNull((field));
if (sf == null || ! (fieldType instanceof TextField)) return part;
- String out = TextField.analyzeMultiTerm(field, part, ((TextField)fieldType).getMultiTermAnalyzer()).utf8ToString();
- return out;
+ BytesRef out = TextField.analyzeMultiTerm(field, part, ((TextField)fieldType).getMultiTermAnalyzer());
+ return out == null ? part : out.utf8ToString();
}
diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java
index 0d44eb7293a..3bad0f21fd4 100644
--- a/solr/core/src/java/org/apache/solr/schema/TextField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TextField.java
@@ -165,6 +165,16 @@ public class TextField extends FieldType {
return new SolrRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive);
}
+ /**
+ * Analyzes a text part using the provided {@link Analyzer} for a multi-term query.
+ *
+ * Expects a single token to be used as the multi-term term. This single token might also be filtered out,
+ * so zero tokens are supported and null is returned in this case.
+ *
+ * @return The multi-term term bytes; or null if the part or the analyzer is null, or if the analyzer produces no token.
+ * @throws SolrException If the {@link Analyzer} tokenizes more than one token;
+ * or if an underlying {@link IOException} occurs.
+ */
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
if (part == null || analyzerIn == null) return null;
@@ -173,8 +183,10 @@ public class TextField extends FieldType {
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
- if (!source.incrementToken())
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part);
+ if (!source.incrementToken()) {
+ // Accept zero tokens: the single expected token may have been filtered out, for example by a StopFilter.
+ return null;
+ }
BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
if (source.incrementToken())
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part);
diff --git a/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java
index 47e88ce6171..5096308cc17 100644
--- a/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java
@@ -24,6 +24,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SimpleParams;
import org.apache.solr.common.params.SolrParams;
@@ -186,25 +187,29 @@ public class SimpleQParserPlugin extends QParserPlugin {
for (Map.Entry entry : weights.entrySet()) {
String field = entry.getKey();
FieldType type = schema.getFieldType(field);
- Query prefix;
+ Query prefix = null;
if (type instanceof TextField) {
// If the field type is a TextField then use the multi term analyzer.
Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer();
- String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString();
- SchemaField sf = schema.getField(field);
- prefix = sf.getType().getPrefixQuery(qParser, sf, term);
+ BytesRef termBytes = TextField.analyzeMultiTerm(field, text, analyzer);
+ if (termBytes != null) {
+ String term = termBytes.utf8ToString();
+ SchemaField sf = schema.getField(field);
+ prefix = sf.getType().getPrefixQuery(qParser, sf, term);
+ }
} else {
// If the type is *not* a TextField don't do any analysis.
SchemaField sf = schema.getField(field);
prefix = type.getPrefixQuery(qParser, sf, text);
}
-
- float boost = entry.getValue();
- if (boost != 1f) {
- prefix = new BoostQuery(prefix, boost);
+ if (prefix != null) {
+ float boost = entry.getValue();
+ if (boost != 1f) {
+ prefix = new BoostQuery(prefix, boost);
+ }
+ bq.add(prefix, BooleanClause.Occur.SHOULD);
}
- bq.add(prefix, BooleanClause.Occur.SHOULD);
}
return simplify(bq.build());
@@ -217,23 +222,27 @@ public class SimpleQParserPlugin extends QParserPlugin {
for (Map.Entry entry : weights.entrySet()) {
String field = entry.getKey();
FieldType type = schema.getFieldType(field);
- Query fuzzy;
+ Query fuzzy = null;
if (type instanceof TextField) {
// If the field type is a TextField then use the multi term analyzer.
Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer();
- String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString();
- fuzzy = new FuzzyQuery(new Term(entry.getKey(), term), fuzziness);
+ BytesRef termBytes = TextField.analyzeMultiTerm(field, text, analyzer);
+ if (termBytes != null) {
+ String term = termBytes.utf8ToString();
+ fuzzy = new FuzzyQuery(new Term(entry.getKey(), term), fuzziness);
+ }
} else {
// If the type is *not* a TextField don't do any analysis.
fuzzy = new FuzzyQuery(new Term(entry.getKey(), text), fuzziness);
}
-
- float boost = entry.getValue();
- if (boost != 1f) {
- fuzzy = new BoostQuery(fuzzy, boost);
+ if (fuzzy != null) {
+ float boost = entry.getValue();
+ if (boost != 1f) {
+ fuzzy = new BoostQuery(fuzzy, boost);
+ }
+ bq.add(fuzzy, BooleanClause.Occur.SHOULD);
}
- bq.add(fuzzy, BooleanClause.Occur.SHOULD);
}
return simplify(bq.build());
diff --git a/solr/core/src/test/org/apache/solr/schema/TestTextField.java b/solr/core/src/test/org/apache/solr/schema/TestTextField.java
new file mode 100644
index 00000000000..9409908180a
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/schema/TestTextField.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.schema;
+
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrException;
+import org.junit.Test;
+
+/**
+ * Tests {@link org.apache.solr.schema.TextField} methods by calling them directly.
+ */
+public class TestTextField extends SolrTestCaseJ4 {
+
+ @Test
+ public void testAnalyzeMultiTerm() {
+ // Zero tokens: the stop word "the" is removed by the StopAnalyzer, leaving no term for the multi-term part.
+ // This is supported. Check that TextField.analyzeMultiTerm returns null (and does not throw an exception).
+ BytesRef termBytes = TextField.analyzeMultiTerm("field", "the", new StopAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET));
+ assertNull(termBytes);
+
+ // One token: the WhitespaceAnalyzer yields the single term "Sol" (case preserved) for the multi-term part.
+ // This is the regular case. Check that TextField.analyzeMultiTerm returns it (and does not throw an exception).
+ termBytes = TextField.analyzeMultiTerm("field", "Sol", new WhitespaceAnalyzer());
+ assertEquals("Sol", termBytes.utf8ToString());
+
+ // Two tokens: the WhitespaceAnalyzer splits "term1 term2" into two terms for the multi-term part.
+ // This is not allowed. Expect a SolrException carrying the BAD_REQUEST error code.
+ SolrException exception = expectThrows(SolrException.class, () -> TextField.analyzeMultiTerm("field", "term1 term2", new WhitespaceAnalyzer()));
+ assertEquals("Unexpected error code", SolrException.ErrorCode.BAD_REQUEST.code, exception.code());
+ }
+}
\ No newline at end of file