SOLR-6613: TextField.analyzeMultiTerm does not throw an exception when Analyzer returns no terms. (Bruno Roustant)

2020-01-10 16:52:49 +01:00 · 2020-01-10 16:52:49 +01:00 · 72dea4919e
parent 1cb085afcb
commit 72dea4919e
5 changed files with 95 additions and 21 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -371,6 +371,8 @@ Bug Fixes
  affects splits triggered by the autoscale framework, which use async mode.
  (Megan Carey, Andy Vuong, Bilal Waheed, Ilan Ginzburg, yonik)
 * SOLR-6613: TextField.analyzeMultiTerm does not throw an exception when Analyzer returns no terms. (Bruno Roustant)
 Other Changes
 ---------------------
--- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
+++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
@ -44,6 +44,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.RegexpQuery;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.QueryBuilder;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util.automaton.Automata;
@ -995,8 +996,8 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
    SchemaField sf = schema.getFieldOrNull((field));
    if (sf == null || ! (fieldType instanceof TextField)) return part;
-    String out = TextField.analyzeMultiTerm(field, part, ((TextField)fieldType).getMultiTermAnalyzer()).utf8ToString();
+    BytesRef out = TextField.analyzeMultiTerm(field, part, ((TextField)fieldType).getMultiTermAnalyzer());
-    return out;
+    return out == null ? part : out.utf8ToString();
  }
--- a/solr/core/src/java/org/apache/solr/schema/TextField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TextField.java
@ -165,6 +165,16 @@ public class TextField extends FieldType {
    return new SolrRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive);
  }
  /**
   * Analyzes a text part using the provided {@link Analyzer} for a multi-term query.
   * <p>
   * Expects a single token to be used as the multi-term term. This single token might also be filtered out,
   * so zero tokens are supported, and null is returned in that case.
   *
   * @return The multi-term term bytes; or null if there are no multi-term terms.
   * @throws SolrException If the {@link Analyzer} tokenizes more than one token;
   * or if an underlying {@link IOException} occurs.
   */
  public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
    if (part == null || analyzerIn == null) return null;
@ -173,8 +183,10 @@ public class TextField extends FieldType {
      TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
-      if (!source.incrementToken())
+      if (!source.incrementToken()) {
-        throw  new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part);
+        // Accept no tokens because it may have been filtered out by a StopFilter for example.
        return null;
      }
      BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
      if (source.incrementToken())
        throw  new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part);
--- a/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java
@ -24,6 +24,7 @@ import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.SimpleParams;
 import org.apache.solr.common.params.SolrParams;
@ -186,25 +187,29 @@ public class SimpleQParserPlugin extends QParserPlugin {
      for (Map.Entry<String, Float> entry : weights.entrySet()) {
        String field = entry.getKey();
        FieldType type = schema.getFieldType(field);
-        Query prefix;
+        Query prefix = null;
        if (type instanceof TextField) {
          // If the field type is a TextField then use the multi term analyzer.
          Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer();
-          String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString();
+          BytesRef termBytes = TextField.analyzeMultiTerm(field, text, analyzer);
-          SchemaField sf = schema.getField(field);
+          if (termBytes != null) {
-          prefix = sf.getType().getPrefixQuery(qParser, sf, term);
+            String term = termBytes.utf8ToString();
            SchemaField sf = schema.getField(field);
            prefix = sf.getType().getPrefixQuery(qParser, sf, term);
          }
        } else {
          // If the type is *not* a TextField don't do any analysis.
          SchemaField sf = schema.getField(field);
          prefix = type.getPrefixQuery(qParser, sf, text);
        }
-
+        if (prefix != null) {
-        float boost = entry.getValue();
+          float boost = entry.getValue();
-        if (boost != 1f) {
+          if (boost != 1f) {
-          prefix = new BoostQuery(prefix, boost);
+            prefix = new BoostQuery(prefix, boost);
          }
          bq.add(prefix, BooleanClause.Occur.SHOULD);
        }
        bq.add(prefix, BooleanClause.Occur.SHOULD);
      }
      return simplify(bq.build());
@ -217,23 +222,27 @@ public class SimpleQParserPlugin extends QParserPlugin {
      for (Map.Entry<String, Float> entry : weights.entrySet()) {
        String field = entry.getKey();
        FieldType type = schema.getFieldType(field);
-        Query fuzzy;
+        Query fuzzy = null;
        if (type instanceof TextField) {
          // If the field type is a TextField then use the multi term analyzer.
          Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer();
-          String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString();
+          BytesRef termBytes = TextField.analyzeMultiTerm(field, text, analyzer);
-          fuzzy = new FuzzyQuery(new Term(entry.getKey(), term), fuzziness);
+          if (termBytes != null) {
            String term = termBytes.utf8ToString();
            fuzzy = new FuzzyQuery(new Term(entry.getKey(), term), fuzziness);
          }
        } else {
          // If the type is *not* a TextField don't do any analysis.
          fuzzy = new FuzzyQuery(new Term(entry.getKey(), text), fuzziness);
        }
-
+        if (fuzzy != null) {
-        float boost = entry.getValue();
+          float boost = entry.getValue();
-        if (boost != 1f) {
+          if (boost != 1f) {
-          fuzzy = new BoostQuery(fuzzy, boost);
+            fuzzy = new BoostQuery(fuzzy, boost);
          }
          bq.add(fuzzy, BooleanClause.Occur.SHOULD);
        }
        bq.add(fuzzy, BooleanClause.Occur.SHOULD);
      }
      return simplify(bq.build());
--- a/solr/core/src/test/org/apache/solr/schema/TestTextField.java
+++ b/solr/core/src/test/org/apache/solr/schema/TestTextField.java
@ -0,0 +1,50 @@
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.solr.schema;
 import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.util.BytesRef;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.SolrException;
 import org.junit.Test;
 /**
 * Exercises the static helper methods of {@link org.apache.solr.schema.TextField} directly.
 */
 public class TestTextField extends SolrTestCaseJ4 {
  /**
   * Covers the three outcomes of {@code TextField.analyzeMultiTerm}: no token (null is returned),
   * exactly one token (its bytes are returned) and more than one token (a BAD_REQUEST error is raised).
   */
  @Test
  public void testAnalyzeMultiTerm() {
    // Zero tokens: the stop word is dropped by the StopFilter, so the multi-term part yields nothing.
    // This must be tolerated; the result is null rather than an exception.
    StopAnalyzer stopAnalyzer = new StopAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
    BytesRef filteredOut = TextField.analyzeMultiTerm("field", "the", stopAnalyzer);
    assertNull(filteredOut);

    // One token: the regular case. The WhitespaceTokenizer emits the part unchanged and it is returned as-is.
    WhitespaceAnalyzer singleTermAnalyzer = new WhitespaceAnalyzer();
    BytesRef singleTerm = TextField.analyzeMultiTerm("field", "Sol", singleTermAnalyzer);
    assertEquals("Sol", singleTerm.utf8ToString());

    // Two tokens: the WhitespaceTokenizer splits the part in two, which is not allowed for a multi-term part.
    SolrException tooManyTerms = expectThrows(
        SolrException.class,
        () -> TextField.analyzeMultiTerm("field", "term1 term2", new WhitespaceAnalyzer()));
    int expectedCode = SolrException.ErrorCode.BAD_REQUEST.code;
    assertEquals("Unexpected error code", expectedCode, tooManyTerms.code());
  }
 }