mirror of https://github.com/apache/lucene.git
SOLR-6613: TextField.analyzeMultiTerm does not throw an exception when Analyzer returns no terms. (Bruno Roustant)
This commit is contained in:
parent
1cb085afcb
commit
72dea4919e
|
@ -371,6 +371,8 @@ Bug Fixes
|
||||||
affects splits triggered by the autoscale framework, which use async mode.
|
affects splits triggered by the autoscale framework, which use async mode.
|
||||||
(Megan Carey, Andy Vuong, Bilal Waheed, Ilan Ginzburg, yonik)
|
(Megan Carey, Andy Vuong, Bilal Waheed, Ilan Ginzburg, yonik)
|
||||||
|
|
||||||
|
* SOLR-6613: TextField.analyzeMultiTerm does not throw an exception when Analyzer returns no terms. (Bruno Roustant)
|
||||||
|
|
||||||
Other Changes
|
Other Changes
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,7 @@ import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.QueryVisitor;
|
import org.apache.lucene.search.QueryVisitor;
|
||||||
import org.apache.lucene.search.RegexpQuery;
|
import org.apache.lucene.search.RegexpQuery;
|
||||||
import org.apache.lucene.search.WildcardQuery;
|
import org.apache.lucene.search.WildcardQuery;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.QueryBuilder;
|
import org.apache.lucene.util.QueryBuilder;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
import org.apache.lucene.util.automaton.Automata;
|
import org.apache.lucene.util.automaton.Automata;
|
||||||
|
@ -995,8 +996,8 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
|
|
||||||
SchemaField sf = schema.getFieldOrNull((field));
|
SchemaField sf = schema.getFieldOrNull((field));
|
||||||
if (sf == null || ! (fieldType instanceof TextField)) return part;
|
if (sf == null || ! (fieldType instanceof TextField)) return part;
|
||||||
String out = TextField.analyzeMultiTerm(field, part, ((TextField)fieldType).getMultiTermAnalyzer()).utf8ToString();
|
BytesRef out = TextField.analyzeMultiTerm(field, part, ((TextField)fieldType).getMultiTermAnalyzer());
|
||||||
return out;
|
return out == null ? part : out.utf8ToString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -165,6 +165,16 @@ public class TextField extends FieldType {
|
||||||
return new SolrRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive);
|
return new SolrRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyzes a text part using the provided {@link Analyzer} for a multi-term query.
|
||||||
|
* <p>
|
||||||
|
* Expects a single token to be used as the multi-term term. This single token might also be filtered out,
|
||||||
|
* so zero tokens are supported, and null is returned in this case.
|
||||||
|
*
|
||||||
|
* @return The multi-term term bytes; or null if there are no multi-term terms.
|
||||||
|
* @throws SolrException If the {@link Analyzer} produces more than one token;
|
||||||
|
* or if an underlying {@link IOException} occurs.
|
||||||
|
*/
|
||||||
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
|
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
|
||||||
if (part == null || analyzerIn == null) return null;
|
if (part == null || analyzerIn == null) return null;
|
||||||
|
|
||||||
|
@ -173,8 +183,10 @@ public class TextField extends FieldType {
|
||||||
|
|
||||||
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
||||||
|
|
||||||
if (!source.incrementToken())
|
if (!source.incrementToken()) {
|
||||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part);
|
// Accept no tokens, because the single token may have been filtered out (by a StopFilter, for example).
|
||||||
|
return null;
|
||||||
|
}
|
||||||
BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
|
BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
|
||||||
if (source.incrementToken())
|
if (source.incrementToken())
|
||||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part);
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part);
|
||||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.BoostQuery;
|
import org.apache.lucene.search.BoostQuery;
|
||||||
import org.apache.lucene.search.FuzzyQuery;
|
import org.apache.lucene.search.FuzzyQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.solr.common.params.CommonParams;
|
import org.apache.solr.common.params.CommonParams;
|
||||||
import org.apache.solr.common.params.SimpleParams;
|
import org.apache.solr.common.params.SimpleParams;
|
||||||
import org.apache.solr.common.params.SolrParams;
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
@ -186,25 +187,29 @@ public class SimpleQParserPlugin extends QParserPlugin {
|
||||||
for (Map.Entry<String, Float> entry : weights.entrySet()) {
|
for (Map.Entry<String, Float> entry : weights.entrySet()) {
|
||||||
String field = entry.getKey();
|
String field = entry.getKey();
|
||||||
FieldType type = schema.getFieldType(field);
|
FieldType type = schema.getFieldType(field);
|
||||||
Query prefix;
|
Query prefix = null;
|
||||||
|
|
||||||
if (type instanceof TextField) {
|
if (type instanceof TextField) {
|
||||||
// If the field type is a TextField then use the multi term analyzer.
|
// If the field type is a TextField then use the multi term analyzer.
|
||||||
Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer();
|
Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer();
|
||||||
String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString();
|
BytesRef termBytes = TextField.analyzeMultiTerm(field, text, analyzer);
|
||||||
SchemaField sf = schema.getField(field);
|
if (termBytes != null) {
|
||||||
prefix = sf.getType().getPrefixQuery(qParser, sf, term);
|
String term = termBytes.utf8ToString();
|
||||||
|
SchemaField sf = schema.getField(field);
|
||||||
|
prefix = sf.getType().getPrefixQuery(qParser, sf, term);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// If the type is *not* a TextField don't do any analysis.
|
// If the type is *not* a TextField don't do any analysis.
|
||||||
SchemaField sf = schema.getField(field);
|
SchemaField sf = schema.getField(field);
|
||||||
prefix = type.getPrefixQuery(qParser, sf, text);
|
prefix = type.getPrefixQuery(qParser, sf, text);
|
||||||
}
|
}
|
||||||
|
if (prefix != null) {
|
||||||
float boost = entry.getValue();
|
float boost = entry.getValue();
|
||||||
if (boost != 1f) {
|
if (boost != 1f) {
|
||||||
prefix = new BoostQuery(prefix, boost);
|
prefix = new BoostQuery(prefix, boost);
|
||||||
|
}
|
||||||
|
bq.add(prefix, BooleanClause.Occur.SHOULD);
|
||||||
}
|
}
|
||||||
bq.add(prefix, BooleanClause.Occur.SHOULD);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return simplify(bq.build());
|
return simplify(bq.build());
|
||||||
|
@ -217,23 +222,27 @@ public class SimpleQParserPlugin extends QParserPlugin {
|
||||||
for (Map.Entry<String, Float> entry : weights.entrySet()) {
|
for (Map.Entry<String, Float> entry : weights.entrySet()) {
|
||||||
String field = entry.getKey();
|
String field = entry.getKey();
|
||||||
FieldType type = schema.getFieldType(field);
|
FieldType type = schema.getFieldType(field);
|
||||||
Query fuzzy;
|
Query fuzzy = null;
|
||||||
|
|
||||||
if (type instanceof TextField) {
|
if (type instanceof TextField) {
|
||||||
// If the field type is a TextField then use the multi term analyzer.
|
// If the field type is a TextField then use the multi term analyzer.
|
||||||
Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer();
|
Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer();
|
||||||
String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString();
|
BytesRef termBytes = TextField.analyzeMultiTerm(field, text, analyzer);
|
||||||
fuzzy = new FuzzyQuery(new Term(entry.getKey(), term), fuzziness);
|
if (termBytes != null) {
|
||||||
|
String term = termBytes.utf8ToString();
|
||||||
|
fuzzy = new FuzzyQuery(new Term(entry.getKey(), term), fuzziness);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// If the type is *not* a TextField don't do any analysis.
|
// If the type is *not* a TextField don't do any analysis.
|
||||||
fuzzy = new FuzzyQuery(new Term(entry.getKey(), text), fuzziness);
|
fuzzy = new FuzzyQuery(new Term(entry.getKey(), text), fuzziness);
|
||||||
}
|
}
|
||||||
|
if (fuzzy != null) {
|
||||||
float boost = entry.getValue();
|
float boost = entry.getValue();
|
||||||
if (boost != 1f) {
|
if (boost != 1f) {
|
||||||
fuzzy = new BoostQuery(fuzzy, boost);
|
fuzzy = new BoostQuery(fuzzy, boost);
|
||||||
|
}
|
||||||
|
bq.add(fuzzy, BooleanClause.Occur.SHOULD);
|
||||||
}
|
}
|
||||||
bq.add(fuzzy, BooleanClause.Occur.SHOULD);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return simplify(bq.build());
|
return simplify(bq.build());
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.schema;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Directly tests {@link org.apache.solr.schema.TextField} methods.
|
||||||
|
*/
|
||||||
|
public class TestTextField extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAnalyzeMultiTerm() {
|
||||||
|
// No terms provided by the StopFilter (stop word) for the multi-term part.
|
||||||
|
// This is supported. Check TextField.analyzeMultiTerm returns null (and does not throw an exception).
|
||||||
|
BytesRef termBytes = TextField.analyzeMultiTerm("field", "the", new StopAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET));
|
||||||
|
assertNull(termBytes);
|
||||||
|
|
||||||
|
// One term provided by the WhitespaceTokenizer for the multi-term part.
|
||||||
|
// This is the regular case. Check TextField.analyzeMultiTerm returns it (and does not throw an exception).
|
||||||
|
termBytes = TextField.analyzeMultiTerm("field", "Sol", new WhitespaceAnalyzer());
|
||||||
|
assertEquals("Sol", termBytes.utf8ToString());
|
||||||
|
|
||||||
|
// Two terms provided by the WhitespaceTokenizer for the multi-term part.
|
||||||
|
// This is not allowed. Expect an exception.
|
||||||
|
SolrException exception = expectThrows(SolrException.class, () -> TextField.analyzeMultiTerm("field", "term1 term2", new WhitespaceAnalyzer()));
|
||||||
|
assertEquals("Unexpected error code", SolrException.ErrorCode.BAD_REQUEST.code, exception.code());
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue