SOLR-5490: SimpleQParser uses multiterm analysis chain for prefixes

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1544837 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-11-23 17:12:14 +00:00
parent fd6268a9a8
commit 2ef13e06f0
4 changed files with 51 additions and 3 deletions

View File

@ -84,7 +84,7 @@ New Features
* SOLR-5287: You can edit files in the conf directory from the admin UI
(Erick Erickson, Stefan Matheis)
* SOLR-5447: Add a QParserPlugin for Lucene's SimpleQueryParser.
* SOLR-5447, SOLR-5490: Add a QParserPlugin for Lucene's SimpleQueryParser.
(Jack Conradson via shalin)
* SOLR-5446: Admin UI - Allow changing Schema and Config (steffkes)

View File

@ -17,8 +17,12 @@ package org.apache.solr.search;
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.simple.SimpleQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SimpleParams;
@ -26,6 +30,9 @@ import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.parser.QueryParser;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.TextField;
import org.apache.solr.util.SolrPluginUtils;
import java.util.HashMap;
@ -154,7 +161,35 @@ public class SimpleQParserPlugin extends QParserPlugin {
}
// Create a SimpleQueryParser using the analyzer from the schema.
final SimpleQueryParser parser = new SimpleQueryParser(req.getSchema().getAnalyzer(), queryFields, enabledOps);
final IndexSchema schema = req.getSchema();
final SimpleQueryParser parser = new SimpleQueryParser(req.getSchema().getAnalyzer(), queryFields, enabledOps) {
// Override newPrefixQuery to provide a multi term analyzer for prefix queries run against TextFields.
@Override
protected Query newPrefixQuery(String text) {
BooleanQuery bq = new BooleanQuery(true);
for (Map.Entry<String, Float> entry : weights.entrySet()) {
String field = entry.getKey();
FieldType type = schema.getFieldType(field);
Query prefix;
if (type instanceof TextField) {
// If the field type is a TextField then use the multi term analyzer.
Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer();
String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString();
prefix = new PrefixQuery(new Term(field, term));
} else {
// If the type is *not* a TextField don't do any analysis.
prefix = new PrefixQuery(new Term(entry.getKey(), text));
}
prefix.setBoost(entry.getValue());
bq.add(prefix, BooleanClause.Occur.SHOULD);
}
return simplify(bq);
}
};
// Set the default operator to be either 'AND' or 'OR' for the query.
QueryParser.Operator defaultOp = QueryParsing.getQueryParserDefaultOperator(req.getSchema(), defaultParams.get(QueryParsing.OP));

View File

@ -28,6 +28,7 @@
<analyzer>
<tokenizer class="solr.MockTokenizerFactory" pattern="whitespace"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldtype>

View File

@ -17,6 +17,9 @@ package org.apache.solr.search;
* limitations under the License.
*/
import org.apache.lucene.index.Term;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
import org.junit.Test;
@ -44,7 +47,8 @@ public class TestSimpleQParserPlugin extends SolrTestCaseJ4 {
assertU(adoc("id", "53", "text0", "close", "text1", ")", "text-keyword0", ")"));
assertU(adoc("id", "54", "text0", "escape", "text1", "\\", "text-keyword0", "\\"));
assertU(adoc("id", "55", "text0", "whitespace", "text1", "whitespace", "text-keyword0", " "));
assertU(adoc("id", "55", "text0", "whitespace", "text1", "whitespace", "text-keyword0", "\n"));
assertU(adoc("id", "56", "text0", "whitespace", "text1", "whitespace", "text-keyword0", "\n"));
assertU(adoc("id", "57", "text0", "foobar", "text1", "foo bar", "text-keyword0", "fb"));
assertU(commit());
}
@ -205,4 +209,12 @@ public class TestSimpleQParserPlugin extends SolrTestCaseJ4 {
"q.op", "AND"), "/response/numFound==1");
assertJQ(req("defType", "simple", "qf", "text1", "q", "t2 t3"), "/response/numFound==2");
}
/** Test that multiterm analysis chain is used for prefix. */
public void testPrefixChain() throws Exception {
assertJQ(req("defType", "simple", "qf", "text0", "q", "FOOBAR*"), "/response/numFound==1");
assertJQ(req("defType", "simple", "qf", "text0", "q", "Fóóbar*"), "/response/numFound==1");
assertJQ(req("defType", "simple", "qf", "text0", "q", "FOO*"), "/response/numFound==1");
assertJQ(req("defType", "simple", "qf", "text0", "q", "BAR*"), "/response/numFound==0");
}
}