SOLR-9933: SolrCoreParser now supports configuration of custom SpanQueryBuilder classes. (Daniel Collins, Christine Poerschke)

This commit is contained in:
Christine Poerschke 2017-01-31 09:59:44 +00:00
parent 98a5a81378
commit 7467866364
7 changed files with 270 additions and 15 deletions

View File

@ -121,6 +121,9 @@ New Features
* SOLR-8029: Added new style APIs and a framework for creating new APIs and mapping old APIs to new
(noble, Steve Rowe, Cassandra Targett, Timothy Potter)
* SOLR-9933: SolrCoreParser now supports configuration of custom SpanQueryBuilder classes.
(Daniel Collins, Christine Poerschke)
Bug Fixes
----------------------

View File

@ -16,15 +16,20 @@
*/
package org.apache.solr.search;
import java.lang.invoke.MethodHandles;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.xml.CoreParser;
import org.apache.lucene.queryparser.xml.QueryBuilder;
import org.apache.lucene.queryparser.xml.builders.SpanQueryBuilder;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Assembles a QueryBuilder which uses Query objects from Solr's <code>search</code> module
@ -32,6 +37,8 @@ import org.apache.solr.util.plugin.NamedListInitializedPlugin;
*/
public class SolrCoreParser extends CoreParser implements NamedListInitializedPlugin {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
protected final SolrQueryRequest req;
public SolrCoreParser(String defaultField, Analyzer analyzer,
@ -58,14 +65,35 @@ public class SolrCoreParser extends CoreParser implements NamedListInitializedPl
final String queryName = entry.getKey();
final String queryBuilderClassName = (String)entry.getValue();
final SolrQueryBuilder queryBuilder = loader.newInstance(
queryBuilderClassName,
SolrQueryBuilder.class,
null,
new Class[] {String.class, Analyzer.class, SolrQueryRequest.class, QueryBuilder.class},
new Object[] {defaultField, analyzer, req, this});
try {
final SolrSpanQueryBuilder spanQueryBuilder = loader.newInstance(
queryBuilderClassName,
SolrSpanQueryBuilder.class,
null,
new Class[] {String.class, Analyzer.class, SolrQueryRequest.class, SpanQueryBuilder.class},
new Object[] {defaultField, analyzer, req, this});
this.queryFactory.addBuilder(queryName, queryBuilder);
this.addSpanQueryBuilder(queryName, spanQueryBuilder);
} catch (Exception outerException) {
try {
final SolrQueryBuilder queryBuilder = loader.newInstance(
queryBuilderClassName,
SolrQueryBuilder.class,
null,
new Class[] {String.class, Analyzer.class, SolrQueryRequest.class, QueryBuilder.class},
new Object[] {defaultField, analyzer, req, this});
this.addQueryBuilder(queryName, queryBuilder);
} catch (Exception innerException) {
log.error("Class {} not found or not suitable: {} {}",
queryBuilderClassName, outerException, innerException);
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Cannot find suitable "
+ SolrSpanQueryBuilder.class.getCanonicalName() + " or "
+ SolrQueryBuilder.class.getCanonicalName() + " class: "
+ queryBuilderClassName + " in "
+ loader);
}
}
}
}

View File

@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.xml.builders.SpanQueryBuilder;
import org.apache.solr.request.SolrQueryRequest;
/**
 * Base class for Solr query builders that can build span queries as well as
 * plain queries: it extends {@link SolrQueryBuilder} and additionally
 * implements {@link SpanQueryBuilder}.
 */
public abstract class SolrSpanQueryBuilder extends SolrQueryBuilder implements SpanQueryBuilder {
/** The span-capable factory handed to the constructor, kept for use by subclasses. */
protected final SpanQueryBuilder spanFactory;
/**
 * @param defaultField passed through unchanged to the {@link SolrQueryBuilder} constructor
 * @param analyzer passed through unchanged to the {@link SolrQueryBuilder} constructor
 * @param req passed through unchanged to the {@link SolrQueryBuilder} constructor
 * @param spanFactory passed to the {@link SolrQueryBuilder} constructor as its
 *        query-factory argument and also retained in {@link #spanFactory}
 */
public SolrSpanQueryBuilder(String defaultField, Analyzer analyzer,
SolrQueryRequest req, SpanQueryBuilder spanFactory) {
super(defaultField, analyzer, req, spanFactory);
this.spanFactory = spanFactory;
}
}

View File

@ -20,7 +20,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.xml.DOMUtils;
import org.apache.lucene.queryparser.xml.ParserException;
import org.apache.lucene.queryparser.xml.QueryBuilder;
import org.apache.lucene.queryparser.xml.builders.SpanQueryBuilder;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
@ -28,14 +28,18 @@ import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.solr.request.SolrQueryRequest;
import org.w3c.dom.Element;
public class ApacheLuceneSolrNearQueryBuilder extends SolrQueryBuilder {
public class ApacheLuceneSolrNearQueryBuilder extends SolrSpanQueryBuilder {
public ApacheLuceneSolrNearQueryBuilder(String defaultField, Analyzer analyzer,
SolrQueryRequest req, QueryBuilder queryFactory) {
super(defaultField, analyzer, req, queryFactory);
SolrQueryRequest req, SpanQueryBuilder spanFactory) {
super(defaultField, analyzer, req, spanFactory);
}
/**
 * Builds the query for the given element by delegating to
 * {@code getSpanQuery(Element)}; the span query is returned typed as a
 * plain {@link Query}.
 */
public Query getQuery(Element e) throws ParserException {
return getSpanQuery(e);
}
public SpanQuery getSpanQuery(Element e) throws ParserException {
final String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
final SpanQuery[] spanQueries = new SpanQuery[]{
new SpanTermQuery(new Term(fieldName, "Apache")),

View File

@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.xml.DOMUtils;
import org.apache.lucene.queryparser.xml.ParserException;
import org.apache.lucene.queryparser.xml.builders.SpanQueryBuilder;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.solr.request.SolrQueryRequest;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
public class ChooseOneWordQueryBuilder extends SolrSpanQueryBuilder {
public ChooseOneWordQueryBuilder(String defaultField, Analyzer analyzer, SolrQueryRequest req,
SpanQueryBuilder spanFactory) {
super(defaultField, analyzer, req, spanFactory);
}
public Query getQuery(Element e) throws ParserException {
return implGetQuery(e, false);
}
public SpanQuery getSpanQuery(Element e) throws ParserException {
return (SpanQuery)implGetQuery(e, true);
}
public Query implGetQuery(Element e, boolean span) throws ParserException {
Term term = null;
final String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
for (Node node = e.getFirstChild(); node != null; node = node.getNextSibling()) {
if (node.getNodeType() == Node.ELEMENT_NODE &&
node.getNodeName().equals("Word")) {
final String word = DOMUtils.getNonBlankTextOrFail((Element) node);
final Term t = new Term(fieldName, word);
if (term == null || term.text().length() < t.text().length()) {
term = t;
}
}
}
return (span ? new SpanTermQuery(term) : new TermQuery(term));
}
}

View File

@ -19,20 +19,22 @@ package org.apache.solr.search;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.xml.DOMUtils;
import org.apache.lucene.queryparser.xml.ParserException;
import org.apache.lucene.queryparser.xml.QueryBuilder;
import org.apache.lucene.queryparser.xml.builders.SpanQueryBuilder;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.solr.request.SolrQueryRequest;
import org.w3c.dom.Element;
// A simple test query builder to demonstrate use of
// SolrSpanQueryBuilder's spanFactory constructor argument, both for
// building sub-queries (queryFactory) and sub-span-queries (spanFactory).
public class HandyQueryBuilder extends SolrQueryBuilder {
public class HandyQueryBuilder extends SolrSpanQueryBuilder {
public HandyQueryBuilder(String defaultField, Analyzer analyzer,
SolrQueryRequest req, QueryBuilder queryFactory) {
super(defaultField, analyzer, req, queryFactory);
SolrQueryRequest req, SpanQueryBuilder spanFactory) {
super(defaultField, analyzer, req, spanFactory);
}
public Query getQuery(Element e) throws ParserException {
@ -44,9 +46,24 @@ public class HandyQueryBuilder extends SolrQueryBuilder {
return bq.build();
}
// Builds a SpanOrQuery over the span queries found under the
// "Left" and "Right" child elements, in that order.
public SpanQuery getSpanQuery(Element e) throws ParserException {
  final SpanQuery left = getSubSpanQuery(e, "Left");
  final SpanQuery right = getSubSpanQuery(e, "Right");
  return new SpanOrQuery(new SpanQuery[] {left, right});
}
// Looks up the child element called `name`, takes that child's first child
// and lets the query factory turn it into a Query.
private Query getSubQuery(Element e, String name) throws ParserException {
  final Element wrapper = DOMUtils.getChildByTagOrFail(e, name);
  final Element payload = DOMUtils.getFirstChildOrFail(wrapper);
  return queryFactory.getQuery(payload);
}
// Span flavour of getSubQuery: same element lookup, but the span factory
// builds the result so it is a SpanQuery.
private SpanQuery getSubSpanQuery(Element e, String name) throws ParserException {
  final Element wrapper = DOMUtils.getChildByTagOrFail(e, name);
  return spanFactory.getSpanQuery(DOMUtils.getFirstChildOrFail(wrapper));
}
}

View File

@ -24,13 +24,18 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.xml.CoreParser;
import org.apache.lucene.queryparser.xml.ParserException;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.common.util.NamedList;
@ -52,6 +57,7 @@ public class TestSolrCoreParser extends LuceneTestCase {
args.add("GoodbyeQuery", GoodbyeQueryBuilder.class.getCanonicalName());
args.add("HandyQuery", HandyQueryBuilder.class.getCanonicalName());
args.add("ApacheLuceneSolr", ApacheLuceneSolrNearQueryBuilder.class.getCanonicalName());
args.add("ChooseOneWord", ChooseOneWordQueryBuilder.class.getCanonicalName());
solrCoreParser.init(args);
}
}
@ -85,6 +91,10 @@ public class TestSolrCoreParser extends LuceneTestCase {
// Parses a bare <ApacheLuceneSolr> element and verifies the resulting query.
public void testApacheLuceneSolr() throws IOException, ParserException {
  final String field = "contents";
  final String xml = "<ApacheLuceneSolr fieldName='" + field + "'/>";
  checkApacheLuceneSolr(parseXmlString(xml), field);
}
private static void checkApacheLuceneSolr(Query query, String fieldName) {
assertTrue(query instanceof SpanNearQuery);
final SpanNearQuery snq = (SpanNearQuery)query;
assertEquals(fieldName, snq.getField());
@ -96,6 +106,7 @@ public class TestSolrCoreParser extends LuceneTestCase {
assertTrue(snq.getClauses()[2] instanceof SpanTermQuery);
}
// test custom query (HandyQueryBuilder) wrapping a Query
public void testHandyQuery() throws IOException, ParserException {
final String lhsXml = "<HelloQuery/>";
final String rhsXml = "<GoodbyeQuery/>";
@ -107,4 +118,101 @@ public class TestSolrCoreParser extends LuceneTestCase {
assertTrue(bq.clauses().get(1).getQuery() instanceof MatchNoDocsQuery);
}
// Asserts that the query is a SpanBoostQuery and returns the query it wraps.
private static SpanQuery unwrapSpanBoostQuery(Query query) {
  assertTrue(query instanceof SpanBoostQuery);
  return ((SpanBoostQuery) query).getQuery();
}
// test custom query (HandyQueryBuilder) wrapping a SpanQuery
public void testHandySpanQuery() throws IOException, ParserException {
  final String spanOrXml = "<SpanOr fieldName='contents'>"
      + "<SpanTerm>rain</SpanTerm>"
      + "<SpanTerm>spain</SpanTerm>"
      + "<SpanTerm>plain</SpanTerm>"
      + "</SpanOr>";
  final String spanNearXml = "<SpanNear fieldName='contents' slop='2' inOrder='true'>"
      + "<SpanTerm>sunny</SpanTerm>"
      + "<SpanTerm>sky</SpanTerm>"
      + "</SpanNear>";

  final BooleanQuery bq = (BooleanQuery) parseHandyQuery(spanOrXml, spanNearXml);

  // exactly two clauses: the left-hand side first, then the right-hand side
  assertEquals(2, bq.clauses().size());
  final Query first = bq.clauses().get(0).getQuery();
  assertTrue(unwrapSpanBoostQuery(first) instanceof SpanOrQuery);
  final Query second = bq.clauses().get(1).getQuery();
  assertTrue(unwrapSpanBoostQuery(second) instanceof SpanNearQuery);
}
// Composes the XML for a <ChooseOneWord> element on the given field with one
// <Word> child per term text, in the order given.
private static String composeChooseOneWordQueryXml(String fieldName, String... termTexts) {
  final StringBuilder xml = new StringBuilder();
  xml.append("<ChooseOneWord fieldName='").append(fieldName).append("'>");
  for (int i = 0; i < termTexts.length; i++) {
    xml.append("<Word>" + termTexts[i] + "</Word>");
  }
  return xml.append("</ChooseOneWord>").toString();
}
// test custom queries being wrapped in a Query or SpanQuery
public void testCustomQueryWrapping() throws IOException, ParserException {
  // randomly exercise either the span (SpanOr) or the non-span (BooleanQuery) wrapper
  final boolean span = random().nextBoolean();

  // the custom queries
  final String fieldName = "contents";
  final String[] randomTerms = {"bumble", "honey", "solitary"};
  final String chooseOneWordXml = composeChooseOneWordQueryXml(fieldName, randomTerms);
  final String apacheLuceneSolrXml = "<ApacheLuceneSolr fieldName='" + fieldName + "'/>";

  // the wrapping query
  final String xml;
  if (span) {
    xml = "<SpanOr>" + chooseOneWordXml + apacheLuceneSolrXml + "</SpanOr>";
  } else {
    xml = "<BooleanQuery>"
        + "<Clause occurs='must'>" + chooseOneWordXml + "</Clause>"
        + "<Clause occurs='must'>" + apacheLuceneSolrXml + "</Clause>"
        + "</BooleanQuery>";
  }

  // the test
  final Query query = parseXmlString(xml);
  if (span) {
    final SpanQuery unwrapped = unwrapSpanBoostQuery(query);
    assertTrue(unwrapped instanceof SpanOrQuery);
    final SpanQuery[] clauses = ((SpanOrQuery) unwrapped).getClauses();
    assertEquals(2, clauses.length);
    checkChooseOneWordQuery(span, clauses[0], fieldName, randomTerms);
    checkApacheLuceneSolr(clauses[1], fieldName);
  } else {
    assertTrue(query instanceof BooleanQuery);
    final BooleanQuery bq = (BooleanQuery) query;
    assertEquals(2, bq.clauses().size());
    final Query first = bq.clauses().get(0).getQuery();
    final Query second = bq.clauses().get(1).getQuery();
    checkChooseOneWordQuery(span, first, fieldName, randomTerms);
    checkApacheLuceneSolr(second, fieldName);
  }
}
// Verifies that the query is a single-term query of the expected flavour
// (SpanTermQuery when span is true, TermQuery otherwise), that its term is on
// fieldName, and that the term's text is one of expectedTermTexts.
private static void checkChooseOneWordQuery(boolean span, Query query, String fieldName, String ... expectedTermTexts) {
  final Term term;
  if (span) {
    assertTrue(query instanceof SpanTermQuery);
    final SpanTermQuery stq = (SpanTermQuery)query;
    term = stq.getTerm();
  } else {
    assertTrue(query instanceof TermQuery);
    final TermQuery tq = (TermQuery)query;
    term = tq.getTerm();
  }
  final String text = term.text();
  boolean foundExpected = false;
  for (String expected : expectedTermTexts) {
    foundExpected |= expected.equals(text);
  }
  assertEquals(fieldName, term.field());
  // Join the candidates explicitly: concatenating the String[] itself would print
  // the array's identity (e.g. "[Ljava.lang.String;@1b2c3d") instead of its contents.
  assertTrue("expected term text ("+text+") not found in ("+String.join(", ", expectedTermTexts)+")", foundExpected);
}
}