mirror of https://github.com/apache/lucene.git
Added new TermsQueryBuilder to simply build boolean queries from text without having concern over clashing reserved words/special characters/legal syntax that is demanded by normal query parser syntax. Added new xml attributes to BooleanQueryBuilder to control disableCoord and minimumNumberShouldMatch.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@417593 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0e23b32894
commit
b2dd60bd4b
|
@ -21,6 +21,7 @@ import org.apache.lucene.xmlparser.builders.SpanOrTermsBuilder;
|
|||
import org.apache.lucene.xmlparser.builders.SpanQueryBuilderFactory;
|
||||
import org.apache.lucene.xmlparser.builders.SpanTermBuilder;
|
||||
import org.apache.lucene.xmlparser.builders.TermQueryBuilder;
|
||||
import org.apache.lucene.xmlparser.builders.TermsQueryBuilder;
|
||||
import org.apache.lucene.xmlparser.builders.UserInputQueryBuilder;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
|
@ -48,6 +49,7 @@ public class CoreParser implements QueryBuilder
|
|||
|
||||
queryFactory = new QueryBuilderFactory();
|
||||
queryFactory.addBuilder("TermQuery",new TermQueryBuilder());
|
||||
queryFactory.addBuilder("TermsQuery",new TermsQueryBuilder(analyzer));
|
||||
queryFactory.addBuilder("MatchAllDocsQuery",new MatchAllDocsQueryBuilder());
|
||||
queryFactory.addBuilder("BooleanQuery",new BooleanQueryBuilder(queryFactory));
|
||||
queryFactory.addBuilder("UserQuery",new UserInputQueryBuilder(parser));
|
||||
|
|
|
@ -29,7 +29,8 @@ public class BooleanQueryBuilder implements QueryBuilder {
|
|||
* @see org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element)
|
||||
*/
|
||||
public Query getQuery(Element e) throws ParserException {
|
||||
BooleanQuery bq=new BooleanQuery();
|
||||
BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
|
||||
bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
|
||||
bq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
|
||||
NodeList nl = e.getElementsByTagName("Clause");
|
||||
for(int i=0;i<nl.getLength();i++)
|
||||
|
|
|
@ -0,0 +1,76 @@
|
|||
/*
|
||||
* Created on 25-Jan-2006
|
||||
*/
|
||||
package org.apache.lucene.xmlparser.builders;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.xmlparser.DOMUtils;
|
||||
import org.apache.lucene.xmlparser.ParserException;
|
||||
import org.apache.lucene.xmlparser.QueryBuilder;
|
||||
import org.w3c.dom.Element;
|
||||
|
||||
|
||||
/**
|
||||
* Builds a BooleanQuery from all of the terms found in the XML element using the choice of analyzer
|
||||
* @author maharwood
|
||||
*/
|
||||
public class TermsQueryBuilder implements QueryBuilder {
|
||||
|
||||
Analyzer analyzer;
|
||||
|
||||
|
||||
public TermsQueryBuilder(Analyzer analyzer)
|
||||
{
|
||||
this.analyzer = analyzer;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public Query getQuery(Element e) throws ParserException {
|
||||
|
||||
String fieldName=DOMUtils.getAttributeWithInheritanceOrFail(e,"fieldName");
|
||||
String text=DOMUtils.getNonBlankTextOrFail(e);
|
||||
|
||||
BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
|
||||
bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
|
||||
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
|
||||
try
|
||||
{
|
||||
Token token = ts.next();
|
||||
Term term = null;
|
||||
while (token != null)
|
||||
{
|
||||
if (term == null)
|
||||
{
|
||||
term = new Term(fieldName, token.termText());
|
||||
} else
|
||||
{
|
||||
// create from previous to save fieldName.intern overhead
|
||||
term = term.createTerm(token.termText());
|
||||
}
|
||||
bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD));
|
||||
token = ts.next();
|
||||
}
|
||||
}
|
||||
catch (IOException ioe)
|
||||
{
|
||||
throw new RuntimeException("Error constructing terms from index:"
|
||||
+ ioe);
|
||||
}
|
||||
bq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
|
||||
|
||||
return bq;
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- TermsQuery uses an analyzer to tokenize text and creates a BooleanQuery with nested
|
||||
"should" TermQueries for each of the tokens encountered. This can be used for user input
|
||||
which may include content or characters that would otherwise be illegal query syntax when
|
||||
using the standard lucene query parser. Of course the downside is that none of the query
|
||||
operators (AND NOT ~ ^ : etc) will have an effect. For some scenarios queries are
|
||||
not formed by people familiar with Lucene query syntax and they can inadvertently type illegal
|
||||
query syntax so in these cases this is an appropriate and simple alternative
|
||||
-->
|
||||
<TermsQuery fieldName="contents">sumitomo bank</TermsQuery>
|
|
@ -85,6 +85,11 @@ public class TestParser extends TestCase {
|
|||
Query q=parse("TermQuery.xml");
|
||||
dumpResults("TermQuery", q, 5);
|
||||
}
|
||||
public void testSimpleTermsQueryXML() throws ParserException, IOException
|
||||
{
|
||||
Query q=parse("TermsQuery.xml");
|
||||
dumpResults("TermsQuery", q, 5);
|
||||
}
|
||||
public void testBooleanQueryXML() throws ParserException, IOException
|
||||
{
|
||||
Query q=parse("BooleanQuery.xml");
|
||||
|
|
Loading…
Reference in New Issue