Added new TermsQueryBuilder to simply build boolean queries from text without having to worry about clashing reserved words, special characters, or the legal syntax demanded by the normal query parser. Added new XML attributes to BooleanQueryBuilder to control disableCoord and minimumNumberShouldMatch.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@417593 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Harwood 2006-06-27 21:56:47 +00:00
parent 0e23b32894
commit b2dd60bd4b
5 changed files with 95 additions and 1 deletions

View File

@ -21,6 +21,7 @@ import org.apache.lucene.xmlparser.builders.SpanOrTermsBuilder;
import org.apache.lucene.xmlparser.builders.SpanQueryBuilderFactory;
import org.apache.lucene.xmlparser.builders.SpanTermBuilder;
import org.apache.lucene.xmlparser.builders.TermQueryBuilder;
import org.apache.lucene.xmlparser.builders.TermsQueryBuilder;
import org.apache.lucene.xmlparser.builders.UserInputQueryBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
@ -48,6 +49,7 @@ public class CoreParser implements QueryBuilder
queryFactory = new QueryBuilderFactory();
queryFactory.addBuilder("TermQuery",new TermQueryBuilder());
queryFactory.addBuilder("TermsQuery",new TermsQueryBuilder(analyzer));
queryFactory.addBuilder("MatchAllDocsQuery",new MatchAllDocsQueryBuilder());
queryFactory.addBuilder("BooleanQuery",new BooleanQueryBuilder(queryFactory));
queryFactory.addBuilder("UserQuery",new UserInputQueryBuilder(parser));

View File

@ -29,7 +29,8 @@ public class BooleanQueryBuilder implements QueryBuilder {
* @see org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element)
*/
public Query getQuery(Element e) throws ParserException {
BooleanQuery bq=new BooleanQuery();
BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
bq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
NodeList nl = e.getElementsByTagName("Clause");
for(int i=0;i<nl.getLength();i++)

View File

@ -0,0 +1,76 @@
/*
* Created on 25-Jan-2006
*/
package org.apache.lucene.xmlparser.builders;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.xmlparser.DOMUtils;
import org.apache.lucene.xmlparser.ParserException;
import org.apache.lucene.xmlparser.QueryBuilder;
import org.w3c.dom.Element;
/**
 * Builds a BooleanQuery from all of the terms found in the XML element using the choice of analyzer.
 * <p>
 * The element's text is run through the analyzer and every token produced becomes an optional
 * ("should") {@link TermQuery} clause on the field named by the <code>fieldName</code> attribute.
 * The optional attributes <code>disableCoord</code> (default false),
 * <code>minimumNumberShouldMatch</code> (default 0) and <code>boost</code> (default 1.0) are
 * applied to the resulting BooleanQuery.
 *
 * @author maharwood
 */
public class TermsQueryBuilder implements QueryBuilder {

	/** Analyzer used to tokenize the element text into terms. */
	Analyzer analyzer;

	/**
	 * @param analyzer the analyzer used to break the element text into terms
	 */
	public TermsQueryBuilder(Analyzer analyzer)
	{
		this.analyzer = analyzer;
	}

	/**
	 * Creates a BooleanQuery holding one "should" TermQuery per token found in the element text.
	 *
	 * @param e the XML element carrying the <code>fieldName</code> attribute and the text to analyze
	 * @return the BooleanQuery built from the analyzed text
	 * @throws ParserException raised by the DOMUtils lookups; presumably when the field name
	 *         is missing or the element has no text (confirm against DOMUtils)
	 * @throws RuntimeException wrapping the IOException if reading or closing the token stream fails
	 */
	public Query getQuery(Element e) throws ParserException {
		String fieldName=DOMUtils.getAttributeWithInheritanceOrFail(e,"fieldName");
		String text=DOMUtils.getNonBlankTextOrFail(e);

		BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
		bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
		TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
		try
		{
			try
			{
				Token token = ts.next();
				Term term = null;
				while (token != null)
				{
					if (term == null)
					{
						term = new Term(fieldName, token.termText());
					} else
					{
						// create from previous to save fieldName.intern overhead
						term = term.createTerm(token.termText());
					}
					bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD));
					token = ts.next();
				}
			}
			finally
			{
				// always release the token stream (and the reader it wraps), even on failure
				ts.close();
			}
		}
		catch (IOException ioe)
		{
			// keep the original exception as the cause so its stack trace is not lost
			throw new RuntimeException("Error constructing terms from index:"
					+ ioe, ioe);
		}

		bq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
		return bq;
	}
}

View File

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- TermsQuery uses an analyzer to tokenize text and creates a BooleanQuery with nested
"should" TermQueries for each of the tokens encountered. This can be used for user input
which may include content or characters that would otherwise be illegal query syntax when
using the standard Lucene query parser. Of course, the downside is that none of the query
operators (AND, NOT, ~, ^, : etc.) will have any effect. In some scenarios, queries are
formed by people unfamiliar with Lucene query syntax who can inadvertently type illegal
query syntax; in these cases, this is an appropriate and simple alternative.
-->
<TermsQuery fieldName="contents">sumitomo bank</TermsQuery>

View File

@ -85,6 +85,11 @@ public class TestParser extends TestCase {
Query q=parse("TermQuery.xml");
dumpResults("TermQuery", q, 5);
}
/**
 * Parses the TermsQuery.xml sample and hands the resulting query to dumpResults
 * under the label "TermsQuery" (third argument 5, presumably the number of hits to show).
 */
public void testSimpleTermsQueryXML() throws ParserException, IOException
{
	Query termsQuery = parse("TermsQuery.xml");
	dumpResults("TermsQuery", termsQuery, 5);
}
public void testBooleanQueryXML() throws ParserException, IOException
{
Query q=parse("BooleanQuery.xml");