LUCENE-1531

Added support for BoostingTermQuery to XML query parser.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@742411 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Karl-Johan Wettin 2009-02-09 11:49:33 +00:00
parent c45b701113
commit 6e692d38ec
7 changed files with 87 additions and 26 deletions
contrib
CHANGES.txt
xml-query-parser
LuceneCoreQuery.dtd
src
java/org/apache/lucene/xmlparser
test/org/apache/lucene/xmlparser

View File

@ -31,6 +31,9 @@ New features
RangeQuery at the expense of added space (additional indexed RangeQuery at the expense of added space (additional indexed
tokens) consumed in the index. (Uwe Schindler via Mike McCandless) tokens) consumed in the index. (Uwe Schindler via Mike McCandless)
2. LUCENE-1531: Added support for BoostingTermQuery to XML query parser. (Karl Wettin)
Documentation Documentation
(None) (None)

View File

@ -53,8 +53,8 @@
--> -->
<!-- @hidden Define core types of XML elements --> <!-- @hidden Define core types of XML elements -->
<!ENTITY % coreSpanQueries "SpanOr|SpanNear|SpanOrTerms|SpanFirst|SpanNot|SpanTerm" > <!ENTITY % coreSpanQueries "SpanOr|SpanNear|SpanOrTerms|SpanFirst|SpanNot|SpanTerm|BoostingTermQuery" >
<!ENTITY % coreQueries "BooleanQuery|UserQuery|FilteredQuery|TermQuery|TermsQuery|MatchAllDocsQuery|ConstantScoreQuery" > <!ENTITY % coreQueries "BooleanQuery|UserQuery|FilteredQuery|TermQuery|TermsQuery|MatchAllDocsQuery|ConstantScoreQuery|BoostingTermQuery" >
<!ENTITY % coreFilters "RangeFilter|CachedFilter" > <!ENTITY % coreFilters "RangeFilter|CachedFilter" >
<!-- @hidden Allow for extensions --> <!-- @hidden Allow for extensions -->
@ -186,6 +186,29 @@ Passes content directly through to the standard LuceneQuery parser see "Lucene Q
<!ATTLIST TermQuery fieldName CDATA #IMPLIED> <!ATTLIST TermQuery fieldName CDATA #IMPLIED>
<!--
A boosted term query - no analysis is done of the child text. Also a span member.
(Text below is copied from the javadocs of BoostingTermQuery)
The BoostingTermQuery is very similar to the {@link org.apache.lucene.search.spans.SpanTermQuery} except
that it factors in the value of the payload located at each of the positions where the
{@link org.apache.lucene.index.Term} occurs.
In order to take advantage of this, you must override {@link org.apache.lucene.search.Similarity#scorePayload(String, byte[],int,int)}
which returns 1 by default.
Payload scores are averaged across term occurrences in the document.
@see org.apache.lucene.search.Similarity#scorePayload(String, byte[], int, int)
-->
<!ELEMENT BoostingTermQuery (#PCDATA)>
<!-- Optional boost for matches on this query. Values > 1 -->
<!ATTLIST TermQuery boost CDATA "1.0">
<!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a "fieldName" attribute -->
<!ATTLIST TermQuery fieldName CDATA #IMPLIED>
<!-- <!--
The equivalent of a BooleanQuery with multiple optional TermQuery clauses. The equivalent of a BooleanQuery with multiple optional TermQuery clauses.

View File

@ -8,22 +8,7 @@ import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.xmlparser.builders.BooleanQueryBuilder; import org.apache.lucene.xmlparser.builders.*;
import org.apache.lucene.xmlparser.builders.ConstantScoreQueryBuilder;
import org.apache.lucene.xmlparser.builders.FilteredQueryBuilder;
import org.apache.lucene.xmlparser.builders.MatchAllDocsQueryBuilder;
import org.apache.lucene.xmlparser.builders.CachedFilterBuilder;
import org.apache.lucene.xmlparser.builders.RangeFilterBuilder;
import org.apache.lucene.xmlparser.builders.SpanFirstBuilder;
import org.apache.lucene.xmlparser.builders.SpanNearBuilder;
import org.apache.lucene.xmlparser.builders.SpanNotBuilder;
import org.apache.lucene.xmlparser.builders.SpanOrBuilder;
import org.apache.lucene.xmlparser.builders.SpanOrTermsBuilder;
import org.apache.lucene.xmlparser.builders.SpanQueryBuilderFactory;
import org.apache.lucene.xmlparser.builders.SpanTermBuilder;
import org.apache.lucene.xmlparser.builders.TermQueryBuilder;
import org.apache.lucene.xmlparser.builders.TermsQueryBuilder;
import org.apache.lucene.xmlparser.builders.UserInputQueryBuilder;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;
@ -113,6 +98,10 @@ public class CoreParser implements QueryBuilder
sqof.addBuilder("SpanNear",snb); sqof.addBuilder("SpanNear",snb);
queryFactory.addBuilder("SpanNear",snb); queryFactory.addBuilder("SpanNear",snb);
BoostingTermBuilder btb=new BoostingTermBuilder();
sqof.addBuilder("BoostingTermQuery",btb);
queryFactory.addBuilder("BoostingTermQuery",btb);
SpanTermBuilder snt=new SpanTermBuilder(); SpanTermBuilder snt=new SpanTermBuilder();
sqof.addBuilder("SpanTerm",snt); sqof.addBuilder("SpanTerm",snt);
queryFactory.addBuilder("SpanTerm",snt); queryFactory.addBuilder("SpanTerm",snt);

View File

@ -0,0 +1,41 @@
package org.apache.lucene.xmlparser.builders;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.payloads.BoostingTermQuery;
import org.apache.lucene.xmlparser.DOMUtils;
import org.apache.lucene.xmlparser.ParserException;
import org.w3c.dom.Element;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class BoostingTermBuilder extends SpanBuilderBase
{
public SpanQuery getSpanQuery(Element e) throws ParserException
{
String fieldName=DOMUtils.getAttributeWithInheritanceOrFail(e,"fieldName");
String value=DOMUtils.getNonBlankTextOrFail(e);
BoostingTermQuery btq = new BoostingTermQuery(new Term(fieldName,value));
btq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
return btq;
}
}

View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<BoostingTermQuery fieldName="contents">sumitomo</BoostingTermQuery>

View File

@ -15,6 +15,7 @@
<SpanNear slop="6" inOrder="false"> <SpanNear slop="6" inOrder="false">
<SpanTerm>mine</SpanTerm> <SpanTerm>mine</SpanTerm>
<SpanOrTerms>worker workers</SpanOrTerms> <SpanOrTerms>worker workers</SpanOrTerms>
<BoostingTermQuery>heavy</BoostingTermQuery>
</SpanNear> </SpanNear>
</SpanOr> </SpanOr>
</SpanNear> </SpanNear>

View File

@ -1,9 +1,6 @@
package org.apache.lucene.xmlparser; package org.apache.lucene.xmlparser;
import java.io.BufferedReader; import java.io.*;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import junit.framework.TestCase; import junit.framework.TestCase;
@ -148,6 +145,11 @@ public class TestParser extends TestCase {
Query q=parse("TermsFilterQuery.xml"); Query q=parse("TermsFilterQuery.xml");
dumpResults("Terms Filter",q, 5); dumpResults("Terms Filter",q, 5);
} }
public void testBoostingTermQueryXML() throws Exception
{
Query q=parse("BoostingTermQuery.xml");
dumpResults("BoostingTermQuery",q, 5);
}
public void testSpanTermXML() throws Exception public void testSpanTermXML() throws Exception
{ {
Query q=parse("SpanQuery.xml"); Query q=parse("SpanQuery.xml");