Added support for field-specific highlighting, which respects the field names found in queries. Pass a field name to the QueryScorer to select only that field's query terms for highlighting. Updated JUnit tests too.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@351504 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Harwood 2005-12-01 22:18:33 +00:00
parent 18b9843fad
commit 2da431d139
3 changed files with 85 additions and 23 deletions

View File

@ -48,6 +48,17 @@ public class QueryScorer implements Scorer
{
this(QueryTermExtractor.getTerms(query));
}
/**
* Creates a scorer that only uses the query terms belonging to one field.
* The terms are obtained from QueryTermExtractor.getTerms with the
* <code>prohibited</code> flag set to <code>false</code>, so prohibited
* clauses are not extracted.
*
* @param query a Lucene query (ideally rewritten using query.rewrite
* before being passed to this class and the searcher)
* @param fieldName the Field name which is used to match Query terms
*/
public QueryScorer(Query query, String fieldName)
{
this(QueryTermExtractor.getTerms(query, false,fieldName));
}
/**
*

View File

@ -61,7 +61,7 @@ public final class QueryTermExtractor
*/
public static final WeightedTerm[] getIdfWeightedTerms(Query query, IndexReader reader, String fieldName)
{
WeightedTerm[] terms=getTerms(query,false);
WeightedTerm[] terms=getTerms(query,false, fieldName);
int totalNumDocs=reader.numDocs();
for (int i = 0; i < terms.length; i++)
{
@ -80,6 +80,25 @@ public final class QueryTermExtractor
return terms;
}
/**
 * Collects the terms of a query, optionally restricted to a single field,
 * as an array of WeightedTerms.
 *
 * @param query      Query to extract term texts from
 * @param prohibited <code>true</code> to extract "prohibited" terms, too
 * @param fieldName  The fieldName used to filter query terms; when
 *                   <code>null</code> no field filtering is applied
 * @return an array of the terms used in a query, plus their weights.
 */
public static final WeightedTerm[] getTerms(Query query, boolean prohibited, String fieldName)
{
  // The private extraction routine requires an interned field name
  // (it matches fields by reference), so intern it once up front.
  String internedField = (fieldName == null) ? null : fieldName.intern();
  HashSet collected = new HashSet();
  getTerms(query, collected, prohibited, internedField);
  return (WeightedTerm[]) collected.toArray(new WeightedTerm[collected.size()]);
}
/**
* Extracts all terms texts of a given Query into an array of WeightedTerms
*
@ -89,27 +108,26 @@ public final class QueryTermExtractor
*/
public static final WeightedTerm[] getTerms(Query query, boolean prohibited)
{
HashSet terms=new HashSet();
getTerms(query,terms,prohibited);
return (WeightedTerm[]) terms.toArray(new WeightedTerm[0]);
}
return getTerms(query,prohibited,null);
}
private static final void getTerms(Query query, HashSet terms,boolean prohibited)
//fieldName MUST be interned prior to this call: the helpers below compare it to each term's field with ==, not equals()
private static final void getTerms(Query query, HashSet terms,boolean prohibited, String fieldName)
{
if (query instanceof BooleanQuery)
getTermsFromBooleanQuery((BooleanQuery) query, terms, prohibited);
getTermsFromBooleanQuery((BooleanQuery) query, terms, prohibited, fieldName);
else
if (query instanceof PhraseQuery)
getTermsFromPhraseQuery((PhraseQuery) query, terms);
getTermsFromPhraseQuery((PhraseQuery) query, terms, fieldName);
else
if (query instanceof TermQuery)
getTermsFromTermQuery((TermQuery) query, terms);
getTermsFromTermQuery((TermQuery) query, terms, fieldName);
else
if(query instanceof SpanNearQuery)
getTermsFromSpanNearQuery((SpanNearQuery) query, terms);
getTermsFromSpanNearQuery((SpanNearQuery) query, terms, fieldName);
}
private static final void getTermsFromBooleanQuery(BooleanQuery query, HashSet terms, boolean prohibited)
private static final void getTermsFromBooleanQuery(BooleanQuery query, HashSet terms, boolean prohibited, String fieldName)
{
BooleanClause[] queryClauses = query.getClauses();
int i;
@ -117,27 +135,33 @@ public final class QueryTermExtractor
for (i = 0; i < queryClauses.length; i++)
{
if (prohibited || !queryClauses[i].prohibited)
getTerms(queryClauses[i].query, terms, prohibited);
getTerms(queryClauses[i].query, terms, prohibited, fieldName);
}
}
private static final void getTermsFromPhraseQuery(PhraseQuery query, HashSet terms)
private static final void getTermsFromPhraseQuery(PhraseQuery query, HashSet terms, String fieldName)
{
Term[] queryTerms = query.getTerms();
int i;
for (i = 0; i < queryTerms.length; i++)
{
terms.add(new WeightedTerm(query.getBoost(),queryTerms[i].text()));
if((fieldName==null)||(queryTerms[i].field()==fieldName))
{
terms.add(new WeightedTerm(query.getBoost(),queryTerms[i].text()));
}
}
}
private static final void getTermsFromTermQuery(TermQuery query, HashSet terms)
private static final void getTermsFromTermQuery(TermQuery query, HashSet terms, String fieldName)
{
terms.add(new WeightedTerm(query.getBoost(),query.getTerm().text()));
if((fieldName==null)||(query.getTerm().field()==fieldName))
{
terms.add(new WeightedTerm(query.getBoost(),query.getTerm().text()));
}
}
private static final void getTermsFromSpanNearQuery(SpanNearQuery query, HashSet terms){
private static final void getTermsFromSpanNearQuery(SpanNearQuery query, HashSet terms, String fieldName){
Collection queryTerms = query.getTerms();
@ -149,10 +173,10 @@ public final class QueryTermExtractor
String text = term.text();
terms.add(new WeightedTerm(query.getBoost(), text));
if((fieldName==null)||(term.field()==fieldName))
{
terms.add(new WeightedTerm(query.getBoost(), text));
}
}
}

View File

@ -455,9 +455,36 @@ public class HighlighterTest extends TestCase implements Formatter
}
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
}
/**
 * Verifies that a QueryScorer built with a field name only highlights the
 * query terms of that field, while a QueryScorer built without one
 * highlights the terms of every field used in the query.
 */
public void testFieldSpecificHighlighting() throws IOException, ParseException
{
  String docMainText="fred is one of the people";
  QueryParser parser=new QueryParser(FIELD_NAME,analyzer);
  // "fred" carries no field prefix; "people" is explicitly qualified with "category"
  Query query=parser.parse("fred category:people");

  //highlighting respects fieldnames used in query
  // Reuse FIELD_NAME (the field handed to the parser) instead of repeating its
  // literal value, so the scorer and the parsed query cannot drift apart.
  QueryScorer fieldSpecificScorer=new QueryScorer(query, FIELD_NAME);
  Highlighter fieldSpecificHighlighter =
    new Highlighter(new SimpleHTMLFormatter(),fieldSpecificScorer);
  // NullFragmenter keeps the whole text as one fragment so the full string can be compared
  fieldSpecificHighlighter.setTextFragmenter(new NullFragmenter());
  String result=fieldSpecificHighlighter.getBestFragment(analyzer,FIELD_NAME,docMainText);
  // JUnit argument order is (message, expected, actual)
  assertEquals("Should match","<B>fred</B> is one of the people",result);

  //highlighting does not respect fieldnames used in query
  QueryScorer fieldAgnosticScorer=new QueryScorer(query);
  Highlighter fieldAgnosticHighlighter =
    new Highlighter(new SimpleHTMLFormatter(),fieldAgnosticScorer);
  fieldAgnosticHighlighter.setTextFragmenter(new NullFragmenter());
  result=fieldAgnosticHighlighter.getBestFragment(analyzer,FIELD_NAME,docMainText);
  assertEquals("Should match","<B>fred</B> is one of the <B>people</B>",result);

  reader.close();
}
/*