Simplified QueryTermExtractor.java to make use of Query.extractTerms method (especially now that all the SpanQuery classes implement this correctly).

Added tests in Junit test to demonstrate new support for other Queries (FilteredQuery) now that we use the standard extractTerms feature of Query objects. Also deprecated highlighter getBestFragments method that hard-coded choice of fieldname and introduced new variation that takes an additional fieldName argument git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@389888 13f79535-47bb-0310-9956-ffa450edef68
2006-03-29 21:01:40 +00:00 · 2006-03-29 21:01:40 +00:00 · 286f4f5f07
parent 4696ac421e
commit 286f4f5f07
3 changed files with 99 additions and 82 deletions
--- a/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
+++ b/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
@ -113,7 +113,8 @@ public class Highlighter
 	 * into chunks  
 	 * @param text        	text to highlight terms in
 	 * @param maxNumFragments  the maximum number of fragments.
-	 *
+	 * @deprecated This method incorrectly hardcodes the choice of fieldname. Use the
 	 * method of the same name that takes a fieldname.
 	 * @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
 	 */
 	public final String[] getBestFragments(
@ -125,6 +126,29 @@ public class Highlighter
 		TokenStream tokenStream = analyzer.tokenStream("field", new StringReader(text));
 		return getBestFragments(tokenStream, text, maxNumFragments);
 	}
 	/**
 	 * Highlights chosen terms in a text, extracting the most relevant sections.
 	 * This is a convenience method that calls
 	 * {@link #getBestFragments(TokenStream, String, int)}
 	 *
 	 * @param analyzer   the analyzer that will be used to split <code>text</code>
 	 * into chunks  
 	 * @param fieldName     the name of the field being highlighted (used by analyzer)
 	 * @param text        	text to highlight terms in
 	 * @param maxNumFragments  the maximum number of fragments.
 	 *
 	 * @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
 	 */
 	public final String[] getBestFragments(
 		Analyzer analyzer,	
 		String fieldName,
 		String text,
 		int maxNumFragments)
 		throws IOException
 	{
 		TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
 		return getBestFragments(tokenStream, text, maxNumFragments);
 	}
 	/**
 	 * Highlights chosen terms in a text, extracting the most relevant sections.
--- a/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java
+++ b/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java
@ -16,18 +16,12 @@ package org.apache.lucene.search.highlight;
 */
 import java.io.IOException;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.Iterator;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.spans.SpanNearQuery;
 /**
 * Utility class used to extract the terms used in a query, plus any weights.
@ -114,75 +108,22 @@ public final class QueryTermExtractor
 	//fieldname MUST be interned prior to this call
 	private static final void getTerms(Query query, HashSet terms,boolean prohibited, String fieldName) 
 	{
-		if (query instanceof BooleanQuery)
+       	try
-			getTermsFromBooleanQuery((BooleanQuery) query, terms, prohibited, fieldName);
+       	{
-		else
+       		HashSet nonWeightedTerms=new HashSet();
-			if (query instanceof PhraseQuery)
+       		query.extractTerms(nonWeightedTerms);
-				getTermsFromPhraseQuery((PhraseQuery) query, terms, fieldName);
+       		for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();)
-			else
+			{
-				if (query instanceof TermQuery)
+				Term term = (Term) iter.next();
-					getTermsFromTermQuery((TermQuery) query, terms, fieldName);
+			    if((fieldName==null)||(term.field()==fieldName))
-				else
+				{
-		        if(query instanceof SpanNearQuery)
+					terms.add(new WeightedTerm(query.getBoost(),term.text()));
-		            getTermsFromSpanNearQuery((SpanNearQuery) query, terms, fieldName);
+				}
 			}
 	      }
 	      catch(UnsupportedOperationException ignore)
 	      {
 	    	  //this is non-fatal for our purposes
       	  }		        			        	
 	}
 	private static final void getTermsFromBooleanQuery(BooleanQuery query, HashSet terms, boolean prohibited, String fieldName)
 	{
 		BooleanClause[] queryClauses = query.getClauses();
 		int i;
 		for (i = 0; i < queryClauses.length; i++)
 		{
 			//Pre Lucene 2.0 code
 //			if (prohibited || !queryClauses[i].prohibited)
 //				getTerms(queryClauses[i].query, terms, prohibited, fieldName);
 			// Lucene 2.0 ready code
 			if (prohibited || queryClauses[i].getOccur()!=BooleanClause.Occur.MUST_NOT)
 				getTerms(queryClauses[i].getQuery(), terms, prohibited, fieldName);
 		}
 	}
 	private static final void getTermsFromPhraseQuery(PhraseQuery query, HashSet terms, String fieldName)
 	{
 		Term[] queryTerms = query.getTerms();
 		int i;
 		for (i = 0; i < queryTerms.length; i++)
 		{
 		    if((fieldName==null)||(queryTerms[i].field()==fieldName))
 		    {
 		        terms.add(new WeightedTerm(query.getBoost(),queryTerms[i].text()));
 		    }
 		}
 	}
 	private static final void getTermsFromTermQuery(TermQuery query, HashSet terms, String fieldName)
 	{
 	    if((fieldName==null)||(query.getTerm().field()==fieldName))
 	    {
 	        terms.add(new WeightedTerm(query.getBoost(),query.getTerm().text()));
 	    }
 	}
    private static final void getTermsFromSpanNearQuery(SpanNearQuery query, HashSet terms, String fieldName){
        Collection queryTerms = query.getTerms();
        for(Iterator iterator = queryTerms.iterator(); iterator.hasNext();){
            // break it out for debugging.
            Term term = (Term) iterator.next();
            String text = term.text();
    	    if((fieldName==null)||(term.field()==fieldName))
    	    {
    	        terms.add(new WeightedTerm(query.getBoost(), text));
    	    }
        }
    }
 }
--- a/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
+++ b/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
@ -38,13 +38,20 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.Hits;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MultiSearcher;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.RangeFilter;
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.RAMDirectory;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
@ -140,6 +147,47 @@ public class HighlighterTest extends TestCase implements Formatter
 		//Currently highlights "John" and "Kennedy" separately
 		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
 	}
 	public void testGetBestFragmentsSpan() throws Exception
 	{
 		SpanQuery clauses[]={
 			new SpanTermQuery(new Term("contents","john")),
 			new SpanTermQuery(new Term("contents","kennedy")),
 			}; 
 		SpanNearQuery snq=new SpanNearQuery(clauses,1,true);
 		doSearching(snq);
 		doStandardHighlights();
 		//Currently highlights "John" and "Kennedy" separately
 		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
 	}
 	public void testGetBestFragmentsFilteredQuery() throws Exception
 	{
 		RangeFilter rf=new RangeFilter("contents","john","john",true,true);
 		SpanQuery clauses[]={
 				new SpanTermQuery(new Term("contents","john")),
 				new SpanTermQuery(new Term("contents","kennedy")),
 				}; 
 		SpanNearQuery snq=new SpanNearQuery(clauses,1,true);
 		FilteredQuery fq=new FilteredQuery(snq,rf);
 		doSearching(fq);
 		doStandardHighlights();
 		//Currently highlights "John" and "Kennedy" separately
 		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
 	}
 	public void testGetBestFragmentsFilteredPhraseQuery() throws Exception
 	{
 		RangeFilter rf=new RangeFilter("contents","john","john",true,true);
 		PhraseQuery pq=new PhraseQuery();
 		pq.add(new Term("contents","john"));
 		pq.add(new  Term("contents","kennedy"));
 		FilteredQuery fq=new FilteredQuery(pq,rf);
 		doSearching(fq);
 		doStandardHighlights();
 		//Currently highlights "John" and "Kennedy" separately
 		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
 	}
 	public void testGetBestFragmentsMultiTerm() throws Exception
 	{
@ -181,7 +229,7 @@ public class HighlighterTest extends TestCase implements Formatter
 		for (int i = 0; i < hits.length(); i++)
 		{
    		String text = hits.doc(i).get(FIELD_NAME);
-    		highlighter.getBestFragments(analyzer, text, 10);
+    		highlighter.getBestFragments(analyzer,FIELD_NAME, text, 10);
 		}
 		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
@ -536,17 +584,21 @@ public class HighlighterTest extends TestCase implements Formatter
 		numHighlights++; //update stats used in assertions
 		return "<b>" + originalText + "</b>";
 	}
-
+	
 	public void doSearching(String queryString) throws Exception
 	{
 		searcher = new IndexSearcher(ramDir);
 		QueryParser parser=new QueryParser(FIELD_NAME, new StandardAnalyzer());
 		query = parser.parse(queryString);
 		doSearching(query);
 	}
 	public void doSearching(Query unReWrittenQuery) throws Exception
 	{
 		searcher = new IndexSearcher(ramDir);
 		//for any multi-term queries to work (prefix, wildcard, range,fuzzy etc) you must use a rewritten query!
-		query=query.rewrite(reader);
+		query=unReWrittenQuery.rewrite(reader);
 		System.out.println("Searching for: " + query.toString(FIELD_NAME));
 		hits = searcher.search(query);
-	}
+	}	
 	void doStandardHighlights() throws Exception
 	{