LUCENE-4853: PostingsHighlighter: add method that takes int[] docIDs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1458014 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-03-18 21:29:26 +00:00
parent c5763b80ff
commit dfaddf1aa8
4 changed files with 88 additions and 24 deletions

View File

@ -106,6 +106,10 @@ New Features
String values come from (it still defaults to pulling from stored
fields). (Robert Muir, Mike McCandless)
* LUCENE-4853: Add PostingsHighlighter.highlightFields method that
takes int[] docIDs instead of TopDocs. (Robert Muir, Mike
McCandless)
API Changes
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use

View File

@ -237,16 +237,39 @@ public class PostingsHighlighter {
* {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
*/
public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages) throws IOException {
final IndexReader reader = searcher.getIndexReader();
final ScoreDoc scoreDocs[] = topDocs.scoreDocs;
query = rewrite(query);
SortedSet<Term> queryTerms = new TreeSet<Term>();
query.extractTerms(queryTerms);
int docids[] = new int[scoreDocs.length];
for (int i = 0; i < docids.length; i++) {
docids[i] = scoreDocs[i].doc;
}
return highlightFields(fields, query, searcher, docids, maxPassages);
}
/**
* Highlights the top-N passages from multiple fields,
* for the provided int[] docids.
*
* @param fields field names to highlight.
* Must have a stored string value and also be indexed with offsets.
* @param query query to highlight.
* @param searcher searcher that was previously used to execute the query.
* @param docids containing the document IDs to highlight.
* @param maxPassages The maximum number of top-N ranked passages per-field used to
* form the highlighted snippets.
* @return Map keyed on field name, containing the array of formatted snippets
* corresponding to the documents in <code>topDocs</code>.
* If no highlights were found for a document, its value is <code>null</code>.
* @throws IOException if an I/O error occurred during processing
* @throws IllegalArgumentException if <code>field</code> was indexed without
* {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
*/
public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, int[] docids, int maxPassages) throws IOException {
final IndexReader reader = searcher.getIndexReader();
query = rewrite(query);
SortedSet<Term> queryTerms = new TreeSet<Term>();
query.extractTerms(queryTerms);
IndexReaderContext readerContext = reader.getContext();
List<AtomicReaderContext> leaves = readerContext.leaves();
@ -269,9 +292,9 @@ public class PostingsHighlighter {
Term terms[] = fieldTerms.toArray(new Term[fieldTerms.size()]);
Map<Integer,String> fieldHighlights = highlightField(field, contents[i], bi, terms, docids, leaves, maxPassages);
String[] result = new String[scoreDocs.length];
for (int j = 0; j < scoreDocs.length; j++) {
result[j] = fieldHighlights.get(scoreDocs[j].doc);
String[] result = new String[docids.length];
for (int j = 0; j < docids.length; j++) {
result[j] = fieldHighlights.get(docids[j]);
}
highlights.put(field, result);
}

View File

@ -40,6 +40,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
@ -467,6 +468,44 @@ public class TestPostingsHighlighter extends LuceneTestCase {
dir.close();
}
public void testSpecificDocIDs() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
Field body = new Field("body", "", offsetsType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
iw.addDocument(doc);
body.setStringValue("Highlighting the first term. Hope it works.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
PostingsHighlighter highlighter = new PostingsHighlighter();
Query query = new TermQuery(new Term("body", "highlighting"));
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
ScoreDoc[] hits = topDocs.scoreDocs;
int[] docIDs = new int[2];
docIDs[0] = hits[0].doc;
docIDs[1] = hits[1].doc;
String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 1).get("body");
assertEquals(2, snippets.length);
assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
ir.close();
dir.close();
}
public void testCustomFieldValueSource() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));

View File

@ -26,8 +26,6 @@ import java.util.Set;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.postingshighlight.PassageFormatter;
import org.apache.lucene.search.postingshighlight.PassageScorer;
import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
@ -129,16 +127,16 @@ public class PostingsSolrHighlighter extends SolrHighlighter implements PluginIn
// if highlighting isnt enabled, then why call doHighlighting?
if (isHighlightingEnabled(params)) {
SolrIndexSearcher searcher = req.getSearcher();
TopDocs topDocs = toTopDocs(docs);
int[] docIDs = toDocIDs(docs);
// fetch the unique keys
String[] keys = getUniqueKeys(searcher, topDocs);
String[] keys = getUniqueKeys(searcher, docIDs);
// query-time parameters
String[] fieldNames = getHighlightFields(query, req, defaultFields);
int numSnippets = params.getInt(HighlightParams.SNIPPETS, 1);
Map<String,String[]> snippets = highlighter.highlightFields(fieldNames, query, searcher, topDocs, numSnippets);
Map<String,String[]> snippets = highlighter.highlightFields(fieldNames, query, searcher, docIDs, numSnippets);
return encodeSnippets(keys, fieldNames, snippets);
} else {
return null;
@ -171,38 +169,38 @@ public class PostingsSolrHighlighter extends SolrHighlighter implements PluginIn
return list;
}
/** Converts solr's DocList to a lucene TopDocs */
protected TopDocs toTopDocs(DocList docs) {
ScoreDoc[] scoreDocs = new ScoreDoc[docs.size()];
/** Converts solr's DocList to the int[] docIDs */
protected int[] toDocIDs(DocList docs) {
int[] docIDs = new int[docs.size()];
DocIterator iterator = docs.iterator();
for (int i = 0; i < scoreDocs.length; i++) {
for (int i = 0; i < docIDs.length; i++) {
if (!iterator.hasNext()) {
throw new AssertionError();
}
scoreDocs[i] = new ScoreDoc(iterator.nextDoc(), Float.NaN);
docIDs[i] = iterator.nextDoc();
}
if (iterator.hasNext()) {
throw new AssertionError();
}
return new TopDocs(docs.matches(), scoreDocs, Float.NaN);
return docIDs;
}
/** Retrieves the unique keys for the topdocs to key the results */
protected String[] getUniqueKeys(SolrIndexSearcher searcher, TopDocs topDocs) throws IOException {
protected String[] getUniqueKeys(SolrIndexSearcher searcher, int[] docIDs) throws IOException {
IndexSchema schema = searcher.getSchema();
SchemaField keyField = schema.getUniqueKeyField();
if (keyField != null) {
Set<String> selector = Collections.singleton(keyField.getName());
String uniqueKeys[] = new String[topDocs.scoreDocs.length];
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
int docid = topDocs.scoreDocs[i].doc;
String uniqueKeys[] = new String[docIDs.length];
for (int i = 0; i < docIDs.length; i++) {
int docid = docIDs[i];
StoredDocument doc = searcher.doc(docid, selector);
String id = schema.printableUniqueKey(doc);
uniqueKeys[i] = id;
}
return uniqueKeys;
} else {
return new String[topDocs.scoreDocs.length];
return new String[docIDs.length];
}
}
}