mirror of https://github.com/apache/lucene.git
LUCENE-4853: PostingsHighlighter: add method that takes int[] docIDs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1458014 13f79535-47bb-0310-9956-ffa450edef68
parent c5763b80ff
commit dfaddf1aa8
CHANGES.txt
@@ -106,6 +106,10 @@ New Features
     String values come from (it still defaults to pulling from stored
     fields).  (Robert Muir, Mike McCandless)
 
+* LUCENE-4853: Add PostingsHighlighter.highlightFields method that
+  takes int[] docIDs instead of TopDocs.  (Robert Muir, Mike
+  McCandless)
+
 API Changes
 
 * LUCENE-4844: removed TaxonomyReader.getParent(), you should use
PostingsHighlighter.java
@@ -237,16 +237,39 @@ public class PostingsHighlighter {
    * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
    */
   public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages) throws IOException {
-    final IndexReader reader = searcher.getIndexReader();
     final ScoreDoc scoreDocs[] = topDocs.scoreDocs;
-    query = rewrite(query);
-    SortedSet<Term> queryTerms = new TreeSet<Term>();
-    query.extractTerms(queryTerms);
+    int docids[] = new int[scoreDocs.length];
+    for (int i = 0; i < docids.length; i++) {
+      docids[i] = scoreDocs[i].doc;
+    }
+
+    return highlightFields(fields, query, searcher, docids, maxPassages);
+  }
+
+  /**
+   * Highlights the top-N passages from multiple fields,
+   * for the provided int[] docids.
+   *
+   * @param fields field names to highlight.
+   *        Must have a stored string value and also be indexed with offsets.
+   * @param query query to highlight.
+   * @param searcher searcher that was previously used to execute the query.
+   * @param docids containing the document IDs to highlight.
+   * @param maxPassages The maximum number of top-N ranked passages per-field used to
+   *        form the highlighted snippets.
+   * @return Map keyed on field name, containing the array of formatted snippets
+   *         corresponding to the documents in <code>docids</code>.
+   *         If no highlights were found for a document, its value is <code>null</code>.
+   * @throws IOException if an I/O error occurred during processing
+   * @throws IllegalArgumentException if <code>field</code> was indexed without
+   *         {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
+   */
+  public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, int[] docids, int maxPassages) throws IOException {
+    final IndexReader reader = searcher.getIndexReader();
+    query = rewrite(query);
+    SortedSet<Term> queryTerms = new TreeSet<Term>();
+    query.extractTerms(queryTerms);
 
     IndexReaderContext readerContext = reader.getContext();
     List<AtomicReaderContext> leaves = readerContext.leaves();
@@ -269,9 +292,9 @@ public class PostingsHighlighter {
       Term terms[] = fieldTerms.toArray(new Term[fieldTerms.size()]);
       Map<Integer,String> fieldHighlights = highlightField(field, contents[i], bi, terms, docids, leaves, maxPassages);
 
-      String[] result = new String[scoreDocs.length];
-      for (int j = 0; j < scoreDocs.length; j++) {
-        result[j] = fieldHighlights.get(scoreDocs[j].doc);
+      String[] result = new String[docids.length];
+      for (int j = 0; j < docids.length; j++) {
+        result[j] = fieldHighlights.get(docids[j]);
       }
       highlights.put(field, result);
     }
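Taken together, the two PostingsHighlighter hunks mean callers no longer need a TopDocs in hand before highlighting: any array of document IDs will do. Below is a minimal usage sketch against the Lucene 4.x trunk API this diff targets. The class name HighlightByDocID, the RAMDirectory/StandardAnalyzer setup, the Version constant, and the hard-wired doc ID are illustrative assumptions, not part of the commit.

import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class HighlightByDocID {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_42, new StandardAnalyzer(Version.LUCENE_42)));

    // As the javadoc requires: the field must be stored and indexed with offsets.
    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Document doc = new Document();
    doc.add(new Field("body", "Just a test highlighting from postings.", offsetsType));
    iw.addDocument(doc);
    iw.close();

    DirectoryReader ir = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(ir);
    Query query = new TermQuery(new Term("body", "highlighting"));

    // The IDs can come from anywhere (a collector, a cache, Solr's DocList);
    // no TopDocs is needed. Doc ID 0 is hard-wired here because the
    // single-segment index above contains exactly one document.
    int[] docIDs = new int[] {0};
    PostingsHighlighter highlighter = new PostingsHighlighter();
    Map<String,String[]> snippets =
        highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 1);
    System.out.println(snippets.get("body")[0]);

    ir.close();
    dir.close();
  }
}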
TestPostingsHighlighter.java
@@ -40,6 +40,7 @@ import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
@@ -467,6 +468,44 @@ public class TestPostingsHighlighter extends LuceneTestCase {
     dir.close();
   }
 
+  public void testSpecificDocIDs() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", "", offsetsType);
+    Document doc = new Document();
+    doc.add(body);
+
+    body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+    iw.addDocument(doc);
+    body.setStringValue("Highlighting the first term. Hope it works.");
+    iw.addDocument(doc);
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter();
+    Query query = new TermQuery(new Term("body", "highlighting"));
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(2, topDocs.totalHits);
+    ScoreDoc[] hits = topDocs.scoreDocs;
+    int[] docIDs = new int[2];
+    docIDs[0] = hits[0].doc;
+    docIDs[1] = hits[1].doc;
+    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 1).get("body");
+    assertEquals(2, snippets.length);
+    assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
+    assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
+
+    ir.close();
+    dir.close();
+  }
+
   public void testCustomFieldValueSource() throws Exception {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
PostingsSolrHighlighter.java
@@ -26,8 +26,6 @@ import java.util.Set;
 
 import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.postingshighlight.PassageFormatter;
 import org.apache.lucene.search.postingshighlight.PassageScorer;
 import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
@@ -129,16 +127,16 @@ public class PostingsSolrHighlighter extends SolrHighlighter implements PluginIn
     // if highlighting isnt enabled, then why call doHighlighting?
     if (isHighlightingEnabled(params)) {
       SolrIndexSearcher searcher = req.getSearcher();
-      TopDocs topDocs = toTopDocs(docs);
+      int[] docIDs = toDocIDs(docs);
 
       // fetch the unique keys
-      String[] keys = getUniqueKeys(searcher, topDocs);
+      String[] keys = getUniqueKeys(searcher, docIDs);
 
       // query-time parameters
       String[] fieldNames = getHighlightFields(query, req, defaultFields);
       int numSnippets = params.getInt(HighlightParams.SNIPPETS, 1);
 
-      Map<String,String[]> snippets = highlighter.highlightFields(fieldNames, query, searcher, topDocs, numSnippets);
+      Map<String,String[]> snippets = highlighter.highlightFields(fieldNames, query, searcher, docIDs, numSnippets);
       return encodeSnippets(keys, fieldNames, snippets);
     } else {
       return null;
@@ -171,38 +169,38 @@ public class PostingsSolrHighlighter extends SolrHighlighter implements PluginIn
     return list;
   }
 
-  /** Converts solr's DocList to a lucene TopDocs */
-  protected TopDocs toTopDocs(DocList docs) {
-    ScoreDoc[] scoreDocs = new ScoreDoc[docs.size()];
+  /** Converts solr's DocList to the int[] docIDs */
+  protected int[] toDocIDs(DocList docs) {
+    int[] docIDs = new int[docs.size()];
     DocIterator iterator = docs.iterator();
-    for (int i = 0; i < scoreDocs.length; i++) {
+    for (int i = 0; i < docIDs.length; i++) {
       if (!iterator.hasNext()) {
         throw new AssertionError();
       }
-      scoreDocs[i] = new ScoreDoc(iterator.nextDoc(), Float.NaN);
+      docIDs[i] = iterator.nextDoc();
     }
     if (iterator.hasNext()) {
       throw new AssertionError();
     }
-    return new TopDocs(docs.matches(), scoreDocs, Float.NaN);
+    return docIDs;
   }
 
   /** Retrieves the unique keys for the topdocs to key the results */
-  protected String[] getUniqueKeys(SolrIndexSearcher searcher, TopDocs topDocs) throws IOException {
+  protected String[] getUniqueKeys(SolrIndexSearcher searcher, int[] docIDs) throws IOException {
     IndexSchema schema = searcher.getSchema();
     SchemaField keyField = schema.getUniqueKeyField();
     if (keyField != null) {
       Set<String> selector = Collections.singleton(keyField.getName());
-      String uniqueKeys[] = new String[topDocs.scoreDocs.length];
-      for (int i = 0; i < topDocs.scoreDocs.length; i++) {
-        int docid = topDocs.scoreDocs[i].doc;
+      String uniqueKeys[] = new String[docIDs.length];
+      for (int i = 0; i < docIDs.length; i++) {
+        int docid = docIDs[i];
         StoredDocument doc = searcher.doc(docid, selector);
         String id = schema.printableUniqueKey(doc);
         uniqueKeys[i] = id;
       }
       return uniqueKeys;
     } else {
-      return new String[topDocs.scoreDocs.length];
+      return new String[docIDs.length];
     }
   }
 }
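A note on the design choice in the Solr hunk: the removed toTopDocs existed only to adapt Solr's DocList to the old TopDocs signature, fabricating ScoreDoc entries with Float.NaN placeholder scores that the highlighter never read (the TopDocs overload above only extracts scoreDocs[i].doc). The new toDocIDs copies the IDs directly while keeping the same fail-fast length checks. A stand-alone sketch of that copy pattern follows, substituting java.util.PrimitiveIterator.OfInt (Java 8+, a modern stand-in chosen only so the example is self-contained) for Solr's internal DocIterator; the class and method names here are hypothetical.

import java.util.Arrays;
import java.util.PrimitiveIterator;
import java.util.stream.IntStream;

public class DocIDCopy {
  /**
   * Copies exactly {@code size} IDs from the iterator, failing fast if the
   * iterator turns out to be shorter or longer than promised, mirroring the
   * AssertionError checks toDocIDs performs against DocList.size().
   */
  static int[] toDocIDs(PrimitiveIterator.OfInt iterator, int size) {
    int[] docIDs = new int[size];
    for (int i = 0; i < docIDs.length; i++) {
      if (!iterator.hasNext()) {
        throw new AssertionError();
      }
      docIDs[i] = iterator.nextInt();
    }
    if (iterator.hasNext()) {
      throw new AssertionError();
    }
    return docIDs;
  }

  public static void main(String[] args) {
    int[] ids = toDocIDs(IntStream.of(4, 7, 42).iterator(), 3);
    System.out.println(Arrays.toString(ids)); // [4, 7, 42]
  }
}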