mirror of https://github.com/apache/lucene.git
LUCENE-4906: PostingsHighlighter: add expert API to render highlights to Object
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1522619 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
980411d93d
commit
c3ccfbe8d1
|
@ -47,6 +47,11 @@ Optimizations
|
|||
|
||||
======================= Lucene 4.6.0 =======================
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-4906: PostingsHighlighter can now render to custom Object,
|
||||
for advanced use cases where String is too restrictive (Luca
|
||||
Cavanna, Robert Muir, Mike McCandless)
|
||||
|
||||
======================= Lucene 4.5.0 =======================
|
||||
|
||||
|
|
|
@ -31,8 +31,11 @@ public abstract class PassageFormatter {
|
|||
* @param passages top-N passages for the field. Note these are sorted in
|
||||
* the order that they appear in the document for convenience.
|
||||
* @param content content for the field.
|
||||
* @return formatted highlight
|
||||
* @return formatted highlight. Note that for the
|
||||
* non-expert APIs in {@link PostingsHighlighter} that
|
||||
* return String, the toString method on the Object
|
||||
* returned by this method is used to compute the string.
|
||||
*/
|
||||
public abstract String format(Passage passages[], String content);
|
||||
public abstract Object format(Passage passages[], String content);
|
||||
|
||||
}
|
||||
|
|
|
@ -267,7 +267,7 @@ public class PostingsHighlighter {
|
|||
|
||||
return highlightFields(fields, query, searcher, docids, maxPassages);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Highlights the top-N passages from multiple fields,
|
||||
* for the provided int[] docids.
|
||||
|
@ -280,7 +280,7 @@ public class PostingsHighlighter {
|
|||
* @param maxPassagesIn The maximum number of top-N ranked passages per-field used to
|
||||
* form the highlighted snippets.
|
||||
* @return Map keyed on field name, containing the array of formatted snippets
|
||||
* corresponding to the documents in <code>topDocs</code>.
|
||||
* corresponding to the documents in <code>docidsIn</code>.
|
||||
* If no highlights were found for a document, the
|
||||
* first {@code maxPassages} from the field will
|
||||
* be returned.
|
||||
|
@ -289,6 +289,45 @@ public class PostingsHighlighter {
|
|||
* {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
|
||||
*/
|
||||
public Map<String,String[]> highlightFields(String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[]) throws IOException {
|
||||
Map<String,String[]> snippets = new HashMap<String,String[]>();
|
||||
for(Map.Entry<String,Object[]> ent : highlightFieldsAsObjects(fieldsIn, query, searcher, docidsIn, maxPassagesIn).entrySet()) {
|
||||
Object[] snippetObjects = ent.getValue();
|
||||
String[] snippetStrings = new String[snippetObjects.length];
|
||||
snippets.put(ent.getKey(), snippetStrings);
|
||||
for(int i=0;i<snippetObjects.length;i++) {
|
||||
Object snippet = snippetObjects[i];
|
||||
if (snippet != null) {
|
||||
snippetStrings[i] = snippet.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return snippets;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: highlights the top-N passages from multiple fields,
|
||||
* for the provided int[] docids, to custom Object as
|
||||
* returned by the {@link PassageFormatter}. Use
|
||||
* this API to render to something other than String.
|
||||
*
|
||||
* @param fieldsIn field names to highlight.
|
||||
* Must have a stored string value and also be indexed with offsets.
|
||||
* @param query query to highlight.
|
||||
* @param searcher searcher that was previously used to execute the query.
|
||||
* @param docidsIn containing the document IDs to highlight.
|
||||
* @param maxPassagesIn The maximum number of top-N ranked passages per-field used to
|
||||
* form the highlighted snippets.
|
||||
* @return Map keyed on field name, containing the array of formatted snippets
|
||||
* corresponding to the documents in <code>docidsIn</code>.
|
||||
* If no highlights were found for a document, the
|
||||
* first {@code maxPassages} from the field will
|
||||
* be returned.
|
||||
* @throws IOException if an I/O error occurred during processing
|
||||
* @throws IllegalArgumentException if <code>field</code> was indexed without
|
||||
* {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
|
||||
*/
|
||||
public Map<String,Object[]> highlightFieldsAsObjects(String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[]) throws IOException {
|
||||
if (fieldsIn.length < 1) {
|
||||
throw new IllegalArgumentException("fieldsIn must not be empty");
|
||||
}
|
||||
|
@ -335,7 +374,7 @@ public class PostingsHighlighter {
|
|||
// pull stored data:
|
||||
String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);
|
||||
|
||||
Map<String,String[]> highlights = new HashMap<String,String[]>();
|
||||
Map<String,Object[]> highlights = new HashMap<String,Object[]>();
|
||||
for (int i = 0; i < fields.length; i++) {
|
||||
String field = fields[i];
|
||||
int numPassages = maxPassages[i];
|
||||
|
@ -350,9 +389,9 @@ public class PostingsHighlighter {
|
|||
for(Term term : fieldTerms) {
|
||||
terms[termUpto++] = term.bytes();
|
||||
}
|
||||
Map<Integer,String> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, numPassages);
|
||||
Map<Integer,Object> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, numPassages);
|
||||
|
||||
String[] result = new String[docids.length];
|
||||
Object[] result = new Object[docids.length];
|
||||
for (int j = 0; j < docidsIn.length; j++) {
|
||||
result[j] = fieldHighlights.get(docidsIn[j]);
|
||||
}
|
||||
|
@ -394,8 +433,8 @@ public class PostingsHighlighter {
|
|||
return ' ';
|
||||
}
|
||||
|
||||
private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {
|
||||
Map<Integer,String> highlights = new HashMap<Integer,String>();
|
||||
private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {
|
||||
Map<Integer,Object> highlights = new HashMap<Integer,Object>();
|
||||
|
||||
// reuse in the real sense... for docs in same segment we just advance our old enum
|
||||
DocsAndPositionsEnum postings[] = null;
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.BufferedReader;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.text.BreakIterator;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
@ -47,8 +48,8 @@ import org.apache.lucene.search.Sort;
|
|||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
|
||||
public class TestPostingsHighlighter extends LuceneTestCase {
|
||||
|
@ -1068,4 +1069,54 @@ public class TestPostingsHighlighter extends LuceneTestCase {
|
|||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// LUCENE-4906
|
||||
public void testObjectFormatter() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
|
||||
offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
|
||||
Field body = new Field("body", "", offsetsType);
|
||||
Document doc = new Document();
|
||||
doc.add(body);
|
||||
|
||||
body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
|
||||
iw.addDocument(doc);
|
||||
|
||||
IndexReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter() {
|
||||
@Override
|
||||
protected PassageFormatter getFormatter(String field) {
|
||||
return new PassageFormatter() {
|
||||
PassageFormatter defaultFormatter = new DefaultPassageFormatter();
|
||||
|
||||
@Override
|
||||
public String[] format(Passage passages[], String content) {
|
||||
// Just turns the String snippet into a length 2
|
||||
// array of String
|
||||
return new String[] {"blah blah", defaultFormatter.format(passages, content).toString()};
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
Query query = new TermQuery(new Term("body", "highlighting"));
|
||||
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
int[] docIDs = new int[1];
|
||||
docIDs[0] = topDocs.scoreDocs[0].doc;
|
||||
Map<String,Object[]> snippets = highlighter.highlightFieldsAsObjects(new String[]{"body"}, query, searcher, docIDs, new int[] {1});
|
||||
Object[] bodySnippets = snippets.get("body");
|
||||
assertEquals(1, bodySnippets.length);
|
||||
assertTrue(Arrays.equals(new String[] {"blah blah", "Just a test <b>highlighting</b> from postings. "}, (String[]) bodySnippets[0]));
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue