mirror of https://github.com/apache/lucene.git
LUCENE-4846: PostingsHighlighter allow customizing how the values to be highlighted are loaded (default is still stored fields)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1458009 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1f6a3f6a94
commit
c5763b80ff
|
@ -102,6 +102,10 @@ New Features
|
|||
* LUCENE-4832: Add ToParentBlockJoinCollector.getTopGroupsWithAllChildDocs, to retrieve
|
||||
all children in each group. (Aleksey Aleev via Mike McCandless)
|
||||
|
||||
* LUCENE-4846: PostingsHighlighter subclasses can override where the
|
||||
String values come from (it still defaults to pulling from stored
|
||||
fields). (Robert Muir, Mike McCandless)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use
|
||||
|
|
|
@ -81,7 +81,7 @@ import org.apache.lucene.util.UnicodeUtil;
|
|||
* This is thread-safe, and can be used across different readers.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class PostingsHighlighter {
|
||||
public class PostingsHighlighter {
|
||||
|
||||
// TODO: maybe allow re-analysis for tiny fields? currently we require offsets,
|
||||
// but if the analyzer is really fast and the field is tiny, this might really be
|
||||
|
@ -257,15 +257,7 @@ public final class PostingsHighlighter {
|
|||
Arrays.sort(fields);
|
||||
|
||||
// pull stored data:
|
||||
LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, maxLength);
|
||||
String contents[][] = new String[fields.length][docids.length];
|
||||
for (int i = 0; i < docids.length; i++) {
|
||||
searcher.doc(docids[i], visitor);
|
||||
for (int j = 0; j < fields.length; j++) {
|
||||
contents[j][i] = visitor.getValue(j).toString();
|
||||
}
|
||||
visitor.reset();
|
||||
}
|
||||
String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);
|
||||
|
||||
Map<String,String[]> highlights = new HashMap<String,String[]>();
|
||||
for (int i = 0; i < fields.length; i++) {
|
||||
|
@ -285,6 +277,25 @@ public final class PostingsHighlighter {
|
|||
}
|
||||
return highlights;
|
||||
}
|
||||
|
||||
/** Loads the String values for each field X docID to be
|
||||
* highlighted. By default this loads from stored
|
||||
* fields, but a subclass can change the source. This
|
||||
* method should allocate the String[fields.length][docids.length]
|
||||
* and fill all values. The returned Strings must be
|
||||
* identical to what was indexed. */
|
||||
protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
|
||||
String contents[][] = new String[fields.length][docids.length];
|
||||
LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, maxLength);
|
||||
for (int i = 0; i < docids.length; i++) {
|
||||
searcher.doc(docids[i], visitor);
|
||||
for (int j = 0; j < fields.length; j++) {
|
||||
contents[j][i] = visitor.getValue(j).toString();
|
||||
}
|
||||
visitor.reset();
|
||||
}
|
||||
return contents;
|
||||
}
|
||||
|
||||
private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, Term terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {
|
||||
Map<Integer,String> highlights = new HashMap<Integer,String>();
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search.postingshighlight;
|
|||
*/
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -465,4 +466,46 @@ public class TestPostingsHighlighter extends LuceneTestCase {
|
|||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testCustomFieldValueSource() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
|
||||
iwc.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
Document doc = new Document();
|
||||
|
||||
FieldType offsetsType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||
offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
|
||||
final String text = "This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.";
|
||||
Field body = new Field("body", text, offsetsType);
|
||||
doc.add(body);
|
||||
iw.addDocument(doc);
|
||||
|
||||
IndexReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter(10000, null, new PassageScorer(), new PassageFormatter()) {
|
||||
@Override
|
||||
protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
|
||||
assert fields.length == 1;
|
||||
assert docids.length == 1;
|
||||
String[][] contents = new String[1][1];
|
||||
contents[0][0] = text;
|
||||
return contents;
|
||||
}
|
||||
};
|
||||
|
||||
Query query = new TermQuery(new Term("body", "test"));
|
||||
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue