mirror of https://github.com/apache/lucene.git

LUCENE-4846: PostingsHighlighter allow customizing how the values to be highlighted are loaded (default is still stored fields)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1458009 13f79535-47bb-0310-9956-ffa450edef68

parent 1f6a3f6a94
commit c5763b80ff
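
For orientation before the diff (this note and sketch are not part of the commit): the change makes PostingsHighlighter non-final and adds a protected loadFieldValues hook, so the text to highlight can come from somewhere other than stored fields. A minimal sketch of such a subclass follows, assuming a hypothetical docid-keyed cache; the class name and cache are made up, and only the loadFieldValues override is the API introduced here.

  import java.io.IOException;
  import java.util.Map;

  import org.apache.lucene.search.IndexSearcher;
  import org.apache.lucene.search.postingshighlight.PostingsHighlighter;

  // Hypothetical subclass: serves field text from an application-side cache
  // instead of stored fields, so the highlighted fields need not be stored.
  class CachedValuePostingsHighlighter extends PostingsHighlighter {
    // docid -> (field name -> the exact text that was indexed); assumed to exist
    private final Map<Integer, Map<String, String>> cache;

    CachedValuePostingsHighlighter(Map<Integer, Map<String, String>> cache) {
      this.cache = cache;
    }

    @Override
    protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
      // Contract from the new javadoc: allocate [fields.length][docids.length] and
      // fill every slot; values must match what was indexed (here also capped at maxLength).
      String[][] contents = new String[fields.length][docids.length];
      for (int i = 0; i < docids.length; i++) {
        for (int j = 0; j < fields.length; j++) {
          String value = cache.get(docids[i]).get(fields[j]);
          contents[j][i] = value.length() <= maxLength ? value : value.substring(0, maxLength);
        }
      }
      return contents;
    }
  }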

@@ -102,6 +102,10 @@ New Features
 * LUCENE-4832: Add ToParentBlockJoinCollector.getTopGroupsWithAllChildDocs, to retrieve
   all children in each group. (Aleksey Aleev via Mike McCandless)
 
+* LUCENE-4846: PostingsHighlighter subclasses can override where the
+  String values come from (it still defaults to pulling from stored
+  fields). (Robert Muir, Mike McCandless)
+
 API Changes
 
 * LUCENE-4844: removed TaxonomyReader.getParent(), you should use

@@ -81,7 +81,7 @@ import org.apache.lucene.util.UnicodeUtil;
  * This is thread-safe, and can be used across different readers.
  * @lucene.experimental
  */
-public final class PostingsHighlighter {
+public class PostingsHighlighter {
 
   // TODO: maybe allow re-analysis for tiny fields? currently we require offsets,
   // but if the analyzer is really fast and the field is tiny, this might really be

@@ -257,15 +257,7 @@ public final class PostingsHighlighter {
     Arrays.sort(fields);
 
     // pull stored data:
-    LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, maxLength);
-    String contents[][] = new String[fields.length][docids.length];
-    for (int i = 0; i < docids.length; i++) {
-      searcher.doc(docids[i], visitor);
-      for (int j = 0; j < fields.length; j++) {
-        contents[j][i] = visitor.getValue(j).toString();
-      }
-      visitor.reset();
-    }
+    String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);
 
     Map<String,String[]> highlights = new HashMap<String,String[]>();
     for (int i = 0; i < fields.length; i++) {

@@ -285,6 +277,25 @@ public final class PostingsHighlighter {
     }
     return highlights;
   }
+
+  /** Loads the String values for each field X docID to be
+   *  highlighted. By default this loads from stored
+   *  fields, but a subclass can change the source. This
+   *  method should allocate the String[fields.length][docids.length]
+   *  and fill all values. The returned Strings must be
+   *  identical to what was indexed. */
+  protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
+    String contents[][] = new String[fields.length][docids.length];
+    LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, maxLength);
+    for (int i = 0; i < docids.length; i++) {
+      searcher.doc(docids[i], visitor);
+      for (int j = 0; j < fields.length; j++) {
+        contents[j][i] = visitor.getValue(j).toString();
+      }
+      visitor.reset();
+    }
+    return contents;
+  }
 
   private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, Term terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {
     Map<Integer,String> highlights = new HashMap<Integer,String>();
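
A side note on the layout contract in the new javadoc above (an illustrative fragment with made-up field names and docids, not part of the diff): the returned array is indexed field-first, then document.

  // Made-up values, for illustration only.
  String[] fields = {"title", "body"};
  int[] docids = {5, 9};
  // loadFieldValues must return a String[fields.length][docids.length], i.e. 2 x 2 here,
  // where contents[j][i] is the text of fields[j] for docids[i]:
  //   contents[0][1] -> "title" text of doc 9
  //   contents[1][0] -> "body" text of doc 5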

@@ -18,6 +18,7 @@ package org.apache.lucene.search.postingshighlight;
  */
 
 import java.io.BufferedReader;
+import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.Map;
 

@@ -465,4 +466,46 @@ public class TestPostingsHighlighter extends LuceneTestCase {
     ir.close();
     dir.close();
   }
+
+  public void testCustomFieldValueSource() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+    Document doc = new Document();
+
+    FieldType offsetsType = new FieldType(TextField.TYPE_NOT_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    final String text = "This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.";
+    Field body = new Field("body", text, offsetsType);
+    doc.add(body);
+    iw.addDocument(doc);
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null, new PassageScorer(), new PassageFormatter()) {
+        @Override
+        protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
+          assert fields.length == 1;
+          assert docids.length == 1;
+          String[][] contents = new String[1][1];
+          contents[0][0] = text;
+          return contents;
+        }
+      };
+
+    Query query = new TermQuery(new Term("body", "test"));
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
+    assertEquals(1, snippets.length);
+    assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
+
+    ir.close();
+    dir.close();
+  }
 }