mirror of https://github.com/apache/lucene.git
LUCENE-4861: make BreakIterator per-field in PostingsHighlighter
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1463083 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
42b081cd5c
commit
48a28acbfd
|
@ -89,8 +89,8 @@ New Features
|
|||
* LUCENE-4820: Add payloads to Analyzing/FuzzySuggester, to record an
|
||||
arbitrary byte[] per suggestion (Mike McCandless)
|
||||
|
||||
* LUCENE-4816: Passing null as the BreakIterator to PostingsHighlighter
|
||||
now highlights the entire content as a single Passage. (Robert
|
||||
* LUCENE-4816: Add WholeBreakIterator to PostingsHighlighter
|
||||
for treating the entire content as a single Passage. (Robert
|
||||
Muir, Mike McCandless)
|
||||
|
||||
* LUCENE-4827: Add additional ctor to PostingsHighlighter PassageScorer
|
||||
|
@ -148,6 +148,10 @@ New Features
|
|||
you only have points (1/doc) then "Intersects" is equivalent and faster.
|
||||
See the javadocs. (David Smiley)
|
||||
|
||||
* LUCENE-4861: Make BreakIterator per-field in PostingsHighlighter. This means
|
||||
you can override getBreakIterator(String field) to use different mechanisms
|
||||
for e.g. title vs. body fields. (Mike McCandless, Robert Muir)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace
|
||||
|
|
|
@ -96,7 +96,6 @@ public class PostingsHighlighter {
|
|||
public static final int DEFAULT_MAX_LENGTH = 10000;
|
||||
|
||||
private final int maxLength;
|
||||
private final BreakIterator breakIterator;
|
||||
|
||||
/** Set the first time {@link #getFormatter} is called,
|
||||
* and then reused. */
|
||||
|
@ -119,28 +118,20 @@ public class PostingsHighlighter {
|
|||
* @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
|
||||
*/
|
||||
public PostingsHighlighter(int maxLength) {
|
||||
this(maxLength, BreakIterator.getSentenceInstance(Locale.ROOT));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new highlighter with custom parameters.
|
||||
* @param maxLength maximum content size to process.
|
||||
* @param breakIterator used for finding passage
|
||||
* boundaries; pass null to highlight the entire
|
||||
* content as a single Passage.
|
||||
* @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
|
||||
*/
|
||||
public PostingsHighlighter(int maxLength, BreakIterator breakIterator) {
|
||||
if (maxLength < 0 || maxLength == Integer.MAX_VALUE) {
|
||||
// two reasons: no overflow problems in BreakIterator.preceding(offset+1),
|
||||
// our sentinel in the offsets queue uses this value to terminate.
|
||||
throw new IllegalArgumentException("maxLength must be < Integer.MAX_VALUE");
|
||||
}
|
||||
if (breakIterator == null) {
|
||||
breakIterator = new WholeBreakIterator();
|
||||
}
|
||||
this.maxLength = maxLength;
|
||||
this.breakIterator = breakIterator;
|
||||
}
|
||||
|
||||
/** Returns the {@link BreakIterator} to use for
|
||||
* dividing text into passages. This returns
|
||||
* {@link BreakIterator#getSentenceInstance(Locale)} by default;
|
||||
* subclasses can override to customize. */
|
||||
protected BreakIterator getBreakIterator(String field) {
|
||||
return BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
}
|
||||
|
||||
/** Returns the {@link PassageFormatter} to use for
|
||||
|
@ -303,8 +294,6 @@ public class PostingsHighlighter {
|
|||
IndexReaderContext readerContext = reader.getContext();
|
||||
List<AtomicReaderContext> leaves = readerContext.leaves();
|
||||
|
||||
BreakIterator bi = (BreakIterator)breakIterator.clone();
|
||||
|
||||
// Make our own copy because we sort in-place:
|
||||
int[] docids = new int[docidsIn.length];
|
||||
System.arraycopy(docidsIn, 0, docids, 0, docidsIn.length);
|
||||
|
@ -330,7 +319,7 @@ public class PostingsHighlighter {
|
|||
for(Term term : fieldTerms) {
|
||||
terms[termUpto++] = term.bytes();
|
||||
}
|
||||
Map<Integer,String> fieldHighlights = highlightField(field, contents[i], bi, terms, docids, leaves, maxPassages);
|
||||
Map<Integer,String> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, maxPassages);
|
||||
|
||||
String[] result = new String[docids.length];
|
||||
for (int j = 0; j < docidsIn.length; j++) {
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.text.BreakIterator;
|
|||
import java.text.CharacterIterator;
|
||||
|
||||
/** Just produces one single fragment for the entire text */
|
||||
final class WholeBreakIterator extends BreakIterator {
|
||||
public final class WholeBreakIterator extends BreakIterator {
|
||||
private CharacterIterator text;
|
||||
private int start;
|
||||
private int end;
|
||||
|
|
|
@ -457,7 +457,12 @@ public class TestPostingsHighlighter extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter(10000, null);
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
|
||||
@Override
|
||||
protected BreakIterator getBreakIterator(String field) {
|
||||
return new WholeBreakIterator();
|
||||
}
|
||||
};
|
||||
Query query = new TermQuery(new Term("body", "test"));
|
||||
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
|
@ -527,7 +532,7 @@ public class TestPostingsHighlighter extends LuceneTestCase {
|
|||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter(10000, null) {
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
|
||||
@Override
|
||||
protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
|
||||
assert fields.length == 1;
|
||||
|
@ -536,6 +541,11 @@ public class TestPostingsHighlighter extends LuceneTestCase {
|
|||
contents[0][0] = text;
|
||||
return contents;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BreakIterator getBreakIterator(String field) {
|
||||
return new WholeBreakIterator();
|
||||
}
|
||||
};
|
||||
|
||||
Query query = new TermQuery(new Term("body", "test"));
|
||||
|
@ -636,7 +646,12 @@ public class TestPostingsHighlighter extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter(10000, null);
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
|
||||
@Override
|
||||
protected BreakIterator getBreakIterator(String field) {
|
||||
return new WholeBreakIterator();
|
||||
}
|
||||
};
|
||||
Query query = new TermQuery(new Term("body", "highlighting"));
|
||||
int[] docIDs = new int[] {0};
|
||||
String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 2).get("body");
|
||||
|
|
|
@ -113,8 +113,7 @@ public class TestPostingsHighlighterRanking extends LuceneTestCase {
|
|||
private void checkQuery(IndexSearcher is, Query query, int doc, int maxTopN) throws IOException {
|
||||
for (int n = 1; n < maxTopN; n++) {
|
||||
final FakePassageFormatter f1 = new FakePassageFormatter();
|
||||
PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1,
|
||||
BreakIterator.getSentenceInstance(Locale.ROOT)) {
|
||||
PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
|
||||
@Override
|
||||
protected PassageFormatter getFormatter(String field) {
|
||||
assertEquals("body", field);
|
||||
|
@ -123,8 +122,7 @@ public class TestPostingsHighlighterRanking extends LuceneTestCase {
|
|||
};
|
||||
|
||||
final FakePassageFormatter f2 = new FakePassageFormatter();
|
||||
PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1,
|
||||
BreakIterator.getSentenceInstance(Locale.ROOT)) {
|
||||
PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
|
||||
@Override
|
||||
protected PassageFormatter getFormatter(String field) {
|
||||
assertEquals("body", field);
|
||||
|
@ -269,8 +267,7 @@ public class TestPostingsHighlighterRanking extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter(10000,
|
||||
BreakIterator.getSentenceInstance(Locale.ROOT)) {
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
|
||||
@Override
|
||||
protected PassageScorer getScorer(String field) {
|
||||
return new PassageScorer(1.2f, 0, 87);
|
||||
|
@ -309,8 +306,7 @@ public class TestPostingsHighlighterRanking extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter(10000,
|
||||
BreakIterator.getSentenceInstance(Locale.ROOT)) {
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
|
||||
@Override
|
||||
protected PassageScorer getScorer(String field) {
|
||||
return new PassageScorer(0, 0.75f, 87);
|
||||
|
|
|
@ -80,7 +80,6 @@ public class PostingsSolrHighlighter extends SolrHighlighter implements PluginIn
|
|||
@Override
|
||||
public void init(PluginInfo info) {
|
||||
Map<String,String> attributes = info.attributes;
|
||||
BreakIterator breakIterator = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
|
||||
// scorer parameters: k1/b/pivot
|
||||
String k1 = attributes.get("k1");
|
||||
|
@ -127,7 +126,7 @@ public class PostingsSolrHighlighter extends SolrHighlighter implements PluginIn
|
|||
if (attributes.containsKey("maxLength")) {
|
||||
maxLength = Integer.parseInt(attributes.get("maxLength"));
|
||||
}
|
||||
highlighter = new PostingsHighlighter(maxLength, breakIterator) {
|
||||
highlighter = new PostingsHighlighter(maxLength) {
|
||||
@Override
|
||||
protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
|
||||
if (summarizeEmptyBoolean) {
|
||||
|
|
Loading…
Reference in New Issue