LUCENE-4861: make BreakIterator per-field in PostingsHighlighter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1463083 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-04-01 04:39:49 +00:00
parent 42b081cd5c
commit 48a28acbfd
6 changed files with 39 additions and 36 deletions

View File

@ -89,8 +89,8 @@ New Features
* LUCENE-4820: Add payloads to Analyzing/FuzzySuggester, to record an
arbitrary byte[] per suggestion (Mike McCandless)
* LUCENE-4816: Passing null as the BreakIterator to PostingsHighlighter
now highlights the entire content as a single Passage. (Robert
* LUCENE-4816: Add WholeBreakIterator to PostingsHighlighter
for treating the entire content as a single Passage. (Robert
Muir, Mike McCandless)
* LUCENE-4827: Add additional ctor to PostingsHighlighter PassageScorer
@ -148,6 +148,10 @@ New Features
you only have points (1/doc) then "Intersects" is equivalent and faster.
See the javadocs. (David Smiley)
* LUCENE-4861: Make BreakIterator per-field in PostingsHighlighter. This means
you can override getBreakIterator(String field) to use different mechanisms
for e.g. title vs. body fields. (Mike McCandless, Robert Muir)
Optimizations
* LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace

View File

@ -96,7 +96,6 @@ public class PostingsHighlighter {
public static final int DEFAULT_MAX_LENGTH = 10000;
private final int maxLength;
private final BreakIterator breakIterator;
/** Set the first time {@link #getFormatter} is called,
* and then reused. */
@ -119,28 +118,20 @@ public class PostingsHighlighter {
* @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
*/
public PostingsHighlighter(int maxLength) {
this(maxLength, BreakIterator.getSentenceInstance(Locale.ROOT));
}
/**
* Creates a new highlighter with custom parameters.
* @param maxLength maximum content size to process.
* @param breakIterator used for finding passage
* boundaries; pass null to highlight the entire
* content as a single Passage.
* @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
*/
public PostingsHighlighter(int maxLength, BreakIterator breakIterator) {
if (maxLength < 0 || maxLength == Integer.MAX_VALUE) {
// two reasons: no overflow problems in BreakIterator.preceding(offset+1),
// our sentinel in the offsets queue uses this value to terminate.
throw new IllegalArgumentException("maxLength must be < Integer.MAX_VALUE");
}
if (breakIterator == null) {
breakIterator = new WholeBreakIterator();
}
this.maxLength = maxLength;
this.breakIterator = breakIterator;
}
/** Returns the {@link BreakIterator} to use for
* dividing text into passages. This returns
* {@link BreakIterator#getSentenceInstance(Locale)} by default;
* subclasses can override to customize. */
protected BreakIterator getBreakIterator(String field) {
return BreakIterator.getSentenceInstance(Locale.ROOT);
}
/** Returns the {@link PassageFormatter} to use for
@ -303,8 +294,6 @@ public class PostingsHighlighter {
IndexReaderContext readerContext = reader.getContext();
List<AtomicReaderContext> leaves = readerContext.leaves();
BreakIterator bi = (BreakIterator)breakIterator.clone();
// Make our own copy because we sort in-place:
int[] docids = new int[docidsIn.length];
System.arraycopy(docidsIn, 0, docids, 0, docidsIn.length);
@ -330,7 +319,7 @@ public class PostingsHighlighter {
for(Term term : fieldTerms) {
terms[termUpto++] = term.bytes();
}
Map<Integer,String> fieldHighlights = highlightField(field, contents[i], bi, terms, docids, leaves, maxPassages);
Map<Integer,String> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, maxPassages);
String[] result = new String[docids.length];
for (int j = 0; j < docidsIn.length; j++) {

View File

@ -21,7 +21,7 @@ import java.text.BreakIterator;
import java.text.CharacterIterator;
/** Just produces one single fragment for the entire text */
final class WholeBreakIterator extends BreakIterator {
public final class WholeBreakIterator extends BreakIterator {
private CharacterIterator text;
private int start;
private int end;

View File

@ -457,7 +457,12 @@ public class TestPostingsHighlighter extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
PostingsHighlighter highlighter = new PostingsHighlighter(10000, null);
PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
@Override
protected BreakIterator getBreakIterator(String field) {
return new WholeBreakIterator();
}
};
Query query = new TermQuery(new Term("body", "test"));
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
@ -527,7 +532,7 @@ public class TestPostingsHighlighter extends LuceneTestCase {
IndexSearcher searcher = newSearcher(ir);
PostingsHighlighter highlighter = new PostingsHighlighter(10000, null) {
PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
@Override
protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
assert fields.length == 1;
@ -536,6 +541,11 @@ public class TestPostingsHighlighter extends LuceneTestCase {
contents[0][0] = text;
return contents;
}
@Override
protected BreakIterator getBreakIterator(String field) {
return new WholeBreakIterator();
}
};
Query query = new TermQuery(new Term("body", "test"));
@ -636,7 +646,12 @@ public class TestPostingsHighlighter extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
PostingsHighlighter highlighter = new PostingsHighlighter(10000, null);
PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
@Override
protected BreakIterator getBreakIterator(String field) {
return new WholeBreakIterator();
}
};
Query query = new TermQuery(new Term("body", "highlighting"));
int[] docIDs = new int[] {0};
String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 2).get("body");

View File

@ -113,8 +113,7 @@ public class TestPostingsHighlighterRanking extends LuceneTestCase {
private void checkQuery(IndexSearcher is, Query query, int doc, int maxTopN) throws IOException {
for (int n = 1; n < maxTopN; n++) {
final FakePassageFormatter f1 = new FakePassageFormatter();
PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1,
BreakIterator.getSentenceInstance(Locale.ROOT)) {
PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
@Override
protected PassageFormatter getFormatter(String field) {
assertEquals("body", field);
@ -123,8 +122,7 @@ public class TestPostingsHighlighterRanking extends LuceneTestCase {
};
final FakePassageFormatter f2 = new FakePassageFormatter();
PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1,
BreakIterator.getSentenceInstance(Locale.ROOT)) {
PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
@Override
protected PassageFormatter getFormatter(String field) {
assertEquals("body", field);
@ -269,8 +267,7 @@ public class TestPostingsHighlighterRanking extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
PostingsHighlighter highlighter = new PostingsHighlighter(10000,
BreakIterator.getSentenceInstance(Locale.ROOT)) {
PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
@Override
protected PassageScorer getScorer(String field) {
return new PassageScorer(1.2f, 0, 87);
@ -309,8 +306,7 @@ public class TestPostingsHighlighterRanking extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
PostingsHighlighter highlighter = new PostingsHighlighter(10000,
BreakIterator.getSentenceInstance(Locale.ROOT)) {
PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
@Override
protected PassageScorer getScorer(String field) {
return new PassageScorer(0, 0.75f, 87);

View File

@ -80,7 +80,6 @@ public class PostingsSolrHighlighter extends SolrHighlighter implements PluginIn
@Override
public void init(PluginInfo info) {
Map<String,String> attributes = info.attributes;
BreakIterator breakIterator = BreakIterator.getSentenceInstance(Locale.ROOT);
// scorer parameters: k1/b/pivot
String k1 = attributes.get("k1");
@ -127,7 +126,7 @@ public class PostingsSolrHighlighter extends SolrHighlighter implements PluginIn
if (attributes.containsKey("maxLength")) {
maxLength = Integer.parseInt(attributes.get("maxLength"));
}
highlighter = new PostingsHighlighter(maxLength, breakIterator) {
highlighter = new PostingsHighlighter(maxLength) {
@Override
protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
if (summarizeEmptyBoolean) {