mirror of https://github.com/apache/lucene.git
UnifiedHighlighter: new passageSortComparator option (#13276)
new 'passageSortComparator' option to allow sorting other than offset order
This commit is contained in:
parent
d078fb774d
commit
8773725ac0
|
@ -298,6 +298,8 @@ Improvements
|
|||
* GITHUB#13385: Add Intervals.noIntervals() method to produce an empty IntervalsSource.
|
||||
(Aniketh Jain, Uwe Schindler, Alan Woodward)
|
||||
|
||||
* GITHUB#13276: UnifiedHighlighter: new 'passageSortComparator' option to allow sorting other than offset order. (Seunghan Jung)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@ public class FieldHighlighter {
|
|||
protected final int maxPassages;
|
||||
protected final int maxNoHighlightPassages;
|
||||
protected final PassageFormatter passageFormatter;
|
||||
protected final Comparator<Passage> passageSortComparator;
|
||||
|
||||
public FieldHighlighter(
|
||||
String field,
|
||||
|
@ -48,7 +49,8 @@ public class FieldHighlighter {
|
|||
PassageScorer passageScorer,
|
||||
int maxPassages,
|
||||
int maxNoHighlightPassages,
|
||||
PassageFormatter passageFormatter) {
|
||||
PassageFormatter passageFormatter,
|
||||
Comparator<Passage> passageSortComparator) {
|
||||
this.field = field;
|
||||
this.fieldOffsetStrategy = fieldOffsetStrategy;
|
||||
this.breakIterator = breakIterator;
|
||||
|
@ -56,6 +58,7 @@ public class FieldHighlighter {
|
|||
this.maxPassages = maxPassages;
|
||||
this.maxNoHighlightPassages = maxNoHighlightPassages;
|
||||
this.passageFormatter = passageFormatter;
|
||||
this.passageSortComparator = passageSortComparator;
|
||||
}
|
||||
|
||||
public String getField() {
|
||||
|
@ -191,8 +194,7 @@ public class FieldHighlighter {
|
|||
maybeAddPassage(passageQueue, passageScorer, passage, contentLength);
|
||||
|
||||
Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
|
||||
// sort in ascending order
|
||||
Arrays.sort(passages, Comparator.comparingInt(Passage::getStartOffset));
|
||||
Arrays.sort(passages, passageSortComparator);
|
||||
return passages;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.text.BreakIterator;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
@ -86,6 +87,7 @@ import org.apache.lucene.util.InPlaceMergeSorter;
|
|||
* <li>{@link #getBreakIterator(String)}: Customize how the text is divided into passages.
|
||||
* <li>{@link #getScorer(String)}: Customize how passages are ranked.
|
||||
* <li>{@link #getFormatter(String)}: Customize how snippets are formatted.
|
||||
* <li>{@link #getPassageSortComparator(String)}: Customize how passages are sorted.
|
||||
* </ul>
|
||||
*
|
||||
* <p>This is thread-safe, notwithstanding the setters.
|
||||
|
@ -113,6 +115,8 @@ public class UnifiedHighlighter {
|
|||
private static final PassageScorer DEFAULT_PASSAGE_SCORER = new PassageScorer();
|
||||
private static final PassageFormatter DEFAULT_PASSAGE_FORMATTER = new DefaultPassageFormatter();
|
||||
private static final int DEFAULT_MAX_HIGHLIGHT_PASSAGES = -1;
|
||||
private static final Comparator<Passage> DEFAULT_PASSAGE_SORT_COMPARATOR =
|
||||
Comparator.comparingInt(Passage::getStartOffset);
|
||||
|
||||
protected final IndexSearcher searcher; // if null, can only use highlightWithoutSearcher
|
||||
|
||||
|
@ -151,6 +155,8 @@ public class UnifiedHighlighter {
|
|||
|
||||
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
|
||||
|
||||
private Comparator<Passage> passageSortComparator = DEFAULT_PASSAGE_SORT_COMPARATOR;
|
||||
|
||||
/**
|
||||
* Constructs the highlighter with the given index searcher and analyzer.
|
||||
*
|
||||
|
@ -276,6 +282,7 @@ public class UnifiedHighlighter {
|
|||
private PassageFormatter formatter = DEFAULT_PASSAGE_FORMATTER;
|
||||
private int maxNoHighlightPassages = DEFAULT_MAX_HIGHLIGHT_PASSAGES;
|
||||
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
|
||||
private Comparator<Passage> passageSortComparator = DEFAULT_PASSAGE_SORT_COMPARATOR;
|
||||
|
||||
/**
|
||||
* Constructor for UH builder which accepts {@link IndexSearcher} and {@link Analyzer} objects.
|
||||
|
@ -402,6 +409,11 @@ public class UnifiedHighlighter {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
 * Stores the {@link Comparator} that this builder will hand to the highlighter for ordering
 * passages. The value is assigned as-is; no null check is performed here.
 *
 * @param value the passage comparator to store on this builder
 * @return this builder, to allow call chaining
 */
public Builder withPassageSortComparator(Comparator<Passage> value) {
|
||||
this.passageSortComparator = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Creates a new {@link UnifiedHighlighter}, passing this builder to its constructor.
 *
 * @return the newly constructed highlighter
 */
public UnifiedHighlighter build() {
|
||||
return new UnifiedHighlighter(this);
|
||||
}
|
||||
|
@ -463,6 +475,7 @@ public class UnifiedHighlighter {
|
|||
this.formatter = builder.formatter;
|
||||
this.maxNoHighlightPassages = builder.maxNoHighlightPassages;
|
||||
this.cacheFieldValCharsThreshold = builder.cacheFieldValCharsThreshold;
|
||||
this.passageSortComparator = builder.passageSortComparator;
|
||||
}
|
||||
|
||||
/** Extracts matching terms */
|
||||
|
@ -614,6 +627,11 @@ public class UnifiedHighlighter {
|
|||
return formatter;
|
||||
}
|
||||
|
||||
/**
 * Returns the {@link Comparator} used to order a field's passages in the final sort. This
 * implementation ignores {@code field} and returns the comparator configured on this
 * highlighter.
 */
|
||||
protected Comparator<Passage> getPassageSortComparator(String field) {
|
||||
return passageSortComparator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of leading passages (as delineated by the {@link BreakIterator}) when no
|
||||
* highlights could be found. If it's less than 0 (the default) then this defaults to the {@code
|
||||
|
@ -1119,7 +1137,8 @@ public class UnifiedHighlighter {
|
|||
getScorer(field),
|
||||
maxPassages,
|
||||
getMaxNoHighlightPassages(field),
|
||||
getFormatter(field));
|
||||
getFormatter(field),
|
||||
getPassageSortComparator(field));
|
||||
}
|
||||
|
||||
protected FieldHighlighter newFieldHighlighter(
|
||||
|
@ -1129,7 +1148,8 @@ public class UnifiedHighlighter {
|
|||
PassageScorer passageScorer,
|
||||
int maxPassages,
|
||||
int maxNoHighlightPassages,
|
||||
PassageFormatter passageFormatter) {
|
||||
PassageFormatter passageFormatter,
|
||||
Comparator<Passage> passageSortComparator) {
|
||||
return new FieldHighlighter(
|
||||
field,
|
||||
fieldOffsetStrategy,
|
||||
|
@ -1137,7 +1157,8 @@ public class UnifiedHighlighter {
|
|||
passageScorer,
|
||||
maxPassages,
|
||||
maxNoHighlightPassages,
|
||||
passageFormatter);
|
||||
passageFormatter,
|
||||
passageSortComparator);
|
||||
}
|
||||
|
||||
protected UHComponents getHighlightComponents(String field, Query query, Set<Term> allTerms) {
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.search.uhighlight.visibility;
|
|||
import java.io.IOException;
|
||||
import java.text.BreakIterator;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
@ -129,6 +130,11 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
|||
return super.getFormatter(field);
|
||||
}
|
||||
|
||||
// Delegates straight to the superclass. NOTE(review): presumably this override exists only to
// check that the protected method is reachable from a subclass in another package - confirm.
@Override
|
||||
protected Comparator<Passage> getPassageSortComparator(String field) {
|
||||
return super.getPassageSortComparator(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Analyzer getIndexAnalyzer() {
|
||||
return super.getIndexAnalyzer();
|
||||
|
@ -186,7 +192,8 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
|||
getScorer(field),
|
||||
maxPassages,
|
||||
getMaxNoHighlightPassages(field),
|
||||
getFormatter(field));
|
||||
getFormatter(field),
|
||||
getPassageSortComparator(field));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -240,7 +247,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
|||
public void testFieldHiglighterExtensibility() {
|
||||
final String fieldName = "fieldName";
|
||||
FieldHighlighter fieldHighlighter =
|
||||
new FieldHighlighter(fieldName, null, null, null, 1, 1, null) {
|
||||
new FieldHighlighter(fieldName, null, null, null, 1, 1, null, null) {
|
||||
@Override
|
||||
protected Passage[] highlightOffsetsEnums(OffsetsEnum offsetsEnums) throws IOException {
|
||||
return super.highlightOffsetsEnums(offsetsEnums);
|
||||
|
@ -262,7 +269,8 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
|||
PassageScorer passageScorer,
|
||||
int maxPassages,
|
||||
int maxNoHighlightPassages,
|
||||
PassageFormatter passageFormatter) {
|
||||
PassageFormatter passageFormatter,
|
||||
Comparator<Passage> passageSortComparator) {
|
||||
super(
|
||||
field,
|
||||
fieldOffsetStrategy,
|
||||
|
@ -270,7 +278,8 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
|||
passageScorer,
|
||||
maxPassages,
|
||||
maxNoHighlightPassages,
|
||||
passageFormatter);
|
||||
passageFormatter,
|
||||
passageSortComparator);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue