mirror of https://github.com/apache/lucene.git
UnifiedHighlighter: new passageSortComparator option (#13276)
new 'passageSortComparator' option to allow sorting other than offset order
This commit is contained in:
parent
d078fb774d
commit
8773725ac0
|
@ -298,6 +298,8 @@ Improvements
|
||||||
* GITHUB#13385: Add Intervals.noIntervals() method to produce an empty IntervalsSource.
|
* GITHUB#13385: Add Intervals.noIntervals() method to produce an empty IntervalsSource.
|
||||||
(Aniketh Jain, Uwe Schindler, Alan Woodward)
|
(Aniketh Jain, Uwe Schindler, Alan Woodward)
|
||||||
|
|
||||||
|
* GITHUB#13276: UnifiedHighlighter: new 'passageSortComparator' option to allow sorting other than offset order. (Seunghan Jung)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -40,6 +40,7 @@ public class FieldHighlighter {
|
||||||
protected final int maxPassages;
|
protected final int maxPassages;
|
||||||
protected final int maxNoHighlightPassages;
|
protected final int maxNoHighlightPassages;
|
||||||
protected final PassageFormatter passageFormatter;
|
protected final PassageFormatter passageFormatter;
|
||||||
|
protected final Comparator<Passage> passageSortComparator;
|
||||||
|
|
||||||
public FieldHighlighter(
|
public FieldHighlighter(
|
||||||
String field,
|
String field,
|
||||||
|
@ -48,7 +49,8 @@ public class FieldHighlighter {
|
||||||
PassageScorer passageScorer,
|
PassageScorer passageScorer,
|
||||||
int maxPassages,
|
int maxPassages,
|
||||||
int maxNoHighlightPassages,
|
int maxNoHighlightPassages,
|
||||||
PassageFormatter passageFormatter) {
|
PassageFormatter passageFormatter,
|
||||||
|
Comparator<Passage> passageSortComparator) {
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.fieldOffsetStrategy = fieldOffsetStrategy;
|
this.fieldOffsetStrategy = fieldOffsetStrategy;
|
||||||
this.breakIterator = breakIterator;
|
this.breakIterator = breakIterator;
|
||||||
|
@ -56,6 +58,7 @@ public class FieldHighlighter {
|
||||||
this.maxPassages = maxPassages;
|
this.maxPassages = maxPassages;
|
||||||
this.maxNoHighlightPassages = maxNoHighlightPassages;
|
this.maxNoHighlightPassages = maxNoHighlightPassages;
|
||||||
this.passageFormatter = passageFormatter;
|
this.passageFormatter = passageFormatter;
|
||||||
|
this.passageSortComparator = passageSortComparator;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getField() {
|
public String getField() {
|
||||||
|
@ -191,8 +194,7 @@ public class FieldHighlighter {
|
||||||
maybeAddPassage(passageQueue, passageScorer, passage, contentLength);
|
maybeAddPassage(passageQueue, passageScorer, passage, contentLength);
|
||||||
|
|
||||||
Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
|
Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
|
||||||
// sort in ascending order
|
Arrays.sort(passages, passageSortComparator);
|
||||||
Arrays.sort(passages, Comparator.comparingInt(Passage::getStartOffset));
|
|
||||||
return passages;
|
return passages;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.text.BreakIterator;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
@ -86,6 +87,7 @@ import org.apache.lucene.util.InPlaceMergeSorter;
|
||||||
* <li>{@link #getBreakIterator(String)}: Customize how the text is divided into passages.
|
* <li>{@link #getBreakIterator(String)}: Customize how the text is divided into passages.
|
||||||
* <li>{@link #getScorer(String)}: Customize how passages are ranked.
|
* <li>{@link #getScorer(String)}: Customize how passages are ranked.
|
||||||
* <li>{@link #getFormatter(String)}: Customize how snippets are formatted.
|
* <li>{@link #getFormatter(String)}: Customize how snippets are formatted.
|
||||||
|
* <li>{@link #getPassageSortComparator(String)}: Customize how passages are sorted.
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* <p>This is thread-safe, notwithstanding the setters.
|
* <p>This is thread-safe, notwithstanding the setters.
|
||||||
|
@ -113,6 +115,8 @@ public class UnifiedHighlighter {
|
||||||
private static final PassageScorer DEFAULT_PASSAGE_SCORER = new PassageScorer();
|
private static final PassageScorer DEFAULT_PASSAGE_SCORER = new PassageScorer();
|
||||||
private static final PassageFormatter DEFAULT_PASSAGE_FORMATTER = new DefaultPassageFormatter();
|
private static final PassageFormatter DEFAULT_PASSAGE_FORMATTER = new DefaultPassageFormatter();
|
||||||
private static final int DEFAULT_MAX_HIGHLIGHT_PASSAGES = -1;
|
private static final int DEFAULT_MAX_HIGHLIGHT_PASSAGES = -1;
|
||||||
|
private static final Comparator<Passage> DEFAULT_PASSAGE_SORT_COMPARATOR =
|
||||||
|
Comparator.comparingInt(Passage::getStartOffset);
|
||||||
|
|
||||||
protected final IndexSearcher searcher; // if null, can only use highlightWithoutSearcher
|
protected final IndexSearcher searcher; // if null, can only use highlightWithoutSearcher
|
||||||
|
|
||||||
|
@ -151,6 +155,8 @@ public class UnifiedHighlighter {
|
||||||
|
|
||||||
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
|
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
|
||||||
|
|
||||||
|
private Comparator<Passage> passageSortComparator = DEFAULT_PASSAGE_SORT_COMPARATOR;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs the highlighter with the given index searcher and analyzer.
|
* Constructs the highlighter with the given index searcher and analyzer.
|
||||||
*
|
*
|
||||||
|
@ -276,6 +282,7 @@ public class UnifiedHighlighter {
|
||||||
private PassageFormatter formatter = DEFAULT_PASSAGE_FORMATTER;
|
private PassageFormatter formatter = DEFAULT_PASSAGE_FORMATTER;
|
||||||
private int maxNoHighlightPassages = DEFAULT_MAX_HIGHLIGHT_PASSAGES;
|
private int maxNoHighlightPassages = DEFAULT_MAX_HIGHLIGHT_PASSAGES;
|
||||||
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
|
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
|
||||||
|
private Comparator<Passage> passageSortComparator = DEFAULT_PASSAGE_SORT_COMPARATOR;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor for UH builder which accepts {@link IndexSearcher} and {@link Analyzer} objects.
|
* Constructor for UH builder which accepts {@link IndexSearcher} and {@link Analyzer} objects.
|
||||||
|
@ -402,6 +409,11 @@ public class UnifiedHighlighter {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Builder withPassageSortComparator(Comparator<Passage> value) {
|
||||||
|
this.passageSortComparator = value;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public UnifiedHighlighter build() {
|
public UnifiedHighlighter build() {
|
||||||
return new UnifiedHighlighter(this);
|
return new UnifiedHighlighter(this);
|
||||||
}
|
}
|
||||||
|
@ -463,6 +475,7 @@ public class UnifiedHighlighter {
|
||||||
this.formatter = builder.formatter;
|
this.formatter = builder.formatter;
|
||||||
this.maxNoHighlightPassages = builder.maxNoHighlightPassages;
|
this.maxNoHighlightPassages = builder.maxNoHighlightPassages;
|
||||||
this.cacheFieldValCharsThreshold = builder.cacheFieldValCharsThreshold;
|
this.cacheFieldValCharsThreshold = builder.cacheFieldValCharsThreshold;
|
||||||
|
this.passageSortComparator = builder.passageSortComparator;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Extracts matching terms */
|
/** Extracts matching terms */
|
||||||
|
@ -614,6 +627,11 @@ public class UnifiedHighlighter {
|
||||||
return formatter;
|
return formatter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the {@link Comparator} used for the final sorting of passages. */
|
||||||
|
protected Comparator<Passage> getPassageSortComparator(String field) {
|
||||||
|
return passageSortComparator;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the number of leading passages (as delineated by the {@link BreakIterator}) when no
|
* Returns the number of leading passages (as delineated by the {@link BreakIterator}) when no
|
||||||
* highlights could be found. If it's less than 0 (the default) then this defaults to the {@code
|
* highlights could be found. If it's less than 0 (the default) then this defaults to the {@code
|
||||||
|
@ -1119,7 +1137,8 @@ public class UnifiedHighlighter {
|
||||||
getScorer(field),
|
getScorer(field),
|
||||||
maxPassages,
|
maxPassages,
|
||||||
getMaxNoHighlightPassages(field),
|
getMaxNoHighlightPassages(field),
|
||||||
getFormatter(field));
|
getFormatter(field),
|
||||||
|
getPassageSortComparator(field));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected FieldHighlighter newFieldHighlighter(
|
protected FieldHighlighter newFieldHighlighter(
|
||||||
|
@ -1129,7 +1148,8 @@ public class UnifiedHighlighter {
|
||||||
PassageScorer passageScorer,
|
PassageScorer passageScorer,
|
||||||
int maxPassages,
|
int maxPassages,
|
||||||
int maxNoHighlightPassages,
|
int maxNoHighlightPassages,
|
||||||
PassageFormatter passageFormatter) {
|
PassageFormatter passageFormatter,
|
||||||
|
Comparator<Passage> passageSortComparator) {
|
||||||
return new FieldHighlighter(
|
return new FieldHighlighter(
|
||||||
field,
|
field,
|
||||||
fieldOffsetStrategy,
|
fieldOffsetStrategy,
|
||||||
|
@ -1137,7 +1157,8 @@ public class UnifiedHighlighter {
|
||||||
passageScorer,
|
passageScorer,
|
||||||
maxPassages,
|
maxPassages,
|
||||||
maxNoHighlightPassages,
|
maxNoHighlightPassages,
|
||||||
passageFormatter);
|
passageFormatter,
|
||||||
|
passageSortComparator);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected UHComponents getHighlightComponents(String field, Query query, Set<Term> allTerms) {
|
protected UHComponents getHighlightComponents(String field, Query query, Set<Term> allTerms) {
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.search.uhighlight.visibility;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.text.BreakIterator;
|
import java.text.BreakIterator;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
@ -129,6 +130,11 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
||||||
return super.getFormatter(field);
|
return super.getFormatter(field);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Comparator<Passage> getPassageSortComparator(String field) {
|
||||||
|
return super.getPassageSortComparator(field);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Analyzer getIndexAnalyzer() {
|
public Analyzer getIndexAnalyzer() {
|
||||||
return super.getIndexAnalyzer();
|
return super.getIndexAnalyzer();
|
||||||
|
@ -186,7 +192,8 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
||||||
getScorer(field),
|
getScorer(field),
|
||||||
maxPassages,
|
maxPassages,
|
||||||
getMaxNoHighlightPassages(field),
|
getMaxNoHighlightPassages(field),
|
||||||
getFormatter(field));
|
getFormatter(field),
|
||||||
|
getPassageSortComparator(field));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -240,7 +247,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
||||||
public void testFieldHiglighterExtensibility() {
|
public void testFieldHiglighterExtensibility() {
|
||||||
final String fieldName = "fieldName";
|
final String fieldName = "fieldName";
|
||||||
FieldHighlighter fieldHighlighter =
|
FieldHighlighter fieldHighlighter =
|
||||||
new FieldHighlighter(fieldName, null, null, null, 1, 1, null) {
|
new FieldHighlighter(fieldName, null, null, null, 1, 1, null, null) {
|
||||||
@Override
|
@Override
|
||||||
protected Passage[] highlightOffsetsEnums(OffsetsEnum offsetsEnums) throws IOException {
|
protected Passage[] highlightOffsetsEnums(OffsetsEnum offsetsEnums) throws IOException {
|
||||||
return super.highlightOffsetsEnums(offsetsEnums);
|
return super.highlightOffsetsEnums(offsetsEnums);
|
||||||
|
@ -262,7 +269,8 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
||||||
PassageScorer passageScorer,
|
PassageScorer passageScorer,
|
||||||
int maxPassages,
|
int maxPassages,
|
||||||
int maxNoHighlightPassages,
|
int maxNoHighlightPassages,
|
||||||
PassageFormatter passageFormatter) {
|
PassageFormatter passageFormatter,
|
||||||
|
Comparator<Passage> passageSortComparator) {
|
||||||
super(
|
super(
|
||||||
field,
|
field,
|
||||||
fieldOffsetStrategy,
|
fieldOffsetStrategy,
|
||||||
|
@ -270,7 +278,8 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
||||||
passageScorer,
|
passageScorer,
|
||||||
maxPassages,
|
maxPassages,
|
||||||
maxNoHighlightPassages,
|
maxNoHighlightPassages,
|
||||||
passageFormatter);
|
passageFormatter,
|
||||||
|
passageSortComparator);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue