UnifiedHighlighter: new passageSortComparator option (#13276)

new 'passageSortComparator' option to allow sorting other than offset order
This commit is contained in:
정승한(Seunghan Jung)/Search Platform 2024-05-24 02:45:17 +09:00 committed by GitHub
parent d078fb774d
commit 8773725ac0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 44 additions and 10 deletions

View File

@ -298,6 +298,8 @@ Improvements
* GITHUB#13385: Add Intervals.noIntervals() method to produce an empty IntervalsSource.
(Aniketh Jain, Uwe Schindler, Alan Woodward)
* GITHUB#13276: UnifiedHighlighter: new 'passageSortComparator' option to allow sorting other than offset order. (Seunghan Jung)
Optimizations
---------------------

View File

@ -40,6 +40,7 @@ public class FieldHighlighter {
protected final int maxPassages;
protected final int maxNoHighlightPassages;
protected final PassageFormatter passageFormatter;
protected final Comparator<Passage> passageSortComparator;
public FieldHighlighter(
String field,
@ -48,7 +49,8 @@ public class FieldHighlighter {
PassageScorer passageScorer,
int maxPassages,
int maxNoHighlightPassages,
PassageFormatter passageFormatter) {
PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator) {
this.field = field;
this.fieldOffsetStrategy = fieldOffsetStrategy;
this.breakIterator = breakIterator;
@ -56,6 +58,7 @@ public class FieldHighlighter {
this.maxPassages = maxPassages;
this.maxNoHighlightPassages = maxNoHighlightPassages;
this.passageFormatter = passageFormatter;
this.passageSortComparator = passageSortComparator;
}
public String getField() {
@ -191,8 +194,7 @@ public class FieldHighlighter {
maybeAddPassage(passageQueue, passageScorer, passage, contentLength);
Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
// sort with the configured passage comparator
Arrays.sort(passages, Comparator.comparingInt(Passage::getStartOffset));
Arrays.sort(passages, passageSortComparator);
return passages;
}

View File

@ -21,6 +21,7 @@ import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
@ -86,6 +87,7 @@ import org.apache.lucene.util.InPlaceMergeSorter;
* <li>{@link #getBreakIterator(String)}: Customize how the text is divided into passages.
* <li>{@link #getScorer(String)}: Customize how passages are ranked.
* <li>{@link #getFormatter(String)}: Customize how snippets are formatted.
* <li>{@link #getPassageSortComparator(String)}: Customize how passages are finally sorted.
* </ul>
*
* <p>This is thread-safe, notwithstanding the setters.
@ -113,6 +115,8 @@ public class UnifiedHighlighter {
private static final PassageScorer DEFAULT_PASSAGE_SCORER = new PassageScorer();
private static final PassageFormatter DEFAULT_PASSAGE_FORMATTER = new DefaultPassageFormatter();
private static final int DEFAULT_MAX_HIGHLIGHT_PASSAGES = -1;
private static final Comparator<Passage> DEFAULT_PASSAGE_SORT_COMPARATOR =
Comparator.comparingInt(Passage::getStartOffset);
protected final IndexSearcher searcher; // if null, can only use highlightWithoutSearcher
@ -151,6 +155,8 @@ public class UnifiedHighlighter {
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
private Comparator<Passage> passageSortComparator = DEFAULT_PASSAGE_SORT_COMPARATOR;
/**
* Constructs the highlighter with the given index searcher and analyzer.
*
@ -276,6 +282,7 @@ public class UnifiedHighlighter {
private PassageFormatter formatter = DEFAULT_PASSAGE_FORMATTER;
private int maxNoHighlightPassages = DEFAULT_MAX_HIGHLIGHT_PASSAGES;
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
private Comparator<Passage> passageSortComparator = DEFAULT_PASSAGE_SORT_COMPARATOR;
/**
* Constructor for UH builder which accepts {@link IndexSearcher} and {@link Analyzer} objects.
@ -402,6 +409,11 @@ public class UnifiedHighlighter {
return this;
}
/**
* Sets the {@link Comparator} used for the final sort of each field's selected passages.
*
* <p>Passing {@code null} selects the default ordering (ascending passage start offset) instead
* of storing {@code null}: the comparator is later handed to {@code Arrays.sort}, where a null
* comparator means "natural ordering" rather than "default highlighter order".
*
* @param value the comparator to sort passages with, or {@code null} for the default ordering
* @return this builder, for call chaining
*/
public Builder withPassageSortComparator(Comparator<Passage> value) {
this.passageSortComparator = value == null ? DEFAULT_PASSAGE_SORT_COMPARATOR : value;
return this;
}
/**
* Creates a {@link UnifiedHighlighter} by passing this builder to the highlighter's
* builder-accepting constructor.
*
* @return a new highlighter instance
*/
public UnifiedHighlighter build() {
return new UnifiedHighlighter(this);
}
@ -463,6 +475,7 @@ public class UnifiedHighlighter {
this.formatter = builder.formatter;
this.maxNoHighlightPassages = builder.maxNoHighlightPassages;
this.cacheFieldValCharsThreshold = builder.cacheFieldValCharsThreshold;
this.passageSortComparator = builder.passageSortComparator;
}
/** Extracts matching terms */
@ -614,6 +627,11 @@ public class UnifiedHighlighter {
return formatter;
}
/**
* Returns the {@link Comparator} used for the final sort of the passages selected for a field.
* This base implementation ignores {@code field} and returns the highlighter-wide comparator,
* which defaults to ascending passage start offset; subclasses may override this method to vary
* the ordering per field.
*
* @param field the name of the field being highlighted
* @return the comparator applied to that field's passages
*/
protected Comparator<Passage> getPassageSortComparator(String field) {
return passageSortComparator;
}
/**
* Returns the number of leading passages (as delineated by the {@link BreakIterator}) when no
* highlights could be found. If it's less than 0 (the default) then this defaults to the {@code
@ -1119,7 +1137,8 @@ public class UnifiedHighlighter {
getScorer(field),
maxPassages,
getMaxNoHighlightPassages(field),
getFormatter(field));
getFormatter(field),
getPassageSortComparator(field));
}
protected FieldHighlighter newFieldHighlighter(
@ -1129,7 +1148,8 @@ public class UnifiedHighlighter {
PassageScorer passageScorer,
int maxPassages,
int maxNoHighlightPassages,
PassageFormatter passageFormatter) {
PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator) {
return new FieldHighlighter(
field,
fieldOffsetStrategy,
@ -1137,7 +1157,8 @@ public class UnifiedHighlighter {
passageScorer,
maxPassages,
maxNoHighlightPassages,
passageFormatter);
passageFormatter,
passageSortComparator);
}
protected UHComponents getHighlightComponents(String field, Query query, Set<Term> allTerms) {

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search.uhighlight.visibility;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -129,6 +130,11 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
return super.getFormatter(field);
}
// Delegates unchanged to the superclass.
// NOTE(review): presumably overridden only to prove the method is accessible to subclasses
// declared in another package — confirm against the rest of this test class.
@Override
protected Comparator<Passage> getPassageSortComparator(String field) {
return super.getPassageSortComparator(field);
}
@Override
public Analyzer getIndexAnalyzer() {
return super.getIndexAnalyzer();
@ -186,7 +192,8 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
getScorer(field),
maxPassages,
getMaxNoHighlightPassages(field),
getFormatter(field));
getFormatter(field),
getPassageSortComparator(field));
}
@Override
@ -240,7 +247,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
public void testFieldHiglighterExtensibility() {
final String fieldName = "fieldName";
FieldHighlighter fieldHighlighter =
new FieldHighlighter(fieldName, null, null, null, 1, 1, null) {
new FieldHighlighter(fieldName, null, null, null, 1, 1, null, null) {
@Override
protected Passage[] highlightOffsetsEnums(OffsetsEnum offsetsEnums) throws IOException {
return super.highlightOffsetsEnums(offsetsEnums);
@ -262,7 +269,8 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
PassageScorer passageScorer,
int maxPassages,
int maxNoHighlightPassages,
PassageFormatter passageFormatter) {
PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator) {
super(
field,
fieldOffsetStrategy,
@ -270,7 +278,8 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
passageScorer,
maxPassages,
maxNoHighlightPassages,
passageFormatter);
passageFormatter,
passageSortComparator);
}
@Override