UnifiedHighlighter: new passageSortComparator option (#13276)

new 'passageSortComparator' option to allow sorting other than offset order
This commit is contained in:
정승한(Seunghan Jung)/Search Platform 2024-05-24 02:45:17 +09:00 committed by GitHub
parent d078fb774d
commit 8773725ac0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 44 additions and 10 deletions
lucene
CHANGES.txt
highlighter/src
java/org/apache/lucene/search/uhighlight
test/org/apache/lucene/search/uhighlight/visibility

View File

@ -298,6 +298,8 @@ Improvements
* GITHUB#13385: Add Intervals.noIntervals() method to produce an empty IntervalsSource. * GITHUB#13385: Add Intervals.noIntervals() method to produce an empty IntervalsSource.
(Aniketh Jain, Uwe Schindler, Alan Woodward) (Aniketh Jain, Uwe Schindler, Alan Woodward)
* GITHUB#13276: UnifiedHighlighter: new 'passageSortComparator' option to allow sorting other than offset order. (Seunghan Jung)
Optimizations Optimizations
--------------------- ---------------------

View File

@ -40,6 +40,7 @@ public class FieldHighlighter {
protected final int maxPassages; protected final int maxPassages;
protected final int maxNoHighlightPassages; protected final int maxNoHighlightPassages;
protected final PassageFormatter passageFormatter; protected final PassageFormatter passageFormatter;
protected final Comparator<Passage> passageSortComparator;
public FieldHighlighter( public FieldHighlighter(
String field, String field,
@ -48,7 +49,8 @@ public class FieldHighlighter {
PassageScorer passageScorer, PassageScorer passageScorer,
int maxPassages, int maxPassages,
int maxNoHighlightPassages, int maxNoHighlightPassages,
PassageFormatter passageFormatter) { PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator) {
this.field = field; this.field = field;
this.fieldOffsetStrategy = fieldOffsetStrategy; this.fieldOffsetStrategy = fieldOffsetStrategy;
this.breakIterator = breakIterator; this.breakIterator = breakIterator;
@ -56,6 +58,7 @@ public class FieldHighlighter {
this.maxPassages = maxPassages; this.maxPassages = maxPassages;
this.maxNoHighlightPassages = maxNoHighlightPassages; this.maxNoHighlightPassages = maxNoHighlightPassages;
this.passageFormatter = passageFormatter; this.passageFormatter = passageFormatter;
this.passageSortComparator = passageSortComparator;
} }
public String getField() { public String getField() {
@ -191,8 +194,7 @@ public class FieldHighlighter {
maybeAddPassage(passageQueue, passageScorer, passage, contentLength); maybeAddPassage(passageQueue, passageScorer, passage, contentLength);
Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]); Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
// sort in ascending order Arrays.sort(passages, passageSortComparator);
Arrays.sort(passages, Comparator.comparingInt(Passage::getStartOffset));
return passages; return passages;
} }

View File

@ -21,6 +21,7 @@ import java.text.BreakIterator;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Comparator;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
@ -86,6 +87,7 @@ import org.apache.lucene.util.InPlaceMergeSorter;
* <li>{@link #getBreakIterator(String)}: Customize how the text is divided into passages. * <li>{@link #getBreakIterator(String)}: Customize how the text is divided into passages.
* <li>{@link #getScorer(String)}: Customize how passages are ranked. * <li>{@link #getScorer(String)}: Customize how passages are ranked.
* <li>{@link #getFormatter(String)}: Customize how snippets are formatted. * <li>{@link #getFormatter(String)}: Customize how snippets are formatted.
* <li>{@link #getPassageSortComparator(String)}: Customize how the selected passages are sorted.
* </ul> * </ul>
* *
* <p>This is thread-safe, notwithstanding the setters. * <p>This is thread-safe, notwithstanding the setters.
@ -113,6 +115,8 @@ public class UnifiedHighlighter {
private static final PassageScorer DEFAULT_PASSAGE_SCORER = new PassageScorer(); private static final PassageScorer DEFAULT_PASSAGE_SCORER = new PassageScorer();
private static final PassageFormatter DEFAULT_PASSAGE_FORMATTER = new DefaultPassageFormatter(); private static final PassageFormatter DEFAULT_PASSAGE_FORMATTER = new DefaultPassageFormatter();
private static final int DEFAULT_MAX_HIGHLIGHT_PASSAGES = -1; private static final int DEFAULT_MAX_HIGHLIGHT_PASSAGES = -1;
private static final Comparator<Passage> DEFAULT_PASSAGE_SORT_COMPARATOR =
Comparator.comparingInt(Passage::getStartOffset);
protected final IndexSearcher searcher; // if null, can only use highlightWithoutSearcher protected final IndexSearcher searcher; // if null, can only use highlightWithoutSearcher
@ -151,6 +155,8 @@ public class UnifiedHighlighter {
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD; private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
private Comparator<Passage> passageSortComparator = DEFAULT_PASSAGE_SORT_COMPARATOR;
/** /**
* Constructs the highlighter with the given index searcher and analyzer. * Constructs the highlighter with the given index searcher and analyzer.
* *
@ -276,6 +282,7 @@ public class UnifiedHighlighter {
private PassageFormatter formatter = DEFAULT_PASSAGE_FORMATTER; private PassageFormatter formatter = DEFAULT_PASSAGE_FORMATTER;
private int maxNoHighlightPassages = DEFAULT_MAX_HIGHLIGHT_PASSAGES; private int maxNoHighlightPassages = DEFAULT_MAX_HIGHLIGHT_PASSAGES;
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD; private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
private Comparator<Passage> passageSortComparator = DEFAULT_PASSAGE_SORT_COMPARATOR;
/** /**
* Constructor for UH builder which accepts {@link IndexSearcher} and {@link Analyzer} objects. * Constructor for UH builder which accepts {@link IndexSearcher} and {@link Analyzer} objects.
@ -402,6 +409,11 @@ public class UnifiedHighlighter {
return this; return this;
} }
/**
 * Sets the {@link Comparator} that determines the order of the passages returned for a field.
 * The builder's initial value is the default comparator, which orders passages by ascending
 * start offset.
 *
 * @param value the comparator to store on this builder; it is copied into the {@link
 *     UnifiedHighlighter} created by {@link #build()}. No null check is performed here.
 * @return this builder, to allow call chaining
 */
public Builder withPassageSortComparator(Comparator<Passage> value) {
this.passageSortComparator = value;
return this;
}
public UnifiedHighlighter build() { public UnifiedHighlighter build() {
return new UnifiedHighlighter(this); return new UnifiedHighlighter(this);
} }
@ -463,6 +475,7 @@ public class UnifiedHighlighter {
this.formatter = builder.formatter; this.formatter = builder.formatter;
this.maxNoHighlightPassages = builder.maxNoHighlightPassages; this.maxNoHighlightPassages = builder.maxNoHighlightPassages;
this.cacheFieldValCharsThreshold = builder.cacheFieldValCharsThreshold; this.cacheFieldValCharsThreshold = builder.cacheFieldValCharsThreshold;
this.passageSortComparator = builder.passageSortComparator;
} }
/** Extracts matching terms */ /** Extracts matching terms */
@ -614,6 +627,11 @@ public class UnifiedHighlighter {
return formatter; return formatter;
} }
/**
 * Returns the {@link Comparator} used for the final sort of the passages selected for a field,
 * i.e. the order in which they are handed back by the {@link FieldHighlighter}. This
 * implementation ignores {@code field} and returns the highlighter-wide comparator, which
 * defaults to ascending start offset; subclasses may override it to vary the order per field.
 *
 * @param field the name of the field being highlighted (unused by this implementation)
 * @return the comparator passed to the {@link FieldHighlighter} for this field
 */
protected Comparator<Passage> getPassageSortComparator(String field) {
return passageSortComparator;
}
/** /**
* Returns the number of leading passages (as delineated by the {@link BreakIterator}) when no * Returns the number of leading passages (as delineated by the {@link BreakIterator}) when no
* highlights could be found. If it's less than 0 (the default) then this defaults to the {@code * highlights could be found. If it's less than 0 (the default) then this defaults to the {@code
@ -1119,7 +1137,8 @@ public class UnifiedHighlighter {
getScorer(field), getScorer(field),
maxPassages, maxPassages,
getMaxNoHighlightPassages(field), getMaxNoHighlightPassages(field),
getFormatter(field)); getFormatter(field),
getPassageSortComparator(field));
} }
protected FieldHighlighter newFieldHighlighter( protected FieldHighlighter newFieldHighlighter(
@ -1129,7 +1148,8 @@ public class UnifiedHighlighter {
PassageScorer passageScorer, PassageScorer passageScorer,
int maxPassages, int maxPassages,
int maxNoHighlightPassages, int maxNoHighlightPassages,
PassageFormatter passageFormatter) { PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator) {
return new FieldHighlighter( return new FieldHighlighter(
field, field,
fieldOffsetStrategy, fieldOffsetStrategy,
@ -1137,7 +1157,8 @@ public class UnifiedHighlighter {
passageScorer, passageScorer,
maxPassages, maxPassages,
maxNoHighlightPassages, maxNoHighlightPassages,
passageFormatter); passageFormatter,
passageSortComparator);
} }
protected UHComponents getHighlightComponents(String field, Query query, Set<Term> allTerms) { protected UHComponents getHighlightComponents(String field, Query query, Set<Term> allTerms) {

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search.uhighlight.visibility;
import java.io.IOException; import java.io.IOException;
import java.text.BreakIterator; import java.text.BreakIterator;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@ -129,6 +130,11 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
return super.getFormatter(field); return super.getFormatter(field);
} }
// Pure delegation to the superclass; presumably present only so this test fails to compile if
// the protected extension point stops being overridable from outside the package (confirm
// against the rest of this test class).
@Override
protected Comparator<Passage> getPassageSortComparator(String field) {
return super.getPassageSortComparator(field);
}
@Override @Override
public Analyzer getIndexAnalyzer() { public Analyzer getIndexAnalyzer() {
return super.getIndexAnalyzer(); return super.getIndexAnalyzer();
@ -186,7 +192,8 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
getScorer(field), getScorer(field),
maxPassages, maxPassages,
getMaxNoHighlightPassages(field), getMaxNoHighlightPassages(field),
getFormatter(field)); getFormatter(field),
getPassageSortComparator(field));
} }
@Override @Override
@ -240,7 +247,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
public void testFieldHiglighterExtensibility() { public void testFieldHiglighterExtensibility() {
final String fieldName = "fieldName"; final String fieldName = "fieldName";
FieldHighlighter fieldHighlighter = FieldHighlighter fieldHighlighter =
new FieldHighlighter(fieldName, null, null, null, 1, 1, null) { new FieldHighlighter(fieldName, null, null, null, 1, 1, null, null) {
@Override @Override
protected Passage[] highlightOffsetsEnums(OffsetsEnum offsetsEnums) throws IOException { protected Passage[] highlightOffsetsEnums(OffsetsEnum offsetsEnums) throws IOException {
return super.highlightOffsetsEnums(offsetsEnums); return super.highlightOffsetsEnums(offsetsEnums);
@ -262,7 +269,8 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
PassageScorer passageScorer, PassageScorer passageScorer,
int maxPassages, int maxPassages,
int maxNoHighlightPassages, int maxNoHighlightPassages,
PassageFormatter passageFormatter) { PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator) {
super( super(
field, field,
fieldOffsetStrategy, fieldOffsetStrategy,
@ -270,7 +278,8 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
passageScorer, passageScorer,
maxPassages, maxPassages,
maxNoHighlightPassages, maxNoHighlightPassages,
passageFormatter); passageFormatter,
passageSortComparator);
} }
@Override @Override