mirror of
https://github.com/apache/lucene.git
synced 2025-02-09 11:35:14 +00:00
LUCENE-6334: fix FastVectorHighlighter when a phrase spans more than one value in a multi-valued field
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1693156 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
27aa993b15
commit
7023d92ca8
@ -281,6 +281,10 @@ Bug fixes
|
|||||||
* LUCENE-6696: Fix FilterDirectoryReader.close() to never close the
|
* LUCENE-6696: Fix FilterDirectoryReader.close() to never close the
|
||||||
underlying reader several times. (Adrien Grand)
|
underlying reader several times. (Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-6334: FastVectorHighlighter failed to highlight phrases across
|
||||||
|
more than one value in a multi-valued field. (Chris Earle, Nik Everett
|
||||||
|
via Mike McCandless)
|
||||||
|
|
||||||
Changes in Runtime Behavior
|
Changes in Runtime Behavior
|
||||||
|
|
||||||
* LUCENE-6501: The subreader structure in ParallelCompositeReader
|
* LUCENE-6501: The subreader structure in ParallelCompositeReader
|
||||||
|
@ -268,10 +268,39 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
|
|||||||
Iterator<Toffs> toffsIterator = subInfo.getTermsOffsets().iterator();
|
Iterator<Toffs> toffsIterator = subInfo.getTermsOffsets().iterator();
|
||||||
while (toffsIterator.hasNext()) {
|
while (toffsIterator.hasNext()) {
|
||||||
Toffs toffs = toffsIterator.next();
|
Toffs toffs = toffsIterator.next();
|
||||||
if (toffs.getStartOffset() >= fieldStart && toffs.getEndOffset() <= fieldEnd) {
|
if (toffs.getStartOffset() >= fieldEnd) {
|
||||||
|
// We've gone past this value so it's not worth iterating any more.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
boolean startsAfterField = toffs.getStartOffset() >= fieldStart;
|
||||||
|
boolean endsBeforeField = toffs.getEndOffset() < fieldEnd;
|
||||||
|
if (startsAfterField && endsBeforeField) {
|
||||||
|
// The Toffs is entirely within this value.
|
||||||
toffsList.add(toffs);
|
toffsList.add(toffs);
|
||||||
toffsIterator.remove();
|
toffsIterator.remove();
|
||||||
|
} else if (startsAfterField) {
|
||||||
|
/*
|
||||||
|
* The Toffs starts within this value but ends after this value
|
||||||
|
* so we clamp the returned Toffs to this value and leave the
|
||||||
|
* Toffs in the iterator for the next value of this field.
|
||||||
|
*/
|
||||||
|
toffsList.add(new Toffs(toffs.getStartOffset(), fieldEnd - 1));
|
||||||
|
} else if (endsBeforeField) {
|
||||||
|
/*
|
||||||
|
* The Toffs starts before this value but ends in this value
|
||||||
|
* which means we're really continuing from where we left off
|
||||||
|
* above. Since we use the remainder of the offset we can remove
|
||||||
|
* it from the iterator.
|
||||||
|
*/
|
||||||
|
toffsList.add(new Toffs(fieldStart, toffs.getEndOffset()));
|
||||||
|
toffsIterator.remove();
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* The Toffs spans the whole value so we clamp on both sides.
|
||||||
|
* This is basically a combination of both clamping branches
|
||||||
|
* above.
|
||||||
|
*/
|
||||||
|
toffsList.add(new Toffs(fieldStart, fieldEnd - 1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!toffsList.isEmpty()) {
|
if (!toffsList.isEmpty()) {
|
||||||
|
@ -574,6 +574,72 @@ public class FastVectorHighlighterTest extends LuceneTestCase {
|
|||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testPhrasesSpanningFieldValues() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
// positionIncrementGap is 0 so the phrase is found across multiple field
|
||||||
|
// values.
|
||||||
|
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
FieldType type = new FieldType(TextField.TYPE_STORED);
|
||||||
|
type.setStoreTermVectorOffsets(true);
|
||||||
|
type.setStoreTermVectorPositions(true);
|
||||||
|
type.setStoreTermVectors(true);
|
||||||
|
type.freeze();
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add( new Field( "field", "one two three five", type ) );
|
||||||
|
doc.add( new Field( "field", "two three four", type ) );
|
||||||
|
doc.add( new Field( "field", "five six five", type ) );
|
||||||
|
doc.add( new Field( "field", "six seven eight nine eight nine eight " +
|
||||||
|
"nine eight nine eight nine eight nine", type ) );
|
||||||
|
doc.add( new Field( "field", "eight nine", type ) );
|
||||||
|
doc.add( new Field( "field", "ten eleven", type ) );
|
||||||
|
doc.add( new Field( "field", "twelve thirteen", type ) );
|
||||||
|
writer.addDocument(doc);
|
||||||
|
|
||||||
|
BaseFragListBuilder fragListBuilder = new SimpleFragListBuilder();
|
||||||
|
BaseFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
|
||||||
|
fragmentsBuilder.setDiscreteMultiValueHighlighting(true);
|
||||||
|
FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, fragListBuilder, fragmentsBuilder);
|
||||||
|
IndexReader reader = DirectoryReader.open(writer, true);
|
||||||
|
int docId = 0;
|
||||||
|
|
||||||
|
// Phrase that spans a field value
|
||||||
|
Query q = new PhraseQuery("field", "four", "five");
|
||||||
|
FieldQuery fieldQuery = highlighter.getFieldQuery(q, reader);
|
||||||
|
String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
|
||||||
|
assertEquals("two three <b>four</b>", bestFragments[0]);
|
||||||
|
assertEquals("<b>five</b> six five", bestFragments[1]);
|
||||||
|
assertEquals(2, bestFragments.length);
|
||||||
|
|
||||||
|
// Phrase that ends at a field value
|
||||||
|
q = new PhraseQuery("field", "three", "five");
|
||||||
|
fieldQuery = highlighter.getFieldQuery(q, reader);
|
||||||
|
bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
|
||||||
|
assertEquals("one two <b>three five</b>", bestFragments[0]);
|
||||||
|
assertEquals(1, bestFragments.length);
|
||||||
|
|
||||||
|
// Phrase that spans across three values
|
||||||
|
q = new PhraseQuery("field", "nine", "ten", "eleven", "twelve");
|
||||||
|
fieldQuery = highlighter.getFieldQuery(q, reader);
|
||||||
|
bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
|
||||||
|
assertEquals("eight <b>nine</b>", bestFragments[0]);
|
||||||
|
assertEquals("<b>ten eleven</b>", bestFragments[1]);
|
||||||
|
assertEquals("<b>twelve</b> thirteen", bestFragments[2]);
|
||||||
|
assertEquals(3, bestFragments.length);
|
||||||
|
|
||||||
|
// Term query that appears in multiple values
|
||||||
|
q = new TermQuery(new Term("field", "two"));
|
||||||
|
fieldQuery = highlighter.getFieldQuery(q, reader);
|
||||||
|
bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
|
||||||
|
assertEquals("one <b>two</b> three five", bestFragments[0]);
|
||||||
|
assertEquals("<b>two</b> three four", bestFragments[1]);
|
||||||
|
assertEquals(2, bestFragments.length);
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
writer.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
private void matchedFieldsTestCase( String fieldValue, String expected, Query... queryClauses ) throws IOException {
|
private void matchedFieldsTestCase( String fieldValue, String expected, Query... queryClauses ) throws IOException {
|
||||||
matchedFieldsTestCase( true, true, fieldValue, expected, queryClauses );
|
matchedFieldsTestCase( true, true, fieldValue, expected, queryClauses );
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user