mirror of https://github.com/apache/lucene.git
LUCENE-6334: fix FastVectorHighlighter when a phrase spans more than one value in a multi-valued field
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1693156 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
27aa993b15
commit
7023d92ca8
|
@ -281,6 +281,10 @@ Bug fixes
|
|||
* LUCENE-6696: Fix FilterDirectoryReader.close() to never close the
|
||||
underlying reader several times. (Adrien Grand)
|
||||
|
||||
* LUCENE-6334: FastVectorHighlighter failed to highlight phrases across
|
||||
more than one value in a multi-valued field. (Chris Earle, Nik Everett
|
||||
via Mike McCandless)
|
||||
|
||||
Changes in Runtime Behavior
|
||||
|
||||
* LUCENE-6501: The subreader structure in ParallelCompositeReader
|
||||
|
|
|
@ -268,10 +268,39 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
|
|||
Iterator<Toffs> toffsIterator = subInfo.getTermsOffsets().iterator();
|
||||
while (toffsIterator.hasNext()) {
|
||||
Toffs toffs = toffsIterator.next();
|
||||
if (toffs.getStartOffset() >= fieldStart && toffs.getEndOffset() <= fieldEnd) {
|
||||
|
||||
if (toffs.getStartOffset() >= fieldEnd) {
|
||||
// We've gone past this value so its not worth iterating any more.
|
||||
break;
|
||||
}
|
||||
boolean startsAfterField = toffs.getStartOffset() >= fieldStart;
|
||||
boolean endsBeforeField = toffs.getEndOffset() < fieldEnd;
|
||||
if (startsAfterField && endsBeforeField) {
|
||||
// The Toff is entirely within this value.
|
||||
toffsList.add(toffs);
|
||||
toffsIterator.remove();
|
||||
} else if (startsAfterField) {
|
||||
/*
|
||||
* The Toffs starts within this value but ends after this value
|
||||
* so we clamp the returned Toffs to this value and leave the
|
||||
* Toffs in the iterator for the next value of this field.
|
||||
*/
|
||||
toffsList.add(new Toffs(toffs.getStartOffset(), fieldEnd - 1));
|
||||
} else if (endsBeforeField) {
|
||||
/*
|
||||
* The Toffs starts before this value but ends in this value
|
||||
* which means we're really continuing from where we left off
|
||||
* above. Since we use the remainder of the offset we can remove
|
||||
* it from the iterator.
|
||||
*/
|
||||
toffsList.add(new Toffs(fieldStart, toffs.getEndOffset()));
|
||||
toffsIterator.remove();
|
||||
} else {
|
||||
/*
|
||||
* The Toffs spans the whole value so we clamp on both sides.
|
||||
* This is basically a combination of both arms of the loop
|
||||
* above.
|
||||
*/
|
||||
toffsList.add(new Toffs(fieldStart, fieldEnd - 1));
|
||||
}
|
||||
}
|
||||
if (!toffsList.isEmpty()) {
|
||||
|
|
|
@ -574,6 +574,72 @@ public class FastVectorHighlighterTest extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testPhrasesSpanningFieldValues() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
// positionIncrementGap is 0 so the pharse is found across multiple field
|
||||
// values.
|
||||
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
|
||||
FieldType type = new FieldType(TextField.TYPE_STORED);
|
||||
type.setStoreTermVectorOffsets(true);
|
||||
type.setStoreTermVectorPositions(true);
|
||||
type.setStoreTermVectors(true);
|
||||
type.freeze();
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add( new Field( "field", "one two three five", type ) );
|
||||
doc.add( new Field( "field", "two three four", type ) );
|
||||
doc.add( new Field( "field", "five six five", type ) );
|
||||
doc.add( new Field( "field", "six seven eight nine eight nine eight " +
|
||||
"nine eight nine eight nine eight nine", type ) );
|
||||
doc.add( new Field( "field", "eight nine", type ) );
|
||||
doc.add( new Field( "field", "ten eleven", type ) );
|
||||
doc.add( new Field( "field", "twelve thirteen", type ) );
|
||||
writer.addDocument(doc);
|
||||
|
||||
BaseFragListBuilder fragListBuilder = new SimpleFragListBuilder();
|
||||
BaseFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
|
||||
fragmentsBuilder.setDiscreteMultiValueHighlighting(true);
|
||||
FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, fragListBuilder, fragmentsBuilder);
|
||||
IndexReader reader = DirectoryReader.open(writer, true);
|
||||
int docId = 0;
|
||||
|
||||
// Phrase that spans a field value
|
||||
Query q = new PhraseQuery("field", "four", "five");
|
||||
FieldQuery fieldQuery = highlighter.getFieldQuery(q, reader);
|
||||
String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
|
||||
assertEquals("two three <b>four</b>", bestFragments[0]);
|
||||
assertEquals("<b>five</b> six five", bestFragments[1]);
|
||||
assertEquals(2, bestFragments.length);
|
||||
|
||||
// Phrase that ends at a field value
|
||||
q = new PhraseQuery("field", "three", "five");
|
||||
fieldQuery = highlighter.getFieldQuery(q, reader);
|
||||
bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
|
||||
assertEquals("one two <b>three five</b>", bestFragments[0]);
|
||||
assertEquals(1, bestFragments.length);
|
||||
|
||||
// Phrase that spans across three values
|
||||
q = new PhraseQuery("field", "nine", "ten", "eleven", "twelve");
|
||||
fieldQuery = highlighter.getFieldQuery(q, reader);
|
||||
bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
|
||||
assertEquals("eight <b>nine</b>", bestFragments[0]);
|
||||
assertEquals("<b>ten eleven</b>", bestFragments[1]);
|
||||
assertEquals("<b>twelve</b> thirteen", bestFragments[2]);
|
||||
assertEquals(3, bestFragments.length);
|
||||
|
||||
// Term query that appears in multiple values
|
||||
q = new TermQuery(new Term("field", "two"));
|
||||
fieldQuery = highlighter.getFieldQuery(q, reader);
|
||||
bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
|
||||
assertEquals("one <b>two</b> three five", bestFragments[0]);
|
||||
assertEquals("<b>two</b> three four", bestFragments[1]);
|
||||
assertEquals(2, bestFragments.length);
|
||||
|
||||
reader.close();
|
||||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private void matchedFieldsTestCase( String fieldValue, String expected, Query... queryClauses ) throws IOException {
|
||||
matchedFieldsTestCase( true, true, fieldValue, expected, queryClauses );
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue