mirror of https://github.com/apache/lucene.git
LUCENE-7619: don't let offsets go backwards
This commit is contained in:
parent
39eec660ca
commit
0bdcfc291f
|
@@ -195,6 +195,7 @@ public final class WordDelimiterGraphFilter extends TokenFilter {
|
||||||
private int savedStartOffset;
|
private int savedStartOffset;
|
||||||
private int savedEndOffset;
|
private int savedEndOffset;
|
||||||
private AttributeSource.State savedState;
|
private AttributeSource.State savedState;
|
||||||
|
private int lastStartOffset;
|
||||||
|
|
||||||
// if length by start + end offsets doesn't match the term text then assume
|
// if length by start + end offsets doesn't match the term text then assume
|
||||||
// this is a synonym and don't adjust the offsets.
|
// this is a synonym and don't adjust the offsets.
|
||||||
|
@@ -373,12 +374,24 @@ public final class WordDelimiterGraphFilter extends TokenFilter {
|
||||||
int endPart = bufferedParts[4*bufferedPos+3];
|
int endPart = bufferedParts[4*bufferedPos+3];
|
||||||
bufferedPos++;
|
bufferedPos++;
|
||||||
|
|
||||||
|
int startOffset;
|
||||||
|
int endOffset;
|
||||||
|
|
||||||
if (hasIllegalOffsets) {
|
if (hasIllegalOffsets) {
|
||||||
offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
|
startOffset = savedStartOffset;
|
||||||
|
endOffset = savedEndOffset;
|
||||||
} else {
|
} else {
|
||||||
offsetAttribute.setOffset(savedStartOffset + startPart, savedStartOffset + endPart);
|
startOffset = savedStartOffset + startPart;
|
||||||
|
endOffset = savedStartOffset + endPart;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// never let offsets go backwards:
|
||||||
|
startOffset = Math.max(startOffset, lastStartOffset);
|
||||||
|
endOffset = Math.max(endOffset, lastStartOffset);
|
||||||
|
|
||||||
|
offsetAttribute.setOffset(startOffset, endOffset);
|
||||||
|
lastStartOffset = startOffset;
|
||||||
|
|
||||||
if (termPart == null) {
|
if (termPart == null) {
|
||||||
termAttribute.copyBuffer(savedTermBuffer, startPart, endPart - startPart);
|
termAttribute.copyBuffer(savedTermBuffer, startPart, endPart - startPart);
|
||||||
} else {
|
} else {
|
||||||
|
@@ -402,6 +415,7 @@ public final class WordDelimiterGraphFilter extends TokenFilter {
|
||||||
super.reset();
|
super.reset();
|
||||||
accumPosInc = 0;
|
accumPosInc = 0;
|
||||||
savedState = null;
|
savedState = null;
|
||||||
|
lastStartOffset = 0;
|
||||||
concat.clear();
|
concat.clear();
|
||||||
concatAll.clear();
|
concatAll.clear();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue