mirror of https://github.com/apache/lucene.git
LUCENE-3849: end() now sets position increment, so any trailing holes are counted
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1515887 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fbbdba2614
commit
2a5421ca68
|
@ -126,6 +126,11 @@ Bug Fixes
|
||||||
the default one) have their own limits (David Smiley, Robert Muir,
|
the default one) have their own limits (David Smiley, Robert Muir,
|
||||||
Mike McCandless)
|
Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-3849: TokenStreams now set the position increment in end(),
|
||||||
|
so we can handle trailing holes. If you have a custom TokenStream
|
||||||
|
implementing end() then be sure it calls super.end(). (Robert Muir,
|
||||||
|
Mike McCandless)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
|
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
|
||||||
|
|
|
@ -80,7 +80,8 @@ public final class KeywordTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final void end() {
|
public final void end() throws IOException {
|
||||||
|
super.end();
|
||||||
// set final offset
|
// set final offset
|
||||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
}
|
}
|
||||||
|
|
|
@ -217,13 +217,15 @@ public class NGramTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final void end() {
|
public final void end() throws IOException {
|
||||||
|
super.end();
|
||||||
assert bufferStart <= bufferEnd;
|
assert bufferStart <= bufferEnd;
|
||||||
int endOffset = offset;
|
int endOffset = offset;
|
||||||
for (int i = bufferStart; i < bufferEnd; ++i) {
|
for (int i = bufferStart; i < bufferEnd; ++i) {
|
||||||
endOffset += Character.charCount(buffer[i]);
|
endOffset += Character.charCount(buffer[i]);
|
||||||
}
|
}
|
||||||
endOffset = correctOffset(endOffset);
|
endOffset = correctOffset(endOffset);
|
||||||
|
// set final offset
|
||||||
offsetAtt.setOffset(endOffset, endOffset);
|
offsetAtt.setOffset(endOffset, endOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -191,7 +191,8 @@ public class PathHierarchyTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final void end() {
|
public final void end() throws IOException {
|
||||||
|
super.end();
|
||||||
// set final offset
|
// set final offset
|
||||||
int finalOffset = correctOffset(charsRead);
|
int finalOffset = correctOffset(charsRead);
|
||||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
|
|
|
@ -176,7 +176,8 @@ public class ReversePathHierarchyTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final void end() {
|
public final void end() throws IOException {
|
||||||
|
super.end();
|
||||||
// set final offset
|
// set final offset
|
||||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
}
|
}
|
||||||
|
|
|
@ -130,7 +130,8 @@ public final class PatternTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void end() {
|
public void end() throws IOException {
|
||||||
|
super.end();
|
||||||
final int ofs = correctOffset(str.length());
|
final int ofs = correctOffset(str.length());
|
||||||
offsetAtt.setOffset(ofs, ofs);
|
offsetAtt.setOffset(ofs, ofs);
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,6 +76,8 @@ public final class ClassicTokenizer extends Tokenizer {
|
||||||
"<CJ>",
|
"<CJ>",
|
||||||
"<ACRONYM_DEP>"
|
"<ACRONYM_DEP>"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
private int skippedPositions;
|
||||||
|
|
||||||
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
|
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
|
||||||
|
|
||||||
|
@ -130,7 +132,7 @@ public final class ClassicTokenizer extends Tokenizer {
|
||||||
@Override
|
@Override
|
||||||
public final boolean incrementToken() throws IOException {
|
public final boolean incrementToken() throws IOException {
|
||||||
clearAttributes();
|
clearAttributes();
|
||||||
int posIncr = 1;
|
skippedPositions = 0;
|
||||||
|
|
||||||
while(true) {
|
while(true) {
|
||||||
int tokenType = scanner.getNextToken();
|
int tokenType = scanner.getNextToken();
|
||||||
|
@ -140,7 +142,7 @@ public final class ClassicTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scanner.yylength() <= maxTokenLength) {
|
if (scanner.yylength() <= maxTokenLength) {
|
||||||
posIncrAtt.setPositionIncrement(posIncr);
|
posIncrAtt.setPositionIncrement(skippedPositions+1);
|
||||||
scanner.getText(termAtt);
|
scanner.getText(termAtt);
|
||||||
final int start = scanner.yychar();
|
final int start = scanner.yychar();
|
||||||
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
|
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
|
||||||
|
@ -155,19 +157,23 @@ public final class ClassicTokenizer extends Tokenizer {
|
||||||
} else
|
} else
|
||||||
// When we skip a too-long term, we still increment the
|
// When we skip a too-long term, we still increment the
|
||||||
// position increment
|
// position increment
|
||||||
posIncr++;
|
skippedPositions++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final void end() {
|
public final void end() throws IOException {
|
||||||
|
super.end();
|
||||||
// set final offset
|
// set final offset
|
||||||
int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
|
int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
|
||||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
|
// adjust any skipped tokens
|
||||||
|
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void reset() throws IOException {
|
public void reset() throws IOException {
|
||||||
scanner.yyreset(input);
|
scanner.yyreset(input);
|
||||||
|
skippedPositions = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -90,6 +90,8 @@ public final class StandardTokenizer extends Tokenizer {
|
||||||
"<KATAKANA>",
|
"<KATAKANA>",
|
||||||
"<HANGUL>"
|
"<HANGUL>"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
private int skippedPositions;
|
||||||
|
|
||||||
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
|
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
|
||||||
|
|
||||||
|
@ -144,7 +146,7 @@ public final class StandardTokenizer extends Tokenizer {
|
||||||
@Override
|
@Override
|
||||||
public final boolean incrementToken() throws IOException {
|
public final boolean incrementToken() throws IOException {
|
||||||
clearAttributes();
|
clearAttributes();
|
||||||
int posIncr = 1;
|
skippedPositions = 0;
|
||||||
|
|
||||||
while(true) {
|
while(true) {
|
||||||
int tokenType = scanner.getNextToken();
|
int tokenType = scanner.getNextToken();
|
||||||
|
@ -154,7 +156,7 @@ public final class StandardTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scanner.yylength() <= maxTokenLength) {
|
if (scanner.yylength() <= maxTokenLength) {
|
||||||
posIncrAtt.setPositionIncrement(posIncr);
|
posIncrAtt.setPositionIncrement(skippedPositions+1);
|
||||||
scanner.getText(termAtt);
|
scanner.getText(termAtt);
|
||||||
final int start = scanner.yychar();
|
final int start = scanner.yychar();
|
||||||
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
|
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
|
||||||
|
@ -163,19 +165,23 @@ public final class StandardTokenizer extends Tokenizer {
|
||||||
} else
|
} else
|
||||||
// When we skip a too-long term, we still increment the
|
// When we skip a too-long term, we still increment the
|
||||||
// position increment
|
// position increment
|
||||||
posIncr++;
|
skippedPositions++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final void end() {
|
public final void end() throws IOException {
|
||||||
|
super.end();
|
||||||
// set final offset
|
// set final offset
|
||||||
int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
|
int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
|
||||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
|
// adjust any skipped tokens
|
||||||
|
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void reset() throws IOException {
|
public void reset() throws IOException {
|
||||||
scanner.yyreset(input);
|
scanner.yyreset(input);
|
||||||
|
skippedPositions = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,6 +76,8 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
|
||||||
"<URL>",
|
"<URL>",
|
||||||
"<EMAIL>",
|
"<EMAIL>",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
private int skippedPositions;
|
||||||
|
|
||||||
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
|
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
|
||||||
|
|
||||||
|
@ -123,7 +125,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
|
||||||
@Override
|
@Override
|
||||||
public final boolean incrementToken() throws IOException {
|
public final boolean incrementToken() throws IOException {
|
||||||
clearAttributes();
|
clearAttributes();
|
||||||
int posIncr = 1;
|
skippedPositions = 0;
|
||||||
|
|
||||||
while(true) {
|
while(true) {
|
||||||
int tokenType = scanner.getNextToken();
|
int tokenType = scanner.getNextToken();
|
||||||
|
@ -133,7 +135,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scanner.yylength() <= maxTokenLength) {
|
if (scanner.yylength() <= maxTokenLength) {
|
||||||
posIncrAtt.setPositionIncrement(posIncr);
|
posIncrAtt.setPositionIncrement(skippedPositions+1);
|
||||||
scanner.getText(termAtt);
|
scanner.getText(termAtt);
|
||||||
final int start = scanner.yychar();
|
final int start = scanner.yychar();
|
||||||
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
|
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
|
||||||
|
@ -142,19 +144,23 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
|
||||||
} else
|
} else
|
||||||
// When we skip a too-long term, we still increment the
|
// When we skip a too-long term, we still increment the
|
||||||
// position increment
|
// position increment
|
||||||
posIncr++;
|
skippedPositions++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final void end() {
|
public final void end() throws IOException {
|
||||||
|
super.end();
|
||||||
// set final offset
|
// set final offset
|
||||||
int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
|
int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
|
||||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
|
// adjust any skipped tokens
|
||||||
|
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void reset() throws IOException {
|
public void reset() throws IOException {
|
||||||
scanner.yyreset(input);
|
scanner.yyreset(input);
|
||||||
|
skippedPositions = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -142,7 +142,8 @@ public abstract class CharTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final void end() {
|
public final void end() throws IOException {
|
||||||
|
super.end();
|
||||||
// set final offset
|
// set final offset
|
||||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,6 +34,7 @@ public abstract class FilteringTokenFilter extends TokenFilter {
|
||||||
|
|
||||||
protected final Version version;
|
protected final Version version;
|
||||||
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
||||||
|
private int skippedPositions;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new {@link FilteringTokenFilter}.
|
* Create a new {@link FilteringTokenFilter}.
|
||||||
|
@ -50,7 +51,7 @@ public abstract class FilteringTokenFilter extends TokenFilter {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final boolean incrementToken() throws IOException {
|
public final boolean incrementToken() throws IOException {
|
||||||
int skippedPositions = 0;
|
skippedPositions = 0;
|
||||||
while (input.incrementToken()) {
|
while (input.incrementToken()) {
|
||||||
if (accept()) {
|
if (accept()) {
|
||||||
if (skippedPositions != 0) {
|
if (skippedPositions != 0) {
|
||||||
|
@ -68,6 +69,12 @@ public abstract class FilteringTokenFilter extends TokenFilter {
|
||||||
@Override
|
@Override
|
||||||
public void reset() throws IOException {
|
public void reset() throws IOException {
|
||||||
super.reset();
|
super.reset();
|
||||||
|
skippedPositions = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void end() throws IOException {
|
||||||
|
super.end();
|
||||||
|
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -309,7 +309,8 @@ public final class WikipediaTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void end() {
|
public void end() throws IOException {
|
||||||
|
super.end();
|
||||||
// set final offset
|
// set final offset
|
||||||
final int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
|
final int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
|
||||||
this.offsetAtt.setOffset(finalOffset, finalOffset);
|
this.offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
|
|
|
@ -90,6 +90,22 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
|
||||||
StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
|
StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
|
||||||
doTestStopPositons(stpf01);
|
doTestStopPositons(stpf01);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LUCENE-3849: make sure after .end() we see the "ending" posInc
|
||||||
|
public void testEndStopword() throws Exception {
|
||||||
|
CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
|
||||||
|
StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(new StringReader("test of"), MockTokenizer.WHITESPACE, false), stopSet);
|
||||||
|
assertTokenStreamContents(stpf, new String[] { "test" },
|
||||||
|
new int[] {0},
|
||||||
|
new int[] {4},
|
||||||
|
null,
|
||||||
|
new int[] {1},
|
||||||
|
null,
|
||||||
|
7,
|
||||||
|
1,
|
||||||
|
null,
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
|
||||||
private void doTestStopPositons(StopFilter stpf) throws IOException {
|
private void doTestStopPositons(StopFilter stpf) throws IOException {
|
||||||
CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
|
||||||
|
|
|
@ -120,7 +120,8 @@ public final class ICUTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void end() {
|
public void end() throws IOException {
|
||||||
|
super.end();
|
||||||
final int finalOffset = (length < 0) ? offset : offset + length;
|
final int finalOffset = (length < 0) ? offset : offset + length;
|
||||||
offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));
|
offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));
|
||||||
}
|
}
|
||||||
|
|
|
@ -280,7 +280,8 @@ public final class JapaneseTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void end() {
|
public void end() throws IOException {
|
||||||
|
super.end();
|
||||||
// Set final offset
|
// Set final offset
|
||||||
int finalOffset = correctOffset(pos);
|
int finalOffset = correctOffset(pos);
|
||||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
|
|
|
@ -112,7 +112,8 @@ public final class SentenceTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void end() {
|
public void end() throws IOException {
|
||||||
|
super.end();
|
||||||
// set final offset
|
// set final offset
|
||||||
final int finalOffset = correctOffset(tokenEnd);
|
final int finalOffset = correctOffset(tokenEnd);
|
||||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
|
|
|
@ -91,9 +91,4 @@ public abstract class BaseUIMATokenizer extends Tokenizer {
|
||||||
public void reset() throws IOException {
|
public void reset() throws IOException {
|
||||||
iterator = null;
|
iterator = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void end() throws IOException {
|
|
||||||
iterator = null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -86,7 +86,7 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void end() throws IOException {
|
public void end() throws IOException {
|
||||||
offsetAttr.setOffset(finalOffset, finalOffset);
|
|
||||||
super.end();
|
super.end();
|
||||||
|
offsetAttr.setOffset(finalOffset, finalOffset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -107,8 +107,8 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void end() throws IOException {
|
public void end() throws IOException {
|
||||||
offsetAttr.setOffset(finalOffset, finalOffset);
|
|
||||||
super.end();
|
super.end();
|
||||||
|
offsetAttr.setOffset(finalOffset, finalOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.lang.reflect.Modifier;
|
import java.lang.reflect.Modifier;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
@ -159,11 +160,18 @@ public abstract class TokenStream extends AttributeSource implements Closeable {
|
||||||
* setting the final offset of a stream. The final offset of a stream might
|
* setting the final offset of a stream. The final offset of a stream might
|
||||||
* differ from the offset of the last token, e.g. in case one or more whitespaces
|
* differ from the offset of the last token, e.g. in case one or more whitespaces
|
||||||
* followed after the last token, but a WhitespaceTokenizer was used.
|
* followed after the last token, but a WhitespaceTokenizer was used.
|
||||||
|
* <p>
|
||||||
|
* Additionally any skipped positions (such as those removed by a stopfilter)
|
||||||
|
* can be applied to the position increment, or any adjustment of other
|
||||||
|
* attributes where the end-of-stream value may be important.
|
||||||
*
|
*
|
||||||
* @throws IOException If an I/O error occurs
|
* @throws IOException If an I/O error occurs
|
||||||
*/
|
*/
|
||||||
public void end() throws IOException {
|
public void end() throws IOException {
|
||||||
// do nothing by default
|
clearAttributes(); // LUCENE-3849: don't consume dirty atts
|
||||||
|
if (hasAttribute(PositionIncrementAttribute.class)) {
|
||||||
|
getAttribute(PositionIncrementAttribute.class).setPositionIncrement(0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -175,7 +175,9 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
||||||
}
|
}
|
||||||
// trigger streams to perform end-of-stream operations
|
// trigger streams to perform end-of-stream operations
|
||||||
stream.end();
|
stream.end();
|
||||||
|
// TODO: maybe add some safety? then again, it's already checked
|
||||||
|
// when we come back around to the field...
|
||||||
|
fieldState.position += posIncrAttribute.getPositionIncrement();
|
||||||
fieldState.offset += offsetAttribute.endOffset();
|
fieldState.offset += offsetAttribute.endOffset();
|
||||||
success2 = true;
|
success2 = true;
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -31,16 +31,19 @@ import org.apache.lucene.analysis.CannedBinaryTokenStream; // javadocs
|
||||||
*/
|
*/
|
||||||
public final class BinaryTokenStream extends TokenStream {
|
public final class BinaryTokenStream extends TokenStream {
|
||||||
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
|
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
|
||||||
|
private final BytesRef bytes;
|
||||||
private boolean available = true;
|
private boolean available = true;
|
||||||
|
|
||||||
public BinaryTokenStream(BytesRef bytes) {
|
public BinaryTokenStream(BytesRef bytes) {
|
||||||
bytesAtt.setBytesRef(bytes);
|
this.bytes = bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean incrementToken() {
|
public boolean incrementToken() {
|
||||||
if (available) {
|
if (available) {
|
||||||
|
clearAttributes();
|
||||||
available = false;
|
available = false;
|
||||||
|
bytesAtt.setBytesRef(bytes);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -50,6 +50,7 @@ import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.FieldCache;
|
import org.apache.lucene.search.FieldCache;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.store.AlreadyClosedException;
|
import org.apache.lucene.store.AlreadyClosedException;
|
||||||
|
@ -72,6 +73,9 @@ import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.SetOnce;
|
import org.apache.lucene.util.SetOnce;
|
||||||
import org.apache.lucene.util.ThreadInterruptedException;
|
import org.apache.lucene.util.ThreadInterruptedException;
|
||||||
import org.apache.lucene.util._TestUtil;
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
import org.apache.lucene.util.automaton.Automaton;
|
||||||
|
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||||
|
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
@ -1899,6 +1903,65 @@ public class TestIndexWriter extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LUCENE-3849
|
||||||
|
public void testStopwordsPosIncHole() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
Analyzer a = new Analyzer() {
|
||||||
|
@Override
|
||||||
|
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
|
Tokenizer tokenizer = new MockTokenizer(reader);
|
||||||
|
TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
|
||||||
|
return new TokenStreamComponents(tokenizer, stream);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new TextField("body", "just a", Field.Store.NO));
|
||||||
|
doc.add(new TextField("body", "test of gaps", Field.Store.NO));
|
||||||
|
iw.addDocument(doc);
|
||||||
|
IndexReader ir = iw.getReader();
|
||||||
|
iw.close();
|
||||||
|
IndexSearcher is = newSearcher(ir);
|
||||||
|
PhraseQuery pq = new PhraseQuery();
|
||||||
|
pq.add(new Term("body", "just"), 0);
|
||||||
|
pq.add(new Term("body", "test"), 2);
|
||||||
|
// body:"just ? test"
|
||||||
|
assertEquals(1, is.search(pq, 5).totalHits);
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
// LUCENE-3849
|
||||||
|
public void testStopwordsPosIncHole2() throws Exception {
|
||||||
|
// use two stopfilters for testing here
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
final Automaton secondSet = BasicAutomata.makeString("foobar");
|
||||||
|
Analyzer a = new Analyzer() {
|
||||||
|
@Override
|
||||||
|
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
|
Tokenizer tokenizer = new MockTokenizer(reader);
|
||||||
|
TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
|
||||||
|
stream = new MockTokenFilter(stream, new CharacterRunAutomaton(secondSet));
|
||||||
|
return new TokenStreamComponents(tokenizer, stream);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new TextField("body", "just a foobar", Field.Store.NO));
|
||||||
|
doc.add(new TextField("body", "test of gaps", Field.Store.NO));
|
||||||
|
iw.addDocument(doc);
|
||||||
|
IndexReader ir = iw.getReader();
|
||||||
|
iw.close();
|
||||||
|
IndexSearcher is = newSearcher(ir);
|
||||||
|
PhraseQuery pq = new PhraseQuery();
|
||||||
|
pq.add(new Term("body", "just"), 0);
|
||||||
|
pq.add(new Term("body", "test"), 3);
|
||||||
|
// body:"just ? ? test"
|
||||||
|
assertEquals(1, is.search(pq, 5).totalHits);
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
// here we do better, there is no current segments file, so we don't delete anything.
|
// here we do better, there is no current segments file, so we don't delete anything.
|
||||||
// however, if you actually go and make a commit, the next time you run indexwriter
|
// however, if you actually go and make a commit, the next time you run indexwriter
|
||||||
// this file will be gone.
|
// this file will be gone.
|
||||||
|
|
|
@ -555,12 +555,16 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
||||||
private CharTermAttribute termAtt;
|
private CharTermAttribute termAtt;
|
||||||
private PositionIncrementAttribute posIncrAtt;
|
private PositionIncrementAttribute posIncrAtt;
|
||||||
private boolean returned;
|
private boolean returned;
|
||||||
|
private int val;
|
||||||
|
private final String word;
|
||||||
|
|
||||||
public SinglePositionTokenStream(String word) {
|
public SinglePositionTokenStream(String word) {
|
||||||
termAtt = addAttribute(CharTermAttribute.class);
|
termAtt = addAttribute(CharTermAttribute.class);
|
||||||
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
||||||
termAtt.setEmpty().append(word);
|
this.word = word;
|
||||||
returned = true;
|
returned = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the value we want to keep, as the position increment.
|
* Set the value we want to keep, as the position increment.
|
||||||
* Note that when TermPositions.nextPosition() is later used to
|
* Note that when TermPositions.nextPosition() is later used to
|
||||||
|
@ -574,15 +578,21 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
||||||
* This change is described in Lucene's JIRA: LUCENE-1542.
|
* This change is described in Lucene's JIRA: LUCENE-1542.
|
||||||
*/
|
*/
|
||||||
public void set(int val) {
|
public void set(int val) {
|
||||||
posIncrAtt.setPositionIncrement(val);
|
this.val = val;
|
||||||
returned = false;
|
returned = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean incrementToken() throws IOException {
|
public boolean incrementToken() throws IOException {
|
||||||
if (returned) {
|
if (returned) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return returned = true;
|
clearAttributes();
|
||||||
|
posIncrAtt.setPositionIncrement(val);
|
||||||
|
termAtt.setEmpty();
|
||||||
|
termAtt.append(word);
|
||||||
|
returned = true;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -264,7 +264,8 @@ public abstract class AbstractTestCase extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final void end(){
|
public final void end() throws IOException {
|
||||||
|
super.end();
|
||||||
offsetAtt.setOffset(getFinalOffset(),getFinalOffset());
|
offsetAtt.setOffset(getFinalOffset(),getFinalOffset());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -114,7 +114,6 @@ public abstract class SorterTestBase extends LuceneTestCase {
|
||||||
|
|
||||||
public PositionsTokenStream() {
|
public PositionsTokenStream() {
|
||||||
term = addAttribute(CharTermAttribute.class);
|
term = addAttribute(CharTermAttribute.class);
|
||||||
term.append(DOC_POSITIONS_TERM);
|
|
||||||
payload = addAttribute(PayloadAttribute.class);
|
payload = addAttribute(PayloadAttribute.class);
|
||||||
offset = addAttribute(OffsetAttribute.class);
|
offset = addAttribute(OffsetAttribute.class);
|
||||||
}
|
}
|
||||||
|
@ -125,6 +124,8 @@ public abstract class SorterTestBase extends LuceneTestCase {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
clearAttributes();
|
||||||
|
term.append(DOC_POSITIONS_TERM);
|
||||||
payload.setPayload(new BytesRef(Integer.toString(pos)));
|
payload.setPayload(new BytesRef(Integer.toString(pos)));
|
||||||
offset.setOffset(off, off);
|
offset.setOffset(off, off);
|
||||||
--pos;
|
--pos;
|
||||||
|
|
|
@ -50,7 +50,6 @@ public final class SuggestStopFilter extends TokenFilter {
|
||||||
private final CharArraySet stopWords;
|
private final CharArraySet stopWords;
|
||||||
|
|
||||||
private State endState;
|
private State endState;
|
||||||
private boolean ended;
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public SuggestStopFilter(TokenStream input, CharArraySet stopWords) {
|
public SuggestStopFilter(TokenStream input, CharArraySet stopWords) {
|
||||||
|
@ -61,28 +60,24 @@ public final class SuggestStopFilter extends TokenFilter {
|
||||||
@Override
|
@Override
|
||||||
public void reset() throws IOException {
|
public void reset() throws IOException {
|
||||||
super.reset();
|
super.reset();
|
||||||
ended = false;
|
|
||||||
endState = null;
|
endState = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void end() throws IOException {
|
public void end() throws IOException {
|
||||||
if (!ended) {
|
if (endState == null) {
|
||||||
super.end();
|
super.end();
|
||||||
} else {
|
} else {
|
||||||
// NOTE: we already called .end() from our .next() when
|
// NOTE: we already called .end() from our .next() when
|
||||||
// the stream was complete, so we do not call
|
// the stream was complete, so we do not call
|
||||||
// super.end() here
|
// super.end() here
|
||||||
|
restoreState(endState);
|
||||||
if (endState != null) {
|
|
||||||
restoreState(endState);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean incrementToken() throws IOException {
|
public boolean incrementToken() throws IOException {
|
||||||
if (ended) {
|
if (endState != null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -101,8 +96,9 @@ public final class SuggestStopFilter extends TokenFilter {
|
||||||
// It was a stopword; skip it
|
// It was a stopword; skip it
|
||||||
skippedPositions += posInc;
|
skippedPositions += posInc;
|
||||||
} else {
|
} else {
|
||||||
|
clearAttributes();
|
||||||
input.end();
|
input.end();
|
||||||
ended = true;
|
endState = captureState();
|
||||||
int finalEndOffset = offsetAtt.endOffset();
|
int finalEndOffset = offsetAtt.endOffset();
|
||||||
assert finalEndOffset >= endOffset;
|
assert finalEndOffset >= endOffset;
|
||||||
if (finalEndOffset > endOffset) {
|
if (finalEndOffset > endOffset) {
|
||||||
|
@ -112,7 +108,6 @@ public final class SuggestStopFilter extends TokenFilter {
|
||||||
} else {
|
} else {
|
||||||
// No token separator after final token that
|
// No token separator after final token that
|
||||||
// looked like a stop-word; don't filter it:
|
// looked like a stop-word; don't filter it:
|
||||||
endState = captureState();
|
|
||||||
restoreState(sav);
|
restoreState(sav);
|
||||||
posIncAtt.setPositionIncrement(skippedPositions + posIncAtt.getPositionIncrement());
|
posIncAtt.setPositionIncrement(skippedPositions + posIncAtt.getPositionIncrement());
|
||||||
keywordAtt.setKeyword(true);
|
keywordAtt.setKeyword(true);
|
||||||
|
|
|
@ -112,7 +112,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
|
||||||
// - offsets only move forwards (startOffset >=
|
// - offsets only move forwards (startOffset >=
|
||||||
// lastStartOffset)
|
// lastStartOffset)
|
||||||
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
|
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
|
||||||
int posLengths[], Integer finalOffset, boolean[] keywordAtts,
|
int posLengths[], Integer finalOffset, Integer finalPosInc, boolean[] keywordAtts,
|
||||||
boolean offsetsAreCorrect) throws IOException {
|
boolean offsetsAreCorrect) throws IOException {
|
||||||
assertNotNull(output);
|
assertNotNull(output);
|
||||||
CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
|
CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
|
||||||
|
@ -136,7 +136,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
PositionIncrementAttribute posIncrAtt = null;
|
PositionIncrementAttribute posIncrAtt = null;
|
||||||
if (posIncrements != null) {
|
if (posIncrements != null || finalPosInc != null) {
|
||||||
assertTrue("has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
|
assertTrue("has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
|
||||||
posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
|
posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
|
||||||
}
|
}
|
||||||
|
@ -255,19 +255,43 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
|
||||||
assertTrue("posLength must be >= 1", posLengthAtt.getPositionLength() >= 1);
|
assertTrue("posLength must be >= 1", posLengthAtt.getPositionLength() >= 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ts.incrementToken()) {
|
if (ts.incrementToken()) {
|
||||||
fail("TokenStream has more tokens than expected (expected count=" + output.length + "); extra token=" + termAtt.toString());
|
fail("TokenStream has more tokens than expected (expected count=" + output.length + "); extra token=" + termAtt.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// repeat our extra safety checks for end()
|
||||||
|
ts.clearAttributes();
|
||||||
|
if (termAtt != null) termAtt.setEmpty().append("bogusTerm");
|
||||||
|
if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243);
|
||||||
|
if (typeAtt != null) typeAtt.setType("bogusType");
|
||||||
|
if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
|
||||||
|
if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
|
||||||
|
|
||||||
|
checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
|
||||||
|
|
||||||
ts.end();
|
ts.end();
|
||||||
|
assertTrue("super.end()/clearAttributes() was not called correctly in end()", checkClearAtt.getAndResetClearCalled());
|
||||||
|
|
||||||
if (finalOffset != null) {
|
if (finalOffset != null) {
|
||||||
assertEquals("finalOffset ", finalOffset.intValue(), offsetAtt.endOffset());
|
assertEquals("finalOffset", finalOffset.intValue(), offsetAtt.endOffset());
|
||||||
}
|
}
|
||||||
if (offsetAtt != null) {
|
if (offsetAtt != null) {
|
||||||
assertTrue("finalOffset must be >= 0", offsetAtt.endOffset() >= 0);
|
assertTrue("finalOffset must be >= 0", offsetAtt.endOffset() >= 0);
|
||||||
}
|
}
|
||||||
|
if (finalPosInc != null) {
|
||||||
|
assertEquals("finalPosInc", finalPosInc.intValue(), posIncrAtt.getPositionIncrement());
|
||||||
|
}
|
||||||
|
|
||||||
ts.close();
|
ts.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
|
||||||
|
int posLengths[], Integer finalOffset, boolean[] keywordAtts,
|
||||||
|
boolean offsetsAreCorrect) throws IOException {
|
||||||
|
assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, null, offsetsAreCorrect);
|
||||||
|
}
|
||||||
|
|
||||||
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset, boolean offsetsAreCorrect) throws IOException {
|
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset, boolean offsetsAreCorrect) throws IOException {
|
||||||
assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, offsetsAreCorrect);
|
assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, offsetsAreCorrect);
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,7 +58,8 @@ public final class MockTokenFilter extends TokenFilter {
|
||||||
|
|
||||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||||
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
||||||
|
private int skippedPositions;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new MockTokenFilter.
|
* Create a new MockTokenFilter.
|
||||||
*
|
*
|
||||||
|
@ -76,7 +77,7 @@ public final class MockTokenFilter extends TokenFilter {
|
||||||
// initial token with posInc=0 ever
|
// initial token with posInc=0 ever
|
||||||
|
|
||||||
// return the first non-stop word found
|
// return the first non-stop word found
|
||||||
int skippedPositions = 0;
|
skippedPositions = 0;
|
||||||
while (input.incrementToken()) {
|
while (input.incrementToken()) {
|
||||||
if (!filter.run(termAtt.buffer(), 0, termAtt.length())) {
|
if (!filter.run(termAtt.buffer(), 0, termAtt.length())) {
|
||||||
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
|
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
|
||||||
|
@ -87,4 +88,16 @@ public final class MockTokenFilter extends TokenFilter {
|
||||||
// reached EOS -- return false
|
// reached EOS -- return false
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void end() throws IOException {
|
||||||
|
super.end();
|
||||||
|
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
|
skippedPositions = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -244,6 +244,7 @@ public class MockTokenizer extends Tokenizer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void end() throws IOException {
|
public void end() throws IOException {
|
||||||
|
super.end();
|
||||||
int finalOffset = correctOffset(off);
|
int finalOffset = correctOffset(off);
|
||||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
// some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false.
|
// some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false.
|
||||||
|
|
Loading…
Reference in New Issue