mirror of https://github.com/apache/lucene.git
LUCENE-3969: ValidatingTokenFilter shouldn't create new atts
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1311405 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
11a65763d0
commit
3e098abaed
|
@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||
import org.apache.lucene.util.Attribute;
|
||||
|
||||
// nocommit better name...?
|
||||
|
||||
|
@ -41,14 +42,22 @@ public final class ValidatingTokenFilter extends TokenFilter {
|
|||
private final Map<Integer,Integer> posToStartOffset = new HashMap<Integer,Integer>();
|
||||
private final Map<Integer,Integer> posToEndOffset = new HashMap<Integer,Integer>();
|
||||
|
||||
// nocommit must be more careful here? check hasAttribute first...?
|
||||
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final PositionIncrementAttribute posIncAtt = getAttrIfExists(PositionIncrementAttribute.class);
|
||||
private final PositionLengthAttribute posLenAtt = getAttrIfExists(PositionLengthAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = getAttrIfExists(OffsetAttribute.class);
|
||||
private final CharTermAttribute termAtt = getAttrIfExists(CharTermAttribute.class);
|
||||
|
||||
private final String name;
|
||||
|
||||
// Looks up an attribute without creating it: returns the instance from
// getAttribute(att) when hasAttribute(att) is true, otherwise returns null.
// This helper never calls addAttribute, so callers must null-check the result.
|
||||
private <A extends Attribute> A getAttrIfExists(Class<A> att) {
|
||||
if (hasAttribute(att)) {
|
||||
return getAttribute(att);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** The name arg is used to identify this stage when
|
||||
* throwing exceptions (useful if you have more than one
|
||||
* instance in your chain). */
|
||||
|
@ -63,6 +72,8 @@ public final class ValidatingTokenFilter extends TokenFilter {
|
|||
return false;
|
||||
}
|
||||
|
||||
if (posIncAtt != null && offsetAtt != null) {
|
||||
|
||||
pos += posIncAtt.getPositionIncrement();
|
||||
if (pos == -1) {
|
||||
throw new IllegalStateException("first posInc must be > 0");
|
||||
|
@ -71,7 +82,8 @@ public final class ValidatingTokenFilter extends TokenFilter {
|
|||
final int startOffset = offsetAtt.startOffset();
|
||||
final int endOffset = offsetAtt.endOffset();
|
||||
|
||||
final int posLen = posLenAtt.getPositionLength();
|
||||
final int posLen = posLenAtt == null ? 1 : posLenAtt.getPositionLength();
|
||||
|
||||
if (!posToStartOffset.containsKey(pos)) {
|
||||
// First time we've seen a token leaving from this position:
|
||||
posToStartOffset.put(pos, startOffset);
|
||||
|
@ -101,11 +113,20 @@ public final class ValidatingTokenFilter extends TokenFilter {
|
|||
throw new IllegalStateException(name + ": inconsistent endOffset as pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO: end? (what to validate?)
|
||||
// Currently only delegates to super.end(); no end-of-stream validation is
// performed yet (see the TODO/nocommit notes in the body).
@Override
|
||||
public void end() throws IOException {
|
||||
super.end();
|
||||
|
||||
// TODO: what else to validate
|
||||
|
||||
// nocommit check that endOffset is >= max(endOffset)
|
||||
// we've seen
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
|
|
|
@ -111,7 +111,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
|||
// broken!
|
||||
EdgeNGramTokenizer.class,
|
||||
// broken!
|
||||
EdgeNGramTokenFilter.class
|
||||
EdgeNGramTokenFilter.class,
|
||||
// Not broken: we forcefully add this, so we shouldn't
|
||||
// also randomly pick it:
|
||||
ValidatingTokenFilter.class
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -135,11 +138,6 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (c == ValidatingTokenFilter.class) {
|
||||
// We insert this one ourselves after each stage...
|
||||
continue;
|
||||
}
|
||||
|
||||
for (final Constructor<?> ctor : c.getConstructors()) {
|
||||
// don't test deprecated ctors, they likely have known bugs:
|
||||
if (ctor.isAnnotationPresent(Deprecated.class) || ctor.isSynthetic()) {
|
||||
|
|
Loading…
Reference in New Issue