LUCENE-3969: ValidatingTokenFilter shouldn't create new atts

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1311405 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-04-09 20:00:50 +00:00
parent 11a65763d0
commit 3e098abaed
2 changed files with 63 additions and 44 deletions

View File

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.Attribute;
// nocommit better name...? // nocommit better name...?
@ -41,14 +42,22 @@ public final class ValidatingTokenFilter extends TokenFilter {
private final Map<Integer,Integer> posToStartOffset = new HashMap<Integer,Integer>(); private final Map<Integer,Integer> posToStartOffset = new HashMap<Integer,Integer>();
private final Map<Integer,Integer> posToEndOffset = new HashMap<Integer,Integer>(); private final Map<Integer,Integer> posToEndOffset = new HashMap<Integer,Integer>();
// nocommit must be more careful here? check hasAttribute first...? private final PositionIncrementAttribute posIncAtt = getAttrIfExists(PositionIncrementAttribute.class);
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); private final PositionLengthAttribute posLenAtt = getAttrIfExists(PositionLengthAttribute.class);
private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class); private final OffsetAttribute offsetAtt = getAttrIfExists(OffsetAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); private final CharTermAttribute termAtt = getAttrIfExists(CharTermAttribute.class);
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final String name; private final String name;
// Looks up an attribute of the given class: yields the existing instance
// when hasAttribute reports it is present, and null otherwise.
private <A extends Attribute> A getAttrIfExists(Class<A> att) {
  return hasAttribute(att) ? getAttribute(att) : null;
}
/** The name arg is used to identify this stage when /** The name arg is used to identify this stage when
* throwing exceptions (useful if you have more than one * throwing exceptions (useful if you have more than one
* instance in your chain). */ * instance in your chain). */
@ -63,6 +72,8 @@ public final class ValidatingTokenFilter extends TokenFilter {
return false; return false;
} }
if (posIncAtt != null && offsetAtt != null) {
pos += posIncAtt.getPositionIncrement(); pos += posIncAtt.getPositionIncrement();
if (pos == -1) { if (pos == -1) {
throw new IllegalStateException("first posInc must be > 0"); throw new IllegalStateException("first posInc must be > 0");
@ -71,7 +82,8 @@ public final class ValidatingTokenFilter extends TokenFilter {
final int startOffset = offsetAtt.startOffset(); final int startOffset = offsetAtt.startOffset();
final int endOffset = offsetAtt.endOffset(); final int endOffset = offsetAtt.endOffset();
final int posLen = posLenAtt.getPositionLength(); final int posLen = posLenAtt == null ? 1 : posLenAtt.getPositionLength();
if (!posToStartOffset.containsKey(pos)) { if (!posToStartOffset.containsKey(pos)) {
// First time we've seen a token leaving from this position: // First time we've seen a token leaving from this position:
posToStartOffset.put(pos, startOffset); posToStartOffset.put(pos, startOffset);
@ -101,11 +113,20 @@ public final class ValidatingTokenFilter extends TokenFilter {
throw new IllegalStateException(name + ": inconsistent endOffset as pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt); throw new IllegalStateException(name + ": inconsistent endOffset as pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt);
} }
} }
}
return true; return true;
} }
// TODO: end? (what to validate?) @Override
public void end() throws IOException {
// Only delegates to the superclass for now; no end-of-stream validation
// is performed by this filter yet (see the notes below).
super.end();
// TODO: what else to validate
// nocommit check that endOffset is >= max(endOffset)
// we've seen
}
@Override @Override
public void reset() throws IOException { public void reset() throws IOException {

View File

@ -111,7 +111,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
// broken! // broken!
EdgeNGramTokenizer.class, EdgeNGramTokenizer.class,
// broken! // broken!
EdgeNGramTokenFilter.class EdgeNGramTokenFilter.class,
// Not broken: we forcefully add this, so we shouldn't
// also randomly pick it:
ValidatingTokenFilter.class
); );
} }
@ -135,11 +138,6 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
continue; continue;
} }
if (c == ValidatingTokenFilter.class) {
// We insert this one ourselves after each stage...
continue;
}
for (final Constructor<?> ctor : c.getConstructors()) { for (final Constructor<?> ctor : c.getConstructors()) {
// don't test deprecated ctors, they likely have known bugs: // don't test deprecated ctors, they likely have known bugs:
if (ctor.isAnnotationPresent(Deprecated.class) || ctor.isSynthetic()) { if (ctor.isAnnotationPresent(Deprecated.class) || ctor.isSynthetic()) {