LUCENE-3969: ValidatingTokenFilter shouldn't create new atts

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1311405 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-04-09 20:00:50 +00:00
parent 11a65763d0
commit 3e098abaed
2 changed files with 63 additions and 44 deletions

View File

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.Attribute;
// nocommit better name...?
@ -41,14 +42,22 @@ public final class ValidatingTokenFilter extends TokenFilter {
private final Map<Integer,Integer> posToStartOffset = new HashMap<Integer,Integer>();
private final Map<Integer,Integer> posToEndOffset = new HashMap<Integer,Integer>();
// nocommit must be more careful here? check hasAttribute first...?
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncAtt = getAttrIfExists(PositionIncrementAttribute.class);
private final PositionLengthAttribute posLenAtt = getAttrIfExists(PositionLengthAttribute.class);
private final OffsetAttribute offsetAtt = getAttrIfExists(OffsetAttribute.class);
private final CharTermAttribute termAtt = getAttrIfExists(CharTermAttribute.class);
private final String name;
// Returns null if the attr wasn't already added
private <A extends Attribute> A getAttrIfExists(Class<A> att) {
if (hasAttribute(att)) {
return getAttribute(att);
} else {
return null;
}
}
/** The name arg is used to identify this stage when
* throwing exceptions (useful if you have more than one
* instance in your chain). */
@ -63,49 +72,61 @@ public final class ValidatingTokenFilter extends TokenFilter {
return false;
}
pos += posIncAtt.getPositionIncrement();
if (pos == -1) {
throw new IllegalStateException("first posInc must be > 0");
}
if (posIncAtt != null && offsetAtt != null) {
final int startOffset = offsetAtt.startOffset();
final int endOffset = offsetAtt.endOffset();
final int posLen = posLenAtt.getPositionLength();
if (!posToStartOffset.containsKey(pos)) {
// First time we've seen a token leaving from this position:
posToStartOffset.put(pos, startOffset);
System.out.println(" + s " + pos + " -> " + startOffset);
} else {
// We've seen a token leaving from this position
// before; verify the startOffset is the same:
System.out.println(" + vs " + pos + " -> " + startOffset);
final int oldStartOffset = posToStartOffset.get(pos);
if (oldStartOffset != startOffset) {
throw new IllegalStateException(name + ": inconsistent startOffset as pos=" + pos + ": " + oldStartOffset + " vs " + startOffset + "; token=" + termAtt);
pos += posIncAtt.getPositionIncrement();
if (pos == -1) {
throw new IllegalStateException("first posInc must be > 0");
}
}
final int endPos = pos + posLen;
final int startOffset = offsetAtt.startOffset();
final int endOffset = offsetAtt.endOffset();
if (!posToEndOffset.containsKey(endPos)) {
// First time we've seen a token arriving to this position:
posToEndOffset.put(endPos, endOffset);
System.out.println(" + e " + endPos + " -> " + endOffset);
} else {
// We've seen a token arriving to this position
// before; verify the endOffset is the same:
System.out.println(" + ve " + endPos + " -> " + endOffset);
final int oldEndOffset = posToEndOffset.get(endPos);
if (oldEndOffset != endOffset) {
throw new IllegalStateException(name + ": inconsistent endOffset as pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt);
final int posLen = posLenAtt == null ? 1 : posLenAtt.getPositionLength();
if (!posToStartOffset.containsKey(pos)) {
// First time we've seen a token leaving from this position:
posToStartOffset.put(pos, startOffset);
System.out.println(" + s " + pos + " -> " + startOffset);
} else {
// We've seen a token leaving from this position
// before; verify the startOffset is the same:
System.out.println(" + vs " + pos + " -> " + startOffset);
final int oldStartOffset = posToStartOffset.get(pos);
if (oldStartOffset != startOffset) {
throw new IllegalStateException(name + ": inconsistent startOffset as pos=" + pos + ": " + oldStartOffset + " vs " + startOffset + "; token=" + termAtt);
}
}
final int endPos = pos + posLen;
if (!posToEndOffset.containsKey(endPos)) {
// First time we've seen a token arriving to this position:
posToEndOffset.put(endPos, endOffset);
System.out.println(" + e " + endPos + " -> " + endOffset);
} else {
// We've seen a token arriving to this position
// before; verify the endOffset is the same:
System.out.println(" + ve " + endPos + " -> " + endOffset);
final int oldEndOffset = posToEndOffset.get(endPos);
if (oldEndOffset != endOffset) {
throw new IllegalStateException(name + ": inconsistent endOffset as pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt);
}
}
}
return true;
}
// TODO: end? (what to validate?)
@Override
public void end() throws IOException {
super.end();
// TODO: what else to validate
// nocommit check that endOffset is >= max(endOffset)
// we've seen
}
@Override
public void reset() throws IOException {

View File

@ -111,7 +111,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
// broken!
EdgeNGramTokenizer.class,
// broken!
EdgeNGramTokenFilter.class
EdgeNGramTokenFilter.class,
// Not broken: we forcefully add this, so we shouldn't
// also randomly pick it:
ValidatingTokenFilter.class
);
}
@ -135,11 +138,6 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
continue;
}
if (c == ValidatingTokenFilter.class) {
// We insert this one ourselves after each stage...
continue;
}
for (final Constructor<?> ctor : c.getConstructors()) {
// don't test deprecated ctors, they likely have known bugs:
if (ctor.isAnnotationPresent(Deprecated.class) || ctor.isSynthetic()) {