mirror of https://github.com/apache/lucene.git
LUCENE-3969: ValidatingTokenFilter shouldn't create new atts
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1311405 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
11a65763d0
commit
3e098abaed
|
@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||||
|
import org.apache.lucene.util.Attribute;
|
||||||
|
|
||||||
// nocommit better name...?
|
// nocommit better name...?
|
||||||
|
|
||||||
|
@ -41,14 +42,22 @@ public final class ValidatingTokenFilter extends TokenFilter {
|
||||||
private final Map<Integer,Integer> posToStartOffset = new HashMap<Integer,Integer>();
|
private final Map<Integer,Integer> posToStartOffset = new HashMap<Integer,Integer>();
|
||||||
private final Map<Integer,Integer> posToEndOffset = new HashMap<Integer,Integer>();
|
private final Map<Integer,Integer> posToEndOffset = new HashMap<Integer,Integer>();
|
||||||
|
|
||||||
// nocommit must be more careful here? check hasAttribute first...?
|
private final PositionIncrementAttribute posIncAtt = getAttrIfExists(PositionIncrementAttribute.class);
|
||||||
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
private final PositionLengthAttribute posLenAtt = getAttrIfExists(PositionLengthAttribute.class);
|
||||||
private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
|
private final OffsetAttribute offsetAtt = getAttrIfExists(OffsetAttribute.class);
|
||||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
private final CharTermAttribute termAtt = getAttrIfExists(CharTermAttribute.class);
|
||||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
|
||||||
|
|
||||||
private final String name;
|
private final String name;
|
||||||
|
|
||||||
|
// Returns null if the attr wasn't already added
|
||||||
|
private <A extends Attribute> A getAttrIfExists(Class<A> att) {
|
||||||
|
if (hasAttribute(att)) {
|
||||||
|
return getAttribute(att);
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** The name arg is used to identify this stage when
|
/** The name arg is used to identify this stage when
|
||||||
* throwing exceptions (useful if you have more than one
|
* throwing exceptions (useful if you have more than one
|
||||||
* instance in your chain). */
|
* instance in your chain). */
|
||||||
|
@ -63,49 +72,61 @@ public final class ValidatingTokenFilter extends TokenFilter {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
pos += posIncAtt.getPositionIncrement();
|
if (posIncAtt != null && offsetAtt != null) {
|
||||||
if (pos == -1) {
|
|
||||||
throw new IllegalStateException("first posInc must be > 0");
|
|
||||||
}
|
|
||||||
|
|
||||||
final int startOffset = offsetAtt.startOffset();
|
pos += posIncAtt.getPositionIncrement();
|
||||||
final int endOffset = offsetAtt.endOffset();
|
if (pos == -1) {
|
||||||
|
throw new IllegalStateException("first posInc must be > 0");
|
||||||
final int posLen = posLenAtt.getPositionLength();
|
|
||||||
if (!posToStartOffset.containsKey(pos)) {
|
|
||||||
// First time we've seen a token leaving from this position:
|
|
||||||
posToStartOffset.put(pos, startOffset);
|
|
||||||
System.out.println(" + s " + pos + " -> " + startOffset);
|
|
||||||
} else {
|
|
||||||
// We've seen a token leaving from this position
|
|
||||||
// before; verify the startOffset is the same:
|
|
||||||
System.out.println(" + vs " + pos + " -> " + startOffset);
|
|
||||||
final int oldStartOffset = posToStartOffset.get(pos);
|
|
||||||
if (oldStartOffset != startOffset) {
|
|
||||||
throw new IllegalStateException(name + ": inconsistent startOffset as pos=" + pos + ": " + oldStartOffset + " vs " + startOffset + "; token=" + termAtt);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
final int endPos = pos + posLen;
|
final int startOffset = offsetAtt.startOffset();
|
||||||
|
final int endOffset = offsetAtt.endOffset();
|
||||||
|
|
||||||
if (!posToEndOffset.containsKey(endPos)) {
|
final int posLen = posLenAtt == null ? 1 : posLenAtt.getPositionLength();
|
||||||
// First time we've seen a token arriving to this position:
|
|
||||||
posToEndOffset.put(endPos, endOffset);
|
if (!posToStartOffset.containsKey(pos)) {
|
||||||
System.out.println(" + e " + endPos + " -> " + endOffset);
|
// First time we've seen a token leaving from this position:
|
||||||
} else {
|
posToStartOffset.put(pos, startOffset);
|
||||||
// We've seen a token arriving to this position
|
System.out.println(" + s " + pos + " -> " + startOffset);
|
||||||
// before; verify the endOffset is the same:
|
} else {
|
||||||
System.out.println(" + ve " + endPos + " -> " + endOffset);
|
// We've seen a token leaving from this position
|
||||||
final int oldEndOffset = posToEndOffset.get(endPos);
|
// before; verify the startOffset is the same:
|
||||||
if (oldEndOffset != endOffset) {
|
System.out.println(" + vs " + pos + " -> " + startOffset);
|
||||||
throw new IllegalStateException(name + ": inconsistent endOffset as pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt);
|
final int oldStartOffset = posToStartOffset.get(pos);
|
||||||
|
if (oldStartOffset != startOffset) {
|
||||||
|
throw new IllegalStateException(name + ": inconsistent startOffset as pos=" + pos + ": " + oldStartOffset + " vs " + startOffset + "; token=" + termAtt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final int endPos = pos + posLen;
|
||||||
|
|
||||||
|
if (!posToEndOffset.containsKey(endPos)) {
|
||||||
|
// First time we've seen a token arriving to this position:
|
||||||
|
posToEndOffset.put(endPos, endOffset);
|
||||||
|
System.out.println(" + e " + endPos + " -> " + endOffset);
|
||||||
|
} else {
|
||||||
|
// We've seen a token arriving to this position
|
||||||
|
// before; verify the endOffset is the same:
|
||||||
|
System.out.println(" + ve " + endPos + " -> " + endOffset);
|
||||||
|
final int oldEndOffset = posToEndOffset.get(endPos);
|
||||||
|
if (oldEndOffset != endOffset) {
|
||||||
|
throw new IllegalStateException(name + ": inconsistent endOffset as pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: end? (what to validate?)
|
@Override
|
||||||
|
public void end() throws IOException {
|
||||||
|
super.end();
|
||||||
|
|
||||||
|
// TODO: what else to validate
|
||||||
|
|
||||||
|
// nocommit check that endOffset is >= max(endOffset)
|
||||||
|
// we've seen
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void reset() throws IOException {
|
public void reset() throws IOException {
|
||||||
|
|
|
@ -111,7 +111,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
||||||
// broken!
|
// broken!
|
||||||
EdgeNGramTokenizer.class,
|
EdgeNGramTokenizer.class,
|
||||||
// broken!
|
// broken!
|
||||||
EdgeNGramTokenFilter.class
|
EdgeNGramTokenFilter.class,
|
||||||
|
// Not broken: we forcefully add this, so we shouldn't
|
||||||
|
// also randomly pick it:
|
||||||
|
ValidatingTokenFilter.class
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -135,11 +138,6 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c == ValidatingTokenFilter.class) {
|
|
||||||
// We insert this one ourselves after each stage...
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (final Constructor<?> ctor : c.getConstructors()) {
|
for (final Constructor<?> ctor : c.getConstructors()) {
|
||||||
// don't test deprecated ctors, they likely have known bugs:
|
// don't test deprecated ctors, they likely have known bugs:
|
||||||
if (ctor.isAnnotationPresent(Deprecated.class) || ctor.isSynthetic()) {
|
if (ctor.isAnnotationPresent(Deprecated.class) || ctor.isSynthetic()) {
|
||||||
|
|
Loading…
Reference in New Issue