From 3e098abaedf532b12f429e885828cee6f3799615 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 9 Apr 2012 20:00:50 +0000 Subject: [PATCH] LUCENE-3969: ValidatingTokenFilter shouldn't create new atts git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1311405 13f79535-47bb-0310-9956-ffa450edef68 --- .../analysis/ValidatingTokenFilter.java | 97 +++++++++++-------- .../analysis/core/TestRandomChains.java | 10 +- 2 files changed, 63 insertions(+), 44 deletions(-) diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java index 264999cdc9b..fe98feb3116 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.util.Attribute; // nocommit better name...? @@ -41,14 +42,22 @@ public final class ValidatingTokenFilter extends TokenFilter { private final Map posToStartOffset = new HashMap(); private final Map posToEndOffset = new HashMap(); - // nocommit must be more careful here? check hasAttribute first...? - private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); - private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class); - private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncAtt = getAttrIfExists(PositionIncrementAttribute.class); + private final PositionLengthAttribute posLenAtt = getAttrIfExists(PositionLengthAttribute.class); + private final OffsetAttribute offsetAtt = getAttrIfExists(OffsetAttribute.class); + private final CharTermAttribute termAtt = getAttrIfExists(CharTermAttribute.class); private final String name; + // Returns null if the attr wasn't already added + private A getAttrIfExists(Class att) { + if (hasAttribute(att)) { + return getAttribute(att); + } else { + return null; + } + } + /** The name arg is used to identify this stage when * throwing exceptions (useful if you have more than one * instance in your chain). */ @@ -63,49 +72,61 @@ public final class ValidatingTokenFilter extends TokenFilter { return false; } - pos += posIncAtt.getPositionIncrement(); - if (pos == -1) { - throw new IllegalStateException("first posInc must be > 0"); - } + if (posIncAtt != null && offsetAtt != null) { - final int startOffset = offsetAtt.startOffset(); - final int endOffset = offsetAtt.endOffset(); - - final int posLen = posLenAtt.getPositionLength(); - if (!posToStartOffset.containsKey(pos)) { - // First time we've seen a token leaving from this position: - posToStartOffset.put(pos, startOffset); - System.out.println(" + s " + pos + " -> " + startOffset); - } else { - // We've seen a token leaving from this position - // before; verify the startOffset is the same: - System.out.println(" + vs " + pos + " -> " + startOffset); - final int oldStartOffset = posToStartOffset.get(pos); - if (oldStartOffset != startOffset) { - throw new IllegalStateException(name + ": inconsistent startOffset as pos=" + pos + ": " + oldStartOffset + " vs " + startOffset + "; token=" + termAtt); + pos += posIncAtt.getPositionIncrement(); + if (pos == -1) { + throw new IllegalStateException("first posInc must be > 0"); } - } - final int endPos = pos + posLen; + final int startOffset = offsetAtt.startOffset(); + final int endOffset = offsetAtt.endOffset(); - if (!posToEndOffset.containsKey(endPos)) { - // First time we've seen a token arriving to this position: - posToEndOffset.put(endPos, endOffset); - System.out.println(" + e " + endPos + " -> " + endOffset); - } else { - // We've seen a token arriving to this position - // before; verify the endOffset is the same: - System.out.println(" + ve " + endPos + " -> " + endOffset); - final int oldEndOffset = posToEndOffset.get(endPos); - if (oldEndOffset != endOffset) { - throw new IllegalStateException(name + ": inconsistent endOffset as pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt); + final int posLen = posLenAtt == null ? 1 : posLenAtt.getPositionLength(); + + if (!posToStartOffset.containsKey(pos)) { + // First time we've seen a token leaving from this position: + posToStartOffset.put(pos, startOffset); + System.out.println(" + s " + pos + " -> " + startOffset); + } else { + // We've seen a token leaving from this position + // before; verify the startOffset is the same: + System.out.println(" + vs " + pos + " -> " + startOffset); + final int oldStartOffset = posToStartOffset.get(pos); + if (oldStartOffset != startOffset) { + throw new IllegalStateException(name + ": inconsistent startOffset as pos=" + pos + ": " + oldStartOffset + " vs " + startOffset + "; token=" + termAtt); + } + } + + final int endPos = pos + posLen; + + if (!posToEndOffset.containsKey(endPos)) { + // First time we've seen a token arriving to this position: + posToEndOffset.put(endPos, endOffset); + System.out.println(" + e " + endPos + " -> " + endOffset); + } else { + // We've seen a token arriving to this position + // before; verify the endOffset is the same: + System.out.println(" + ve " + endPos + " -> " + endOffset); + final int oldEndOffset = posToEndOffset.get(endPos); + if (oldEndOffset != endOffset) { + throw new IllegalStateException(name + ": inconsistent endOffset as pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt); + } } } return true; } - // TODO: end? (what to validate?) + @Override + public void end() throws IOException { + super.end(); + + // TODO: what else to validate + + // nocommit check that endOffset is >= max(endOffset) + // we've seen + } @Override public void reset() throws IOException { diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java index 477e0bc16cd..4f348f57626 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java @@ -111,7 +111,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase { // broken! EdgeNGramTokenizer.class, // broken! - EdgeNGramTokenFilter.class + EdgeNGramTokenFilter.class, + // Not broken: we forcefully add this, so we shouldn't + // also randomly pick it: + ValidatingTokenFilter.class ); } @@ -135,11 +138,6 @@ public class TestRandomChains extends BaseTokenStreamTestCase { continue; } - if (c == ValidatingTokenFilter.class) { - // We insert this one ourselves after each stage... - continue; - } - for (final Constructor ctor : c.getConstructors()) { // don't test deprecated ctors, they likely have known bugs: if (ctor.isAnnotationPresent(Deprecated.class) || ctor.isSynthetic()) {