mirror of https://github.com/apache/lucene.git
LUCENE-8517: do not wrap FixedShingleFilter with conditional in TestRandomChains
parent 72ca4488d1
commit 54907903e8
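For context: TestRandomChains randomly wraps token filters in a ConditionalTokenFilter, which routes only the tokens for which shouldFilter() returns true through the wrapped filter. A shingle filter that sees an arbitrary subset of the stream can emit shingles with inconsistent offsets, so this commit excludes FixedShingleFilter from that wrapping, mirroring the existing ShingleFilter entry. A minimal sketch of this kind of wrapping (the helper class, shingle size, and seed are illustrative, not the test's actual code):

    import java.util.Random;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
    import org.apache.lucene.analysis.shingle.FixedShingleFilter;

    class ConditionalShingleSketch {
      static TokenStream wrap(TokenStream in) {
        // Wrap FixedShingleFilter so it only sees tokens for which
        // shouldFilter() returns true -- the wrapping this commit disables.
        return new ConditionalTokenFilter(in, s -> new FixedShingleFilter(s, 2)) {
          private final Random random = new Random(42);  // illustrative seed
          @Override
          protected boolean shouldFilter() {
            // only a random subset of tokens reaches the shingle filter,
            // which is exactly what makes its offsets unreliable
            return random.nextBoolean();
          }
        };
      }
    }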
TestRandomChains.java

@@ -88,6 +88,7 @@ import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
 import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
 import org.apache.lucene.analysis.payloads.IdentityEncoder;
 import org.apache.lucene.analysis.payloads.PayloadEncoder;
+import org.apache.lucene.analysis.shingle.FixedShingleFilter;
 import org.apache.lucene.analysis.shingle.ShingleFilter;
 import org.apache.lucene.analysis.snowball.TestSnowball;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -129,6 +130,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
     // expose inconsistent offsets
     // https://issues.apache.org/jira/browse/LUCENE-4170
     avoidConditionals.add(ShingleFilter.class);
+    avoidConditionals.add(FixedShingleFilter.class);
     // FlattenGraphFilter changes the output graph entirely, so wrapping it in a condition
     // can break position lengths
     avoidConditionals.add(FlattenGraphFilter.class);
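A hedged sketch of how an exclusion set like this is typically consulted while the random chain is assembled (filterClass, random, and the helper are hypothetical stand-ins, not the test's real fields):

    import java.util.Random;
    import java.util.Set;

    class ConditionalEligibilitySketch {
      // Only filters that tolerate seeing a subset of the token stream are
      // eligible for random ConditionalTokenFilter wrapping.
      static boolean mayWrapInConditional(Set<Class<?>> avoidConditionals,
                                          Class<?> filterClass, Random random) {
        return avoidConditionals.contains(filterClass) == false && random.nextBoolean();
      }
    }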
@@ -590,7 +592,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
 
   static class MockRandomAnalyzer extends Analyzer {
     final long seed;
 
     MockRandomAnalyzer(long seed) {
       this.seed = seed;
     }
ValidatingTokenFilter.java

@@ -17,7 +17,11 @@
 package org.apache.lucene.analysis;
 
 import java.io.IOException;
+import java.io.PrintStream;
 import java.util.HashMap;
+import java.util.LinkedList;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -38,6 +42,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
  * offsets are consistent with one another). */
 public final class ValidatingTokenFilter extends TokenFilter {
 
+  private static final int MAX_DEBUG_TOKENS = 20;
+
   private int pos;
   private int lastStartOffset;
 
@@ -50,6 +56,9 @@ public final class ValidatingTokenFilter extends TokenFilter {
   private final OffsetAttribute offsetAtt = getAttribute(OffsetAttribute.class);
+  private final CharTermAttribute termAtt = getAttribute(CharTermAttribute.class);
 
+  // record all the Tokens seen so they can be dumped on failure
+  private final List<Token> tokens = new LinkedList<>();
 
   private final String name;
 
   /** The name arg is used to identify this stage when
@@ -72,28 +81,38 @@ public final class ValidatingTokenFilter extends TokenFilter {
     int startOffset = 0;
     int endOffset = 0;
     int posLen = 0;
+    int posInc = 0;
 
+    if (posIncAtt != null) {
+      posInc = posIncAtt.getPositionIncrement();
+    }
+    if (offsetAtt != null) {
+      startOffset = offsetAtt.startOffset();
+      endOffset = offsetAtt.endOffset();
+    }
+
+    posLen = posLenAtt == null ? 1 : posLenAtt.getPositionLength();
+
+    addToken(startOffset, endOffset, posInc);
+
+    // System.out.println(name + ": " + this);
+
     if (posIncAtt != null) {
-      pos += posIncAtt.getPositionIncrement();
+      pos += posInc;
       if (pos == -1) {
+        dumpValidatingTokenFilters(this, System.err);
         throw new IllegalStateException(name + ": first posInc must be > 0");
       }
     }
 
     if (offsetAtt != null) {
-      startOffset = offsetAtt.startOffset();
-      endOffset = offsetAtt.endOffset();
-
-      if (offsetAtt.startOffset() < lastStartOffset) {
+      if (startOffset < lastStartOffset) {
+        dumpValidatingTokenFilters(this, System.err);
         throw new IllegalStateException(name + ": offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset);
       }
       lastStartOffset = offsetAtt.startOffset();
     }
 
-    posLen = posLenAtt == null ? 1 : posLenAtt.getPositionLength();
-
     if (offsetAtt != null && posIncAtt != null) {
 
       if (!posToStartOffset.containsKey(pos)) {
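The checks above enforce that the first token has a position increment greater than zero and that start offsets never decrease as the stream advances. A minimal sketch of a stream that trips the offsets check, assuming the test-framework's CannedTokenStream (class name, token text, and offsets are illustrative):

    import java.io.IOException;
    import org.apache.lucene.analysis.CannedTokenStream;
    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.ValidatingTokenFilter;

    class OffsetsGoBackwardsSketch {
      public static void main(String[] args) throws IOException {
        Token first = new Token("foo", 4, 7);   // startOffset=4
        Token second = new Token("bar", 0, 3);  // startOffset=0: goes backwards
        TokenStream ts = new ValidatingTokenFilter(
            new CannedTokenStream(first, second), "stage1");
        ts.reset();
        while (ts.incrementToken()) {
          // throws IllegalStateException("stage1: offsets must not go backwards ...")
          // on the second token, after dumping the recorded tokens to System.err
        }
        ts.end();
        ts.close();
      }
    }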
@@ -106,6 +125,7 @@ public final class ValidatingTokenFilter extends TokenFilter {
         // System.out.println(name + " + vs " + pos + " -> " + startOffset);
         final int oldStartOffset = posToStartOffset.get(pos);
         if (oldStartOffset != startOffset) {
+          dumpValidatingTokenFilters(this, System.err);
           throw new IllegalStateException(name + ": inconsistent startOffset at pos=" + pos + ": " + oldStartOffset + " vs " + startOffset + "; token=" + termAtt);
         }
       }
@@ -122,6 +142,7 @@ public final class ValidatingTokenFilter extends TokenFilter {
         //System.out.println(name + " + ve " + endPos + " -> " + endOffset);
         final int oldEndOffset = posToEndOffset.get(endPos);
         if (oldEndOffset != endOffset) {
+          dumpValidatingTokenFilters(this, System.err);
           throw new IllegalStateException(name + ": inconsistent endOffset at pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt);
         }
       }
@@ -147,5 +168,39 @@ public final class ValidatingTokenFilter extends TokenFilter {
     posToStartOffset.clear();
     posToEndOffset.clear();
     lastStartOffset = 0;
+    tokens.clear();
   }
 
+  private void addToken(int startOffset, int endOffset, int posInc) {
+    if (tokens.size() == MAX_DEBUG_TOKENS) {
+      tokens.remove(0);
+    }
+    tokens.add(new Token(termAtt.toString(), posInc, startOffset, endOffset));
+  }
+
+  /**
+   * Prints details about consumed tokens stored in any ValidatingTokenFilters in the input chain
+   * @param in
+   * @param out
+   */
+  public static void dumpValidatingTokenFilters(TokenStream in, PrintStream out) {
+    if (in instanceof TokenFilter) {
+      dumpValidatingTokenFilters(((TokenFilter) in).input, out);
+      if (in instanceof ValidatingTokenFilter) {
+        out.println(((ValidatingTokenFilter) in).dump());
+      }
+    }
+  }
+
+  public String dump() {
+    StringBuilder buf = new StringBuilder();
+    buf.append(name).append(": ");
+    for (Token token : tokens) {
+      buf.append(String.format(Locale.ROOT, "%s<[%d-%d] +%d> ",
+          token, token.startOffset(), token.endOffset(), token.getPositionIncrement()));
+    }
+    return buf.toString();
+  }
+
 }
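The new dump plumbing keeps the last MAX_DEBUG_TOKENS tokens seen at each stage and walks back down the filter chain to print them. The filter calls dumpValidatingTokenFilters itself before throwing, but since the method is public and static, a test harness can also invoke it for failures detected elsewhere; a hedged usage sketch (consume is a hypothetical helper that exhausts the stream):

    // After building a chain interleaved with ValidatingTokenFilters, dump
    // every stage's recent tokens if consuming the stream fails.
    try {
      consume(tokenStream);  // hypothetical consumer
    } catch (IllegalStateException e) {
      ValidatingTokenFilter.dumpValidatingTokenFilters(tokenStream, System.err);
      throw e;
    }

Each stage prints one line in the format built by dump(), e.g. a stage named "stage2" that saw two tokens might print (values illustrative): stage2: wifi<[0-4] +1> hotspot<[5-12] +1> -- that is, term text, start-end offsets, and position increment for each recorded token.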