mirror of https://github.com/apache/lucene.git
fix test failure: posLen must be 1 when keepOrig is false for a syn match
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1391083 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f85dc67f41
commit
f0efba1a06
|
@ -480,7 +480,7 @@ public final class SynonymFilter extends TokenFilter {
|
|||
// endOffset (ie, endOffset of the last input
|
||||
// token it matched):
|
||||
endOffset = matchEndOffset;
|
||||
posLen = matchInputLength;
|
||||
posLen = keepOrig ? matchInputLength : 1;
|
||||
} else {
|
||||
// This rule has more than one output token; we
|
||||
// can't pick any particular endOffset for this
|
||||
|
|
|
@ -40,7 +40,6 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
|
|||
import org.apache.lucene.analysis.tokenattributes.*;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
|
||||
public class TestSynonymMapFilter extends BaseTokenStreamTestCase {
|
||||
|
||||
|
@ -53,6 +52,9 @@ public class TestSynonymMapFilter extends BaseTokenStreamTestCase {
|
|||
private OffsetAttribute offsetAtt;
|
||||
|
||||
private void add(String input, String output, boolean keepOrig) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" add input=" + input + " output=" + output + " keepOrig=" + keepOrig);
|
||||
}
|
||||
b.add(new CharsRef(input.replaceAll(" +", "\u0000")),
|
||||
new CharsRef(output.replaceAll(" +", "\u0000")),
|
||||
keepOrig);
|
||||
|
@ -137,6 +139,56 @@ public class TestSynonymMapFilter extends BaseTokenStreamTestCase {
|
|||
assertEquals(expectedUpto, expected.length);
|
||||
}
|
||||
|
||||
public void testDontKeepOrig() throws Exception {
|
||||
b = new SynonymMap.Builder(true);
|
||||
add("a b", "foo", false);
|
||||
|
||||
final SynonymMap map = b.build();
|
||||
|
||||
final Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||
return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
|
||||
}
|
||||
};
|
||||
|
||||
assertAnalyzesTo(analyzer, "a b c",
|
||||
new String[] {"foo", "c"},
|
||||
new int[] {0, 4},
|
||||
new int[] {3, 5},
|
||||
null,
|
||||
new int[] {1, 1},
|
||||
new int[] {1, 1},
|
||||
true);
|
||||
checkAnalysisConsistency(random(), analyzer, false, "a b c");
|
||||
}
|
||||
|
||||
public void testDoKeepOrig() throws Exception {
|
||||
b = new SynonymMap.Builder(true);
|
||||
add("a b", "foo", true);
|
||||
|
||||
final SynonymMap map = b.build();
|
||||
|
||||
final Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||
return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
|
||||
}
|
||||
};
|
||||
|
||||
assertAnalyzesTo(analyzer, "a b c",
|
||||
new String[] {"a", "foo", "b", "c"},
|
||||
new int[] {0, 0, 2, 4},
|
||||
new int[] {1, 3, 3, 5},
|
||||
null,
|
||||
new int[] {1, 0, 1, 1},
|
||||
new int[] {1, 2, 1, 1},
|
||||
true);
|
||||
checkAnalysisConsistency(random(), analyzer, false, "a b c");
|
||||
}
|
||||
|
||||
public void testBasic() throws Exception {
|
||||
b = new SynonymMap.Builder(true);
|
||||
add("a", "foo", true);
|
||||
|
@ -284,7 +336,7 @@ public class TestSynonymMapFilter extends BaseTokenStreamTestCase {
|
|||
if (synOutputs.length == 1) {
|
||||
// Add full endOffset
|
||||
endOffset = (inputIDX*2) + syn.in.length();
|
||||
posLen = (1+syn.in.length())/2;
|
||||
posLen = syn.keepOrig ? (1+syn.in.length())/2 : 1;
|
||||
} else {
|
||||
// Add endOffset matching input token's
|
||||
endOffset = (matchIDX*2) + 1;
|
||||
|
@ -540,6 +592,9 @@ public class TestSynonymMapFilter extends BaseTokenStreamTestCase {
|
|||
for (int i = 0; i < numIters; i++) {
|
||||
b = new SynonymMap.Builder(random.nextBoolean());
|
||||
final int numEntries = atLeast(10);
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: iter=" + i + " numEntries=" + numEntries);
|
||||
}
|
||||
for (int j = 0; j < numEntries; j++) {
|
||||
add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue