mirror of https://github.com/apache/lucene.git
SOLR-1400: handle zero length term buffer in TrimFilter
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@812494 13f79535-47bb-0310-9956-ffa450edef68
parent 18a105e35c
commit fe06ba88ff
CHANGES.txt
@@ -532,6 +532,8 @@ Bug Fixes
 63. SOLR-1398: Add offset corrections in PatternTokenizerFactory.
     (Anders Melchiorsen, koji)
 
+64. SOLR-1400: Properly handle zero-length tokens in TrimFilter (Peter Wolanin, gsingers)
+
 Other Changes
 ----------------------
 1.  Upgraded to Lucene 2.4.0 (yonik)
TrimFilter.java
@@ -51,6 +51,11 @@ public final class TrimFilter extends TokenFilter {
 
     char[] termBuffer = termAtt.termBuffer();
     int len = termAtt.termLength();
+    //TODO: Is this the right behavior or should we return false?  Currently, " ", returns true, so I think this should
+    //also return true
+    if (len == 0){
+      return true;
+    }
     int start = 0;
     int end = 0;
     int endOff = 0;
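For context, here is a minimal sketch of where the new guard sits inside TrimFilter.incrementToken(). Only the len == 0 early return (and the TODO above it) comes from this commit; the surrounding trim loop is a plausible reconstruction, and the updateOffsets bookkeeping hinted at by endOff is omitted:

    public boolean incrementToken() throws IOException {
      if (!input.incrementToken()) return false;

      char[] termBuffer = termAtt.termBuffer();
      int len = termAtt.termLength();
      // SOLR-1400: a zero-length term has nothing to trim; pass it through
      // as a token instead of falling into the trim logic below.
      if (len == 0) {
        return true;
      }

      int start = 0;
      int end = len;
      while (start < end && termBuffer[start] <= ' ') start++;   // skip leading whitespace
      while (end > start && termBuffer[end - 1] <= ' ') end--;   // skip trailing whitespace

      if (start > 0 || end < len) {
        // re-point the term at the trimmed region (offset updates omitted here)
        termAtt.setTermBuffer(termBuffer, start, end - start);
      }
      return true;
    }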
TestTrimFilter.java
@@ -17,10 +17,11 @@
 
 package org.apache.solr.analysis;
 
-import java.util.List;
-
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+import java.util.List;
 
 
 /**
@@ -29,28 +30,41 @@ import org.apache.lucene.analysis.TokenStream;
 public class TestTrimFilter extends BaseTokenTestCase {
 
   public void testTrim() throws Exception {
+    char[] a = " a ".toCharArray();
+    char[] b = "b   ".toCharArray();
+    char[] ccc = "cCc".toCharArray();
+    char[] whitespace = "   ".toCharArray();
+    char[] empty = "".toCharArray();
     TokenStream ts = new TrimFilter
-            (new IterTokenStream(new Token(" a ", 1, 5),
-                    new Token("b   ", 6, 10),
-                    new Token("cCc", 11, 15),
-                    new Token("   ", 16, 20)), false );
+            (new IterTokenStream(new Token(a, 0, a.length, 1, 5),
+                    new Token(b, 0, b.length, 6, 10),
+                    new Token(ccc, 0, ccc.length, 11, 15),
+                    new Token(whitespace, 0, whitespace.length, 16, 20),
+                    new Token(empty, 0, empty.length, 21, 21)), false);
 
-    Token token = ts.next();
+    TermAttribute token;
+    assertTrue(ts.incrementToken());
+    token = (TermAttribute) ts.getAttribute(TermAttribute.class);
     assertEquals("a", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
+    assertTrue(ts.incrementToken());
     assertEquals("b", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
+    assertTrue(ts.incrementToken());
     assertEquals("cCc", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
+    assertTrue(ts.incrementToken());
     assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertNull(token);
+    assertTrue(ts.incrementToken());
+    assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
+    assertFalse(ts.incrementToken());
 
+    a = " a".toCharArray();
+    b = "b ".toCharArray();
+    ccc = " c ".toCharArray();
+    whitespace = "   ".toCharArray();
     ts = new TrimFilter(new IterTokenStream(
-            new Token(" a", 0, 2),
-            new Token("b ", 0, 2),
-            new Token(" c ", 0, 3),
-            new Token("   ", 0, 3)), true );
+            new Token(a, 0, a.length, 0, 2),
+            new Token(b, 0, b.length, 0, 2),
+            new Token(ccc, 0, ccc.length, 0, 3),
+            new Token(whitespace, 0, whitespace.length, 0, 3)), true);
 
     List<Token> expect = tokens("a,1,1,2 b,1,0,1 c,1,1,2 ,1,3,3");
     List<Token> real = getTokens(ts);
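The rewritten assertions follow the attribute-based consumption idiom that replaced the deprecated Token-returning ts.next(): fetch the TermAttribute once, then call incrementToken() repeatedly while the stream refills that same attribute in place. A self-contained sketch of the pattern (the class and method names here are illustrative; only TokenStream, TermAttribute, and the two term accessors come from the diff):

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermAttribute;

    public class DumpTerms {
      // Prints every term a stream produces, including zero-length ones,
      // which is exactly what the new TrimFilter guard lets through.
      public static void dump(TokenStream ts) throws IOException {
        TermAttribute term = (TermAttribute) ts.getAttribute(TermAttribute.class);
        while (ts.incrementToken()) {
          System.out.println("[" + new String(term.termBuffer(), 0, term.termLength()) + "]");
        }
      }
    }

The expectation string passed to tokens(...) appears to encode each token as term,positionIncrement,startOffset,endOffset: that reading matches the trimmed offsets asserted above (e.g. " a" at offsets 0-2 becomes "a" at 1-2 once updateOffsets is true), but it is inferred from the values, not stated in the diff.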