mirror of https://github.com/apache/lucene.git
SOLR-1400: handle zero length term buffer in TrimFilter
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@812494 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 18a105e35c
commit fe06ba88ff
CHANGES.txt
@@ -532,6 +532,8 @@ Bug Fixes
 63. SOLR-1398: Add offset corrections in PatternTokenizerFactory.
     (Anders Melchiorsen, koji)
 
+64. SOLR-1400: Properly handle zero-length tokens in TrimFilter (Peter Wolanin, gsingers)
+
 Other Changes
 ----------------------
 1. Upgraded to Lucene 2.4.0 (yonik)
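Note: as a reading aid, a condensed sketch (not part of this commit) of the behavior the new CHANGES entry describes follows. It assumes the IterTokenStream helper from BaseTokenTestCase and the Token and TrimFilter constructors used by the test changed later in this commit; the class and method names here are invented for illustration.

    package org.apache.solr.analysis;

    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermAttribute;

    // Hypothetical sketch: after the fix, a zero-length token passes through
    // TrimFilter and stays in the stream, mirroring what the new test asserts.
    public class ZeroLengthTrimSketch extends BaseTokenTestCase {
      public void testEmptyTermSurvivesTrim() throws Exception {
        char[] empty = "".toCharArray();
        TokenStream ts = new TrimFilter(
            new IterTokenStream(new Token(empty, 0, empty.length, 0, 0)), false);
        TermAttribute term = (TermAttribute) ts.getAttribute(TermAttribute.class);
        assertTrue(ts.incrementToken());     // the empty token is kept
        assertEquals(0, term.termLength());  // and its term text stays empty
        assertFalse(ts.incrementToken());    // then the stream ends
      }
    }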
TrimFilter.java
@@ -51,6 +51,11 @@ public final class TrimFilter extends TokenFilter {
 
     char[] termBuffer = termAtt.termBuffer();
     int len = termAtt.termLength();
+    //TODO: Is this the right behavior or should we return false? Currently, " ", returns true, so I think this should
+    //also return true
+    if (len == 0){
+      return true;
+    }
     int start = 0;
     int end = 0;
     int endOff = 0;
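Note: the guard above returns true because the trimming that follows has to look at the first and last characters of the term buffer, and with a zero-length term there is nothing to look at; keeping the token is consistent with a whitespace-only token, which trims down to an empty term and is also kept (the TODO in the patch records that open question). Below is a simplified sketch of how the guard sits inside such a trim loop; it is illustrative only, omits the updateOffsets handling, and is not the verbatim body of TrimFilter.

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermAttribute;

    // Simplified illustration of a trim filter with the zero-length guard;
    // not the actual TrimFilter source.
    final class SimpleTrimFilter extends TokenFilter {
      private final TermAttribute termAtt;

      SimpleTrimFilter(TokenStream in) {
        super(in);
        termAtt = (TermAttribute) addAttribute(TermAttribute.class);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) return false;

        char[] termBuffer = termAtt.termBuffer();
        int len = termAtt.termLength();
        if (len == 0) {
          return true;               // nothing to trim; keep the empty token
        }

        int start = 0;
        while (start < len && Character.isWhitespace(termBuffer[start])) start++;
        int end = len;
        while (end > start && Character.isWhitespace(termBuffer[end - 1])) end--;

        if (start > 0 || end < len) {
          // Shrink the term to the non-whitespace region (it may become empty).
          termAtt.setTermBuffer(termBuffer, start, end - start);
        }
        return true;
      }
    }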
TestTrimFilter.java
@@ -17,50 +17,64 @@
 
 package org.apache.solr.analysis;
 
-import java.util.List;
-
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+import java.util.List;
 
 
 /**
  * @version $Id:$
  */
 public class TestTrimFilter extends BaseTokenTestCase {
 
   public void testTrim() throws Exception {
-    TokenStream ts = new TrimFilter
-            (new IterTokenStream(new Token(" a ", 1, 5),
-                    new Token("b ",6,10),
-                    new Token("cCc",11,15),
-                    new Token(" ",16,20)), false );
-
-    Token token = ts.next();
+    char[] a = " a ".toCharArray();
+    char[] b = "b ".toCharArray();
+    char[] ccc = "cCc".toCharArray();
+    char[] whitespace = " ".toCharArray();
+    char[] empty = "".toCharArray();
+    TokenStream ts = new TrimFilter
+            (new IterTokenStream(new Token(a, 0, a.length, 1, 5),
+                    new Token(b, 0, b.length, 6, 10),
+                    new Token(ccc, 0, ccc.length, 11, 15),
+                    new Token(whitespace, 0, whitespace.length, 16, 20),
+                    new Token(empty, 0, empty.length, 21, 21)), false);
+
+    TermAttribute token;
+    assertTrue(ts.incrementToken());
+    token = (TermAttribute) ts.getAttribute(TermAttribute.class);
     assertEquals("a", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
+    assertTrue(ts.incrementToken());
     assertEquals("b", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
+    assertTrue(ts.incrementToken());
     assertEquals("cCc", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
+    assertTrue(ts.incrementToken());
     assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertNull(token);
+    assertTrue(ts.incrementToken());
+    assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
+    assertFalse(ts.incrementToken());
 
-    ts = new TrimFilter( new IterTokenStream(
-           new Token(" a", 0,2),
-           new Token("b ", 0,2),
-           new Token(" c ",0,3),
-           new Token(" ",0,3)), true );
-
-    List<Token> expect = tokens( "a,1,1,2 b,1,0,1 c,1,1,2 ,1,3,3" );
+    a = " a".toCharArray();
+    b = "b ".toCharArray();
+    ccc = " c ".toCharArray();
+    whitespace = " ".toCharArray();
+    ts = new TrimFilter(new IterTokenStream(
+            new Token(a, 0, a.length, 0, 2),
+            new Token(b, 0, b.length, 0, 2),
+            new Token(ccc, 0, ccc.length, 0, 3),
+            new Token(whitespace, 0, whitespace.length, 0, 3)), true);
+
+    List<Token> expect = tokens("a,1,1,2 b,1,0,1 c,1,1,2 ,1,3,3");
     List<Token> real = getTokens(ts);
-    for( Token t : expect ) {
-      System.out.println( "TEST:" + t );
+    for (Token t : expect) {
+      System.out.println("TEST:" + t);
     }
-    for( Token t : real ) {
-      System.out.println( "REAL:" + t );
+    for (Token t : real) {
+      System.out.println("REAL:" + t);
     }
-    assertTokEqualOff( expect, real );
+    assertTokEqualOff(expect, real);
   }
 
 }
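Note: besides covering the zero-length case, the rewritten test switches from the deprecated Token-returning next() loop to the attribute-based TokenStream API (advance with incrementToken(), then read the shared TermAttribute). A minimal helper showing that consumption pattern follows; the class and method names are invented for illustration.

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermAttribute;

    // Hypothetical helper: drain a TokenStream the way the updated test does.
    final class TokenStreamDumper {
      static void dump(TokenStream ts) throws IOException {
        // The stream exposes a single shared TermAttribute whose buffer is
        // refilled on every successful incrementToken() call.
        TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
        while (ts.incrementToken()) {
          System.out.println(new String(termAtt.termBuffer(), 0, termAtt.termLength()));
        }
      }
    }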