LUCENE-1466: add test case for MappingCharFilter

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@787875 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-06-24 00:11:50 +00:00
parent 629c7ed4c7
commit 7792aebe92
2 changed files with 343 additions and 0 deletions

View File

@ -0,0 +1,185 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.util.LuceneTestCase;
public abstract class BaseTokenTestCase extends LuceneTestCase {
public static String tsToString(TokenStream in) throws IOException {
StringBuilder out = new StringBuilder();
Token t = in.next();
if (null != t)
out.append(new String(t.termBuffer(), 0, t.termLength()));
for (t = in.next(); null != t; t = in.next()) {
out.append(" ").append(new String(t.termBuffer(), 0, t.termLength()));
}
in.close();
return out.toString();
}
/*
public List<String> tok2str(Iterable<Token> tokLst) {
ArrayList<String> lst = new ArrayList<String>();
for ( Token t : tokLst ) {
lst.add( new String(t.termBuffer(), 0, t.termLength()));
}
return lst;
}
*/
public void assertTokEqual(List/*<Token>*/ a, List/*<Token>*/ b) {
assertTokEq(a,b,false);
assertTokEq(b,a,false);
}
public void assertTokEqualOff(List/*<Token>*/ a, List/*<Token>*/ b) {
assertTokEq(a,b,true);
assertTokEq(b,a,true);
}
private void assertTokEq(List/*<Token>*/ a, List/*<Token>*/ b, boolean checkOff) {
int pos=0;
for (Iterator iter = a.iterator(); iter.hasNext();) {
Token tok = (Token)iter.next();
pos += tok.getPositionIncrement();
if (!tokAt(b, new String(tok.termBuffer(), 0, tok.termLength()), pos
, checkOff ? tok.startOffset() : -1
, checkOff ? tok.endOffset() : -1
))
{
fail(a + "!=" + b);
}
}
}
public boolean tokAt(List/*<Token>*/ lst, String val, int tokPos, int startOff, int endOff) {
int pos=0;
for (Iterator iter = lst.iterator(); iter.hasNext();) {
Token tok = (Token)iter.next();
pos += tok.getPositionIncrement();
if (pos==tokPos && new String(tok.termBuffer(), 0, tok.termLength()).equals(val)
&& (startOff==-1 || tok.startOffset()==startOff)
&& (endOff ==-1 || tok.endOffset() ==endOff )
)
{
return true;
}
}
return false;
}
/***
* Return a list of tokens according to a test string format:
* a b c => returns List<Token> [a,b,c]
* a/b => tokens a and b share the same spot (b.positionIncrement=0)
* a,3/b/c => a,b,c all share same position (a.positionIncrement=3, b.positionIncrement=0, c.positionIncrement=0)
* a,1,10,11 => "a" with positionIncrement=1, startOffset=10, endOffset=11
*/
public List/*<Token>*/ tokens(String str) {
String[] arr = str.split(" ");
List/*<Token>*/ result = new ArrayList/*<Token>*/();
for (int i=0; i<arr.length; i++) {
String[] toks = arr[i].split("/");
String[] params = toks[0].split(",");
int posInc;
int start;
int end;
if (params.length > 1) {
posInc = Integer.parseInt(params[1]);
} else {
posInc = 1;
}
if (params.length > 2) {
start = Integer.parseInt(params[2]);
} else {
start = 0;
}
if (params.length > 3) {
end = Integer.parseInt(params[3]);
} else {
end = start + params[0].length();
}
Token t = new Token(params[0],start,end,"TEST");
t.setPositionIncrement(posInc);
result.add(t);
for (int j=1; j<toks.length; j++) {
t = new Token(toks[j],0,0,"TEST");
t.setPositionIncrement(0);
result.add(t);
}
}
return result;
}
//------------------------------------------------------------------------
// These may be useful beyond test cases...
//------------------------------------------------------------------------
static List/*<Token>*/ getTokens(TokenStream tstream) throws IOException {
List/*<Token>*/ tokens = new ArrayList/*<Token>*/();
while (true) {
Token t = tstream.next();
if (t==null) break;
tokens.add(t);
}
return tokens;
}
/*
public static class IterTokenStream extends TokenStream {
Iterator<Token> toks;
public IterTokenStream(Token... toks) {
this.toks = Arrays.asList(toks).iterator();
}
public IterTokenStream(Iterable<Token> toks) {
this.toks = toks.iterator();
}
public IterTokenStream(Iterator<Token> toks) {
this.toks = toks;
}
public IterTokenStream(String ... text) {
int off = 0;
ArrayList<Token> t = new ArrayList<Token>( text.length );
for( String txt : text ) {
t.add( new Token( txt, off, off+txt.length() ) );
off += txt.length() + 2;
}
this.toks = t.iterator();
}
@Override
public Token next() {
if (toks.hasNext()) {
return toks.next();
}
return null;
}
}
*/
}

View File

@ -0,0 +1,158 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis;
import java.io.StringReader;
import java.util.List;
public class TestMappingCharFilter extends BaseTokenTestCase {
NormalizeCharMap normMap;
public void setUp() throws Exception {
super.setUp();
normMap = new NormalizeCharMap();
normMap.add( "aa", "a" );
normMap.add( "bbb", "b" );
normMap.add( "cccc", "cc" );
normMap.add( "h", "i" );
normMap.add( "j", "jj" );
normMap.add( "k", "kkk" );
normMap.add( "ll", "llll" );
normMap.add( "empty", "" );
}
public void testNothingChange() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "x" ) ) );
TokenStream ts = new WhitespaceTokenizer( cs );
List real = getTokens( ts );
List expect = tokens( "x" );
assertTokEqualOff( expect, real );
}
public void test1to1() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h" ) ) );
TokenStream ts = new WhitespaceTokenizer( cs );
List real = getTokens( ts );
List expect = tokens( "i" );
assertTokEqualOff( expect, real );
}
public void test1to2() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "j" ) ) );
TokenStream ts = new WhitespaceTokenizer( cs );
List real = getTokens( ts );
List expect = tokens( "jj,1,0,1" );
assertTokEqualOff( expect, real );
}
public void test1to3() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "k" ) ) );
TokenStream ts = new WhitespaceTokenizer( cs );
List real = getTokens( ts );
List expect = tokens( "kkk,1,0,1" );
assertTokEqualOff( expect, real );
}
public void test2to4() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "ll" ) ) );
TokenStream ts = new WhitespaceTokenizer( cs );
List real = getTokens( ts );
List expect = tokens( "llll,1,0,2" );
assertTokEqualOff( expect, real );
}
public void test2to1() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "aa" ) ) );
TokenStream ts = new WhitespaceTokenizer( cs );
List real = getTokens( ts );
List expect = tokens( "a,1,0,2" );
assertTokEqualOff( expect, real );
}
public void test3to1() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "bbb" ) ) );
TokenStream ts = new WhitespaceTokenizer( cs );
List real = getTokens( ts );
List expect = tokens( "b,1,0,3" );
assertTokEqualOff( expect, real );
}
public void test4to2() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "cccc" ) ) );
TokenStream ts = new WhitespaceTokenizer( cs );
List real = getTokens( ts );
List expect = tokens( "cc,1,0,4" );
assertTokEqualOff( expect, real );
}
public void test5to0() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "empty" ) ) );
TokenStream ts = new WhitespaceTokenizer( cs );
List real = getTokens( ts );
assertEquals( 0, real.size() );
}
//
// 1111111111222
// 01234567890123456789012
//(in) h i j k ll cccc bbb aa
//
// 1111111111222
// 01234567890123456789012
//(out) i i jj kkk llll cc b a
//
// h, 0, 1 => i, 0, 1
// i, 2, 3 => i, 2, 3
// j, 4, 5 => jj, 4, 5
// k, 6, 7 => kkk, 6, 7
// ll, 8,10 => llll, 8,10
// cccc,11,15 => cc,11,15
// bbb,16,19 => b,16,19
// aa,20,22 => a,20,22
//
public void testTokenStream() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h i j k ll cccc bbb aa" ) ) );
TokenStream ts = new WhitespaceTokenizer( cs );
List real = getTokens( ts );
List expect = tokens( "i,1,0,1 i,1,2,3 jj,1,4,5 kkk,1,6,7 llll,1,8,10 cc,1,11,15 b,1,16,19 a,1,20,22" );
assertTokEqualOff( expect, real );
}
//
//
// 0123456789
//(in) aaaa ll h
//(out-1) aa llll i
//(out-2) a llllllll i
//
// aaaa,0,4 => a,0,4
// ll,5,7 => llllllll,5,7
// h,8,9 => i,8,9
public void testChained() throws Exception {
CharStream cs = new MappingCharFilter( normMap,
new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) ) ) );
TokenStream ts = new WhitespaceTokenizer( cs );
List real = getTokens( ts );
List expect = tokens( "a,1,0,4 llllllll,1,5,7 i,1,8,9" );
assertTokEqualOff( expect, real );
}
}