mirror of https://github.com/apache/lucene.git
LUCENE-3456: use MockTokenizer in analysis tests
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1175650 13f79535-47bb-0310-9956-ffa450edef68
parent 67fe9a6b81
commit e927a69698
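Every hunk below applies the same mechanical substitution: test code that built a real WhitespaceTokenizer or KeywordTokenizer now builds a MockTokenizer, which produces the same tokens but additionally fails the test when a consumer violates the TokenStream workflow (reset() before the first incrementToken(), then end() and close()). A minimal before/after sketch of the substitution, using an illustrative input string:

    // before: a concrete tokenizer that silently tolerates workflow violations
    // Tokenizer tok = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("testing the factory"));
    // after: MockTokenizer(reader, pattern, lowerCase) -- WHITESPACE and KEYWORD are
    // predefined token automata; false means the mock leaves token case untouched
    Tokenizer tok = new MockTokenizer(new StringReader("testing the factory"),
        MockTokenizer.WHITESPACE, false);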
@@ -17,9 +17,9 @@ package org.apache.solr.analysis;
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.core.SolrResourceLoader;
 
@@ -77,7 +77,7 @@ public class CommonGramsFilterFactoryTest extends BaseTokenTestCase {
     Set<?> words = factory.getCommonWords();
     assertTrue("words is null and it shouldn't be", words != null);
     assertTrue(words.contains("the"));
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("testing the factory"));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream,
         new String[] { "testing", "testing_the", "the", "the_factory", "factory" });
@@ -16,9 +16,9 @@
  */
 package org.apache.solr.analysis;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.core.SolrResourceLoader;
 
@@ -76,7 +76,7 @@ public class CommonGramsQueryFilterFactoryTest extends BaseTokenTestCase {
     Set<?> words = factory.getCommonWords();
     assertTrue("words is null and it shouldn't be", words != null);
     assertTrue(words.contains("the"));
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("testing the factory"));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream,
         new String[] { "testing_the", "the_factory" });
@@ -20,8 +20,8 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
@@ -30,7 +30,7 @@ public class DoubleMetaphoneFilterFactoryTest extends BaseTokenTestCase {
   public void testDefaults() throws Exception {
     DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory();
     factory.init(new HashMap<String, String>());
-    TokenStream inputStream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("international"));
+    TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
 
     TokenStream filteredStream = factory.create(inputStream);
     assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass());
@@ -44,7 +44,7 @@ public class DoubleMetaphoneFilterFactoryTest extends BaseTokenTestCase {
     parameters.put("maxCodeLength", "8");
     factory.init(parameters);
 
-    TokenStream inputStream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("international"));
+    TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
 
     TokenStream filteredStream = factory.create(inputStream);
     assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass());
@@ -57,12 +57,13 @@ public class DoubleMetaphoneFilterFactoryTest extends BaseTokenTestCase {
   public void testReset() throws Exception {
     DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory();
     factory.init(new HashMap<String, String>());
-    TokenStream inputStream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("international"));
+    TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
 
     TokenStream filteredStream = factory.create(inputStream);
     CharTermAttribute termAtt = filteredStream.addAttribute(CharTermAttribute.class);
     assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass());
 
+    filteredStream.reset();
     assertTrue(filteredStream.incrementToken());
     assertEquals(13, termAtt.length());
     assertEquals("international", termAtt.toString());
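The added filteredStream.reset() in the last hunk is the behavioral payoff of the migration: MockTokenizer tracks its state and fails a test whose consumer calls incrementToken() before reset(), a contract the old WhitespaceTokenizer never enforced. For reference, a sketch of the full consumer workflow that MockTokenizer checks (stream stands for any TokenStream):

    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();                    // mandatory before the first incrementToken()
    while (stream.incrementToken()) {  // returns false when tokens are exhausted
      // read per-token state from the attribute, e.g. term.toString()
    }
    stream.end();                      // records end-of-stream offset state
    stream.close();                    // releases the underlying Reader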
@@ -21,8 +21,8 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 public class LengthFilterTest extends BaseTokenTestCase {
 
@@ -34,7 +34,7 @@ public class LengthFilterTest extends BaseTokenTestCase {
     // default: args.put("enablePositionIncrements", "false");
     factory.init(args);
     String test = "foo foobar super-duper-trooper";
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(test)));
+    TokenStream stream = factory.create(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 1 });
 
     factory = new LengthFilterFactory();
@@ -43,7 +43,7 @@ public class LengthFilterTest extends BaseTokenTestCase {
     args.put(LengthFilterFactory.MAX_KEY, String.valueOf(10));
     args.put("enablePositionIncrements", "true");
     factory.init(args);
-    stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(test)));
+    stream = factory.create(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 2 });
   }
 }
@@ -16,9 +16,9 @@ package org.apache.solr.analysis;
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.core.SolrResourceLoader;
@@ -52,8 +52,8 @@ public class SnowballPorterFilterFactoryTest extends BaseTokenTestCase {
 
     factory.init(args);
     factory.inform(new LinesMockSolrResourceLoader(new ArrayList<String>()));
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION,
-        new StringReader(StrUtils.join(Arrays.asList(test), ' ')));
+    Tokenizer tokenizer = new MockTokenizer(
+        new StringReader(StrUtils.join(Arrays.asList(test), ' ')), MockTokenizer.WHITESPACE, false);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, gold);
   }
@@ -90,7 +90,7 @@ public class SnowballPorterFilterFactoryTest extends BaseTokenTestCase {
     factory.init(args);
     factory.inform(loader);
     Reader reader = new StringReader("ridding of some stemming");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "ridding", "of", "some", "stem" });
   }
@@ -20,9 +20,9 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Brazilian stem filter factory is working.
@@ -33,7 +33,7 @@ public class TestBrazilianStemFilterFactory extends BaseTokenTestCase {
    */
   public void testStemming() throws Exception {
     Reader reader = new StringReader("Brasília");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     BrazilianStemFilterFactory factory = new BrazilianStemFilterFactory();
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "brasil" });
@@ -20,9 +20,9 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Bulgarian stem filter factory is working.
@@ -33,7 +33,7 @@ public class TestBulgarianStemFilterFactory extends BaseTokenTestCase {
    */
   public void testStemming() throws Exception {
     Reader reader = new StringReader("компютри");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     BulgarianStemFilterFactory factory = new BulgarianStemFilterFactory();
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "компютр" });
@@ -21,10 +21,9 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 
 /**
@@ -41,56 +40,56 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
     CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
     factory.init( args );
     assertTokenStreamContents(factory.create(
-        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("kiTTEN"))),
+        new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.WHITESPACE, false)),
         new String[] { "Kitten" });
 
     factory.forceFirstLetter = true;
 
     assertTokenStreamContents(factory.create(
-        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("and"))),
+        new MockTokenizer(new StringReader("and"), MockTokenizer.WHITESPACE, false)),
         new String[] { "And" });
 
     //first is forced, but it's not a keep word, either
     assertTokenStreamContents(factory.create(
-        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("AnD"))),
+        new MockTokenizer(new StringReader("AnD"), MockTokenizer.WHITESPACE, false)),
         new String[] { "And" });
 
     factory.forceFirstLetter = false;
 
     //first is not forced, but it's not a keep word, either
     assertTokenStreamContents(factory.create(
-        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("AnD"))),
+        new MockTokenizer(new StringReader("AnD"), MockTokenizer.WHITESPACE, false)),
         new String[] { "And" });
 
     factory.forceFirstLetter = true;
 
     assertTokenStreamContents(factory.create(
-        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("big"))),
+        new MockTokenizer(new StringReader("big"), MockTokenizer.WHITESPACE, false)),
         new String[] { "Big" });
 
     assertTokenStreamContents(factory.create(
-        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("BIG"))),
+        new MockTokenizer(new StringReader("BIG"), MockTokenizer.WHITESPACE, false)),
         new String[] { "BIG" });
 
     assertTokenStreamContents(factory.create(
-        new KeywordTokenizer(new StringReader("Hello thEre my Name is Ryan"))),
+        new MockTokenizer(new StringReader("Hello thEre my Name is Ryan"), MockTokenizer.KEYWORD, false)),
         new String[] { "Hello there my name is ryan" });
 
     // now each token
     factory.onlyFirstWord = false;
     assertTokenStreamContents(factory.create(
-        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Hello thEre my Name is Ryan"))),
+        new MockTokenizer(new StringReader("Hello thEre my Name is Ryan"), MockTokenizer.WHITESPACE, false)),
         new String[] { "Hello", "There", "My", "Name", "Is", "Ryan" });
 
     // now only the long words
     factory.minWordLength = 3;
     assertTokenStreamContents(factory.create(
-        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Hello thEre my Name is Ryan"))),
+        new MockTokenizer(new StringReader("Hello thEre my Name is Ryan"), MockTokenizer.WHITESPACE, false)),
         new String[] { "Hello", "There", "my", "Name", "is", "Ryan" });
 
     // without prefix
     assertTokenStreamContents(factory.create(
-        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("McKinley"))),
+        new MockTokenizer(new StringReader("McKinley"), MockTokenizer.WHITESPACE, false)),
         new String[] { "Mckinley" });
 
     // Now try some prefixes
@@ -98,19 +97,19 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
     args.put( "okPrefix", "McK" ); // all words
     factory.init( args );
     assertTokenStreamContents(factory.create(
-        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("McKinley"))),
+        new MockTokenizer(new StringReader("McKinley"), MockTokenizer.WHITESPACE, false)),
         new String[] { "McKinley" });
 
     // now try some stuff with numbers
     factory.forceFirstLetter = false;
     factory.onlyFirstWord = false;
     assertTokenStreamContents(factory.create(
-        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("1st 2nd third"))),
+        new MockTokenizer(new StringReader("1st 2nd third"), MockTokenizer.WHITESPACE, false)),
         new String[] { "1st", "2nd", "Third" });
 
     factory.forceFirstLetter = true;
     assertTokenStreamContents(factory.create(
-        new KeywordTokenizer(new StringReader("the The the"))),
+        new MockTokenizer(new StringReader("the The the"), MockTokenizer.KEYWORD, false)),
         new String[] { "The The the" });
   }
 
@@ -124,17 +123,17 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
     factory.init( args );
     factory.forceFirstLetter = true;
     assertTokenStreamContents(factory.create(
-        new KeywordTokenizer(new StringReader("kiTTEN"))),
+        new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false)),
         new String[] { "KiTTEN" });
 
     factory.forceFirstLetter = false;
     assertTokenStreamContents(factory.create(
-        new KeywordTokenizer(new StringReader("kiTTEN"))),
+        new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false)),
         new String[] { "kiTTEN" });
 
     factory.keep = null;
     assertTokenStreamContents(factory.create(
-        new KeywordTokenizer(new StringReader("kiTTEN"))),
+        new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false)),
         new String[] { "Kitten" });
   }
 
@@ -149,8 +148,8 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
     args.put(CapitalizationFilterFactory.MIN_WORD_LENGTH, "5");
     CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
     factory.init(args);
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(
-        "helo testing"));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader(
+        "helo testing"), MockTokenizer.WHITESPACE, false);
     TokenStream ts = factory.create(tokenizer);
     assertTokenStreamContents(ts, new String[] {"helo", "Testing"});
   }
@@ -164,8 +163,8 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
     args.put(CapitalizationFilterFactory.MAX_WORD_COUNT, "2");
     CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
     factory.init(args);
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(
-        "one two three four"));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader(
+        "one two three four"), MockTokenizer.WHITESPACE, false);
     TokenStream ts = factory.create(tokenizer);
     assertTokenStreamContents(ts, new String[] {"One", "Two", "Three", "Four"});
   }
@@ -178,8 +177,8 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
     args.put(CapitalizationFilterFactory.MAX_WORD_COUNT, "2");
     CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
     factory.init(args);
-    Tokenizer tokenizer = new KeywordTokenizer(new StringReader(
-        "one two three four"));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader(
+        "one two three four"), MockTokenizer.KEYWORD, false);
     TokenStream ts = factory.create(tokenizer);
     assertTokenStreamContents(ts, new String[] {"one two three four"});
   }
@@ -194,8 +193,8 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
     args.put(CapitalizationFilterFactory.MAX_TOKEN_LENGTH, "2");
     CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
     factory.init(args);
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(
-        "this is a test"));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader(
+        "this is a test"), MockTokenizer.WHITESPACE, false);
     TokenStream ts = factory.create(tokenizer);
     assertTokenStreamContents(ts, new String[] {"this", "is", "A", "test"});
   }
@@ -209,7 +208,7 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
     args.put(CapitalizationFilterFactory.FORCE_FIRST_LETTER, "true");
     CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
     factory.init(args);
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("kitten"));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader("kitten"), MockTokenizer.WHITESPACE, false);
     TokenStream ts = factory.create(tokenizer);
     assertTokenStreamContents(ts, new String[] {"Kitten"});
   }
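The capitalization hunks above exercise both MockTokenizer modes: MockTokenizer.WHITESPACE replaces WhitespaceTokenizer (one token per whitespace-delimited chunk), while MockTokenizer.KEYWORD replaces KeywordTokenizer (the whole input emitted as a single token). A small sketch of the difference on an illustrative input:

    // whitespace mode: yields two tokens, "Hello" and "thEre"
    Tokenizer ws = new MockTokenizer(new StringReader("Hello thEre"), MockTokenizer.WHITESPACE, false);
    // keyword mode: yields one token, "Hello thEre"
    Tokenizer kw = new MockTokenizer(new StringReader("Hello thEre"), MockTokenizer.KEYWORD, false);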
@@ -20,9 +20,9 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Chinese filter factory is working.
@@ -33,7 +33,7 @@ public class TestChineseFilterFactory extends BaseTokenTestCase {
    */
   public void testFiltering() throws Exception {
     Reader reader = new StringReader("this 1234 Is such a silly filter");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     ChineseFilterFactory factory = new ChineseFilterFactory();
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "Is", "silly", "filter" });
@@ -28,8 +28,8 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.solr.common.ResourceLoader;
 
@@ -51,9 +51,9 @@ public class TestCollationKeyFilterFactory extends BaseTokenTestCase {
     factory.init(args);
     factory.inform(new StringMockSolrResourceLoader(""));
     TokenStream tsUpper = factory.create(
-        new KeywordTokenizer(new StringReader(turkishUpperCase)));
+        new MockTokenizer(new StringReader(turkishUpperCase), MockTokenizer.KEYWORD, false));
     TokenStream tsLower = factory.create(
-        new KeywordTokenizer(new StringReader(turkishLowerCase)));
+        new MockTokenizer(new StringReader(turkishLowerCase), MockTokenizer.KEYWORD, false));
     assertCollatesToSame(tsUpper, tsLower);
   }
 
@@ -71,9 +71,9 @@ public class TestCollationKeyFilterFactory extends BaseTokenTestCase {
     factory.init(args);
     factory.inform(new StringMockSolrResourceLoader(""));
     TokenStream tsUpper = factory.create(
-        new KeywordTokenizer(new StringReader(turkishUpperCase)));
+        new MockTokenizer(new StringReader(turkishUpperCase), MockTokenizer.KEYWORD, false));
     TokenStream tsLower = factory.create(
-        new KeywordTokenizer(new StringReader(turkishLowerCase)));
+        new MockTokenizer(new StringReader(turkishLowerCase), MockTokenizer.KEYWORD, false));
     assertCollatesToSame(tsUpper, tsLower);
   }
 
@@ -92,9 +92,9 @@ public class TestCollationKeyFilterFactory extends BaseTokenTestCase {
     factory.init(args);
     factory.inform(new StringMockSolrResourceLoader(""));
     TokenStream tsFull = factory.create(
-        new KeywordTokenizer(new StringReader(fullWidth)));
+        new MockTokenizer(new StringReader(fullWidth), MockTokenizer.KEYWORD, false));
     TokenStream tsHalf = factory.create(
-        new KeywordTokenizer(new StringReader(halfWidth)));
+        new MockTokenizer(new StringReader(halfWidth), MockTokenizer.KEYWORD, false));
     assertCollatesToSame(tsFull, tsHalf);
   }
 
@@ -112,9 +112,9 @@ public class TestCollationKeyFilterFactory extends BaseTokenTestCase {
     factory.init(args);
     factory.inform(new StringMockSolrResourceLoader(""));
     TokenStream tsUpper = factory.create(
-        new KeywordTokenizer(new StringReader(upperCase)));
+        new MockTokenizer(new StringReader(upperCase), MockTokenizer.KEYWORD, false));
     TokenStream tsLower = factory.create(
-        new KeywordTokenizer(new StringReader(lowerCase)));
+        new MockTokenizer(new StringReader(lowerCase), MockTokenizer.KEYWORD, false));
     assertCollatesToSame(tsUpper, tsLower);
   }
 
@@ -148,9 +148,9 @@ public class TestCollationKeyFilterFactory extends BaseTokenTestCase {
     factory.init(args);
     factory.inform(new StringMockSolrResourceLoader(tailoredRules));
     TokenStream tsUmlaut = factory.create(
-        new KeywordTokenizer(new StringReader(germanUmlaut)));
+        new MockTokenizer(new StringReader(germanUmlaut), MockTokenizer.KEYWORD, false));
     TokenStream tsOE = factory.create(
-        new KeywordTokenizer(new StringReader(germanOE)));
+        new MockTokenizer(new StringReader(germanOE), MockTokenizer.KEYWORD, false));
 
     assertCollatesToSame(tsUmlaut, tsOE);
   }
@@ -177,6 +177,8 @@ public class TestCollationKeyFilterFactory extends BaseTokenTestCase {
 
   private void assertCollatesToSame(TokenStream stream1, TokenStream stream2)
       throws IOException {
+    stream1.reset();
+    stream2.reset();
     CharTermAttribute term1 = stream1
         .addAttribute(CharTermAttribute.class);
     CharTermAttribute term2 = stream2
@@ -186,5 +188,9 @@ public class TestCollationKeyFilterFactory extends BaseTokenTestCase {
     assertEquals(term1.toString(), term2.toString());
     assertFalse(stream1.incrementToken());
     assertFalse(stream2.incrementToken());
+    stream1.end();
+    stream2.end();
+    stream1.close();
+    stream2.close();
   }
 }
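assertCollatesToSame consumes its two streams by hand rather than through assertTokenStreamContents, so with MockTokenizer in play it must drive the whole lifecycle itself; the last two hunks add the previously missing reset(), end(), and close() calls. Condensed, the helper's pattern now reads (assuming each stream yields exactly one collation key):

    stream1.reset();
    stream2.reset();
    assertTrue(stream1.incrementToken());
    assertTrue(stream2.incrementToken());
    assertEquals(term1.toString(), term2.toString()); // identical collation keys
    assertFalse(stream1.incrementToken());
    assertFalse(stream2.incrementToken());
    stream1.end();
    stream2.end();
    stream1.close();
    stream2.close();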
@@ -20,9 +20,9 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Czech stem filter factory is working.
@@ -33,7 +33,7 @@ public class TestCzechStemFilterFactory extends BaseTokenTestCase {
    */
   public void testStemming() throws Exception {
     Reader reader = new StringReader("angličtí");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     CzechStemFilterFactory factory = new CzechStemFilterFactory();
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "anglick" });
@@ -21,8 +21,8 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
 import org.apache.lucene.analysis.payloads.FloatEncoder;
 import org.apache.lucene.analysis.payloads.PayloadHelper;
@@ -40,8 +40,9 @@ public class TestDelimitedPayloadTokenFilterFactory extends BaseTokenTestCase {
     ResourceLoader loader = new SolrResourceLoader(null, null);
     factory.inform(loader);
 
-    TokenStream input = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("the|0.1 quick|0.1 red|0.1"));
+    TokenStream input = new MockTokenizer(new StringReader("the|0.1 quick|0.1 red|0.1"), MockTokenizer.WHITESPACE, false);
     DelimitedPayloadTokenFilter tf = factory.create(input);
+    tf.reset();
     while (tf.incrementToken()){
       PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
       assertTrue("payAttr is null and it shouldn't be", payAttr != null);
@@ -62,8 +63,9 @@ public class TestDelimitedPayloadTokenFilterFactory extends BaseTokenTestCase {
     ResourceLoader loader = new SolrResourceLoader(null, null);
     factory.inform(loader);
 
-    TokenStream input = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("the*0.1 quick*0.1 red*0.1"));
+    TokenStream input = new MockTokenizer(new StringReader("the*0.1 quick*0.1 red*0.1"), MockTokenizer.WHITESPACE, false);
     DelimitedPayloadTokenFilter tf = factory.create(input);
+    tf.reset();
     while (tf.incrementToken()){
       PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
       assertTrue("payAttr is null and it shouldn't be", payAttr != null);
@@ -22,9 +22,9 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.core.SolrResourceLoader;
 
@@ -37,7 +37,7 @@ public class TestDictionaryCompoundWordTokenFilterFactory extends BaseTokenTestCase {
    */
   public void testDecompounding() throws Exception {
     Reader reader = new StringReader("I like to play softball");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     DictionaryCompoundWordTokenFilterFactory factory = new DictionaryCompoundWordTokenFilterFactory();
     ResourceLoader loader = new SolrResourceLoader(null, null);
     Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
@@ -22,9 +22,9 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.core.SolrResourceLoader;
 
@@ -37,7 +37,7 @@ public class TestElisionFilterFactory extends BaseTokenTestCase {
    */
   public void testElision() throws Exception {
     Reader reader = new StringReader("l'avion");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     ElisionFilterFactory factory = new ElisionFilterFactory();
     factory.init(DEFAULT_VERSION_PARAM);
     ResourceLoader loader = new SolrResourceLoader(null, null);
@@ -54,7 +54,7 @@ public class TestElisionFilterFactory extends BaseTokenTestCase {
    */
   public void testDefaultArticles() throws Exception {
     Reader reader = new StringReader("l'avion");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     ElisionFilterFactory factory = new ElisionFilterFactory();
     factory.init(DEFAULT_VERSION_PARAM);
     ResourceLoader loader = new SolrResourceLoader(null, null);
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the English minimal stem factory is working.
@@ -30,7 +30,7 @@ public class TestEnglishMinimalStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("bricks");
     EnglishMinimalStemFilterFactory factory = new EnglishMinimalStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "brick" });
   }
 }
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Finnish light stem factory is working.
@@ -30,7 +30,7 @@ public class TestFinnishLightStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("aseistettujen");
     FinnishLightStemFilterFactory factory = new FinnishLightStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "aseistet" });
   }
 }
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the French light stem factory is working.
@@ -30,7 +30,7 @@ public class TestFrenchLightStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("administrativement");
     FrenchLightStemFilterFactory factory = new FrenchLightStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "administratif" });
   }
 }
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the French minimal stem factory is working.
@@ -30,7 +30,7 @@ public class TestFrenchMinimalStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("chevaux");
     FrenchMinimalStemFilterFactory factory = new FrenchMinimalStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "cheval" });
   }
 }
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Galician stem factory is working.
@@ -30,7 +30,7 @@ public class TestGalicianStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("cariñosa");
     GalicianStemFilterFactory factory = new GalicianStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "cariñ" });
   }
 }
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the German light stem factory is working.
@@ -30,7 +30,7 @@ public class TestGermanLightStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("häuser");
     GermanLightStemFilterFactory factory = new GermanLightStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "haus" });
   }
 }
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the German minimal stem factory is working.
@@ -30,7 +30,7 @@ public class TestGermanMinimalStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("bilder");
     GermanMinimalStemFilterFactory factory = new GermanMinimalStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "bild" });
   }
 }
@@ -20,9 +20,9 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the German stem filter factory is working.
@@ -33,7 +33,7 @@ public class TestGermanStemFilterFactory extends BaseTokenTestCase {
    */
   public void testStemming() throws Exception {
     Reader reader = new StringReader("Tischen");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     GermanStemFilterFactory factory = new GermanStemFilterFactory();
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "tisch" });
@@ -20,9 +20,9 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Greek lowercase filter factory is working.
@@ -33,7 +33,7 @@ public class TestGreekLowerCaseFilterFactory extends BaseTokenTestCase {
    */
   public void testNormalization() throws Exception {
     Reader reader = new StringReader("Μάϊος ΜΆΪΟΣ");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     GreekLowerCaseFilterFactory factory = new GreekLowerCaseFilterFactory();
     factory.init(DEFAULT_VERSION_PARAM);
     TokenStream stream = factory.create(tokenizer);
@@ -3,9 +3,9 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
 
 /**
@@ -31,7 +31,7 @@ import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
 public class TestGreekStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("άνθρωπος");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     TokenStream normalized = new GreekLowerCaseFilter(DEFAULT_VERSION, tokenizer);
     GreekStemFilterFactory factory = new GreekStemFilterFactory();
     TokenStream stream = factory.create(normalized);
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Hungarian light stem factory is working.
@@ -30,7 +30,7 @@ public class TestHungarianLightStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("házakat");
     HungarianLightStemFilterFactory factory = new HungarianLightStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "haz" });
   }
 }
@@ -22,8 +22,8 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.solr.core.SolrResourceLoader;
 import org.apache.solr.schema.IndexSchema;
 
@@ -41,7 +41,7 @@ public class TestHunspellStemFilterFactory extends BaseTokenTestCase {
     factory.inform(new SolrResourceLoader("solr"));
 
     Reader reader = new StringReader("abc");
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "ab" });
   }
 }
@@ -22,9 +22,9 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.core.SolrResourceLoader;
 
@@ -37,7 +37,7 @@ public class TestHyphenationCompoundWordTokenFilterFactory extends BaseTokenTest
    */
   public void testHyphenationWithDictionary() throws Exception {
     Reader reader = new StringReader("min veninde som er lidt af en læsehest");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
     ResourceLoader loader = new SolrResourceLoader(null, null);
     Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
@@ -60,7 +60,7 @@ public class TestHyphenationCompoundWordTokenFilterFactory extends BaseTokenTest
    */
   public void testHyphenationOnly() throws Exception {
     Reader reader = new StringReader("basketballkurv");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
     ResourceLoader loader = new SolrResourceLoader(null, null);
     Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
@@ -22,9 +22,9 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Indonesian stem filter factory is working.
@@ -35,7 +35,7 @@ public class TestIndonesianStemFilterFactory extends BaseTokenTestCase {
    */
   public void testStemming() throws Exception {
     Reader reader = new StringReader("dibukukannya");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     IndonesianStemFilterFactory factory = new IndonesianStemFilterFactory();
     Map<String,String> args = new HashMap<String,String>();
     factory.init(args);
@@ -48,7 +48,7 @@ public class TestIndonesianStemFilterFactory extends BaseTokenTestCase {
    */
   public void testStemmingInflectional() throws Exception {
     Reader reader = new StringReader("dibukukannya");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     IndonesianStemFilterFactory factory = new IndonesianStemFilterFactory();
     Map<String,String> args = new HashMap<String,String>();
     args.put("stemDerivational", "false");
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Italian light stem factory is working.
@@ -30,7 +30,7 @@ public class TestItalianLightStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("ragazzo ragazzi");
     ItalianLightStemFilterFactory factory = new ItalianLightStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "ragazz", "ragazz" });
   }
 }
@@ -3,8 +3,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -30,7 +30,7 @@ public class TestKStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("bricks");
     KStemFilterFactory factory = new KStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "brick" });
   }
 }
@@ -23,8 +23,8 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.solr.common.ResourceLoader;
@@ -36,7 +36,7 @@ import org.apache.solr.core.SolrResourceLoader;
 public class TestKeywordMarkerFilterFactory extends BaseTokenTestCase {
   public void testKeywords() throws IOException {
     Reader reader = new StringReader("dogs cats");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
     Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
     ResourceLoader loader = new SolrResourceLoader(null, null);
@@ -50,7 +50,7 @@ public class TestKeywordMarkerFilterFactory extends BaseTokenTestCase {
 
   public void testKeywordsCaseInsensitive() throws IOException {
     Reader reader = new StringReader("dogs cats Cats");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
     Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
     ResourceLoader loader = new SolrResourceLoader(null, null);
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Latvian stem factory is working.
@@ -30,7 +30,7 @@ public class TestLatvianStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("tirgiem tirgus");
     LatvianStemFilterFactory factory = new LatvianStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "tirg", "tirg" });
   }
 }
@@ -19,7 +19,6 @@ package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.solr.common.ResourceLoader;
 
 import java.io.ByteArrayInputStream;
@@ -46,7 +45,7 @@ public class TestMultiWordSynonyms extends BaseTokenTestCase {
     SlowSynonymMap synMap = new SlowSynonymMap(true);
     SlowSynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, null);
 
-    SlowSynonymFilter ts = new SlowSynonymFilter(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("a e")), synMap);
+    SlowSynonymFilter ts = new SlowSynonymFilter(new MockTokenizer(new StringReader("a e"), MockTokenizer.WHITESPACE, false), synMap);
     // This fails because ["e","e"] is the value of the token stream
     assertTokenStreamContents(ts, new String[] { "a", "e" });
   }
@@ -22,9 +22,9 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the NGram filter factories are working.
@@ -64,7 +64,7 @@ public class TestNGramFilters extends BaseTokenTestCase {
     Map<String,String> args = new HashMap<String,String>();
     NGramFilterFactory factory = new NGramFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "t", "e", "s", "t", "te", "es", "st" });
   }
@@ -78,7 +78,7 @@ public class TestNGramFilters extends BaseTokenTestCase {
     args.put("maxGramSize", "3");
     NGramFilterFactory factory = new NGramFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "te", "es", "st", "tes", "est" });
   }
@@ -129,7 +129,7 @@ public class TestNGramFilters extends BaseTokenTestCase {
     Map<String,String> args = new HashMap<String,String>();
     EdgeNGramFilterFactory factory = new EdgeNGramFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "t" });
   }
@@ -143,7 +143,7 @@ public class TestNGramFilters extends BaseTokenTestCase {
     args.put("maxGramSize", "2");
     EdgeNGramFilterFactory factory = new EdgeNGramFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "t", "te" });
   }
@@ -156,7 +156,7 @@ public class TestNGramFilters extends BaseTokenTestCase {
     args.put("side", "back");
     EdgeNGramFilterFactory factory = new EdgeNGramFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "y" });
   }
@@ -24,8 +24,8 @@ import java.util.Map;
 
 import org.apache.lucene.analysis.CharReader;
 import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure this factory is working
@@ -44,7 +44,7 @@ public class TestPatternReplaceCharFilterFactory extends BaseTokenTestCase {
     factory.init(args);
     CharStream cs = factory.create(
           CharReader.get( new StringReader( BLOCK ) ) );
-    TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts,
         new String[] { "this", "is", "test." },
         new int[] { 0, 5, 8 },
@@ -61,8 +61,11 @@ public class TestPatternReplaceCharFilterFactory extends BaseTokenTestCase {
     factory.init(args);
     CharStream cs = factory.create(
           CharReader.get( new StringReader( BLOCK ) ) );
-    TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+    ts.reset();
     assertFalse(ts.incrementToken());
+    ts.end();
+    ts.close();
   }
 
   // 012345678
@@ -77,7 +80,7 @@ public class TestPatternReplaceCharFilterFactory extends BaseTokenTestCase {
     factory.init(args);
     CharStream cs = factory.create(
           CharReader.get( new StringReader( BLOCK ) ) );
-    TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts,
         new String[] { "aa#bb#cc" },
         new int[] { 0 },
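[Editor's note] The @@ -61,8 +61,11 @@ hunk above is the one place this patch does more than swap constructors: that test consumes the stream by hand instead of going through assertTokenStreamContents, so it must now drive the full TokenStream lifecycle itself or MockTokenizer's state checks trip. A sketch of that lifecycle, reassembling the hunk's own lines with comments (`cs` is the pattern-replace CharStream built just before):

    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
    ts.reset();                        // MockTokenizer insists on reset() first
    assertFalse(ts.incrementToken());  // the char filter replaced away all input
    ts.end();                          // finalize offset state
    ts.close();                        // release the underlying reader
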
@@ -17,8 +17,8 @@
 
 package org.apache.solr.analysis;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 import java.io.StringReader;
 import java.util.HashMap;
@@ -37,7 +37,7 @@ public class TestPatternReplaceFilterFactory extends BaseTokenTestCase {
     args.put("replacement", "-");
     factory.init(args);
     TokenStream ts = factory.create
-            (new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)));
+            (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false));
 
     assertTokenStreamContents(ts,
         new String[] { "-foo-foo-foo-", "-", "c-" });
@@ -20,9 +20,9 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Persian normalization factory is working.
@@ -33,7 +33,7 @@ public class TestPersianNormalizationFilterFactory extends BaseTokenTestCase {
    */
   public void testNormalization() throws Exception {
     Reader reader = new StringReader("های");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     PersianNormalizationFilterFactory factory = new PersianNormalizationFilterFactory();
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "هاي" });
@@ -22,9 +22,9 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.commons.codec.language.Metaphone;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 
 /**
@@ -89,8 +89,7 @@ public class TestPhoneticFilterFactory extends BaseTokenTestCase {
 
   static void assertAlgorithm(String algName, String inject, String input,
       String[] expected) throws Exception {
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION,
-        new StringReader(input));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     Map<String,String> args = new HashMap<String,String>();
     args.put("encoder", algName);
     args.put("inject", inject);
@@ -20,9 +20,9 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Porter stem filter factory is working.
@@ -33,7 +33,7 @@ public class TestPorterStemFilterFactory extends BaseTokenTestCase {
    */
   public void testStemming() throws Exception {
     Reader reader = new StringReader("dogs");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     PorterStemFilterFactory factory = new PorterStemFilterFactory();
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "dog" });
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Portuguese Light stem factory is working.
@@ -30,7 +30,7 @@ public class TestPortugueseLightStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("evidentemente");
     PortugueseLightStemFilterFactory factory = new PortugueseLightStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "evident" });
   }
 }
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Portuguese Minimal stem factory is working.
@@ -30,7 +30,7 @@ public class TestPortugueseMinimalStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("questões");
     PortugueseMinimalStemFilterFactory factory = new PortugueseMinimalStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "questão" });
   }
 }
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Portuguese stem factory is working.
@@ -30,7 +30,7 @@ public class TestPortugueseStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("maluquice");
     PortugueseStemFilterFactory factory = new PortugueseStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "maluc" });
   }
 }
@@ -20,9 +20,9 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Reverse string filter factory is working.
@@ -33,7 +33,7 @@ public class TestReverseStringFilterFactory extends BaseTokenTestCase {
    */
   public void testReversing() throws Exception {
     Reader reader = new StringReader("simple test");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     ReverseStringFilterFactory factory = new ReverseStringFilterFactory();
     factory.init(DEFAULT_VERSION_PARAM);
     TokenStream stream = factory.create(tokenizer);
@@ -25,8 +25,8 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.search.AutomatonQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.automaton.Automaton;
@@ -66,7 +66,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
     String text = "simple text";
     args.put("withOriginal", "true");
     factory.init(args);
-    TokenStream input = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(text)));
+    TokenStream input = factory.create(new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(input,
         new String[] { "\u0001elpmis", "simple", "\u0001txet", "text" },
         new int[] { 1, 0, 1, 0 });
@@ -74,7 +74,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
     // now without original tokens
     args.put("withOriginal", "false");
     factory.init(args);
-    input = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(text)));
+    input = factory.create(new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(input,
         new String[] { "\u0001elpmis", "\u0001txet" },
         new int[] { 1, 1 });
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Russian light stem factory is working.
@@ -30,7 +30,7 @@ public class TestRussianLightStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("журналы");
     RussianLightStemFilterFactory factory = new RussianLightStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "журнал" });
   }
 }
@@ -22,8 +22,8 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Shingle filter factory works.
@@ -37,7 +37,7 @@ public class TestShingleFilterFactory extends BaseTokenTestCase {
     Map<String,String> args = new HashMap<String,String>();
     ShingleFilterFactory factory = new ShingleFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] {"this", "this is", "is",
         "is a", "a", "a test", "test"});
   }
@@ -51,7 +51,7 @@ public class TestShingleFilterFactory extends BaseTokenTestCase {
     args.put("outputUnigrams", "false");
     ShingleFilterFactory factory = new ShingleFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] {"this is", "is a", "a test"});
   }
@@ -65,7 +65,7 @@ public class TestShingleFilterFactory extends BaseTokenTestCase {
     args.put("maxShingleSize", "3");
     ShingleFilterFactory factory = new ShingleFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] {"this", "this is", "this is a", "is",
         "is a", "is a test", "a", "a test", "test"});
@@ -81,7 +81,7 @@ public class TestShingleFilterFactory extends BaseTokenTestCase {
     args.put("maxShingleSize", "4");
     ShingleFilterFactory factory = new ShingleFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "this", "this is a", "this is a test",
         "is", "is a test", "a", "test" });
@@ -98,7 +98,7 @@ public class TestShingleFilterFactory extends BaseTokenTestCase {
     args.put("outputUnigrams", "false");
     ShingleFilterFactory factory = new ShingleFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "this is a", "this is a test", "is a test" });
   }
@@ -113,7 +113,7 @@ public class TestShingleFilterFactory extends BaseTokenTestCase {
     args.put("maxShingleSize", "3");
     ShingleFilterFactory factory = new ShingleFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "this", "this is a", "is", "is a test", "a", "test" });
   }
@@ -129,7 +129,7 @@ public class TestShingleFilterFactory extends BaseTokenTestCase {
     args.put("outputUnigrams", "false");
     ShingleFilterFactory factory = new ShingleFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "this is a", "is a test" });
   }
@@ -143,7 +143,7 @@ public class TestShingleFilterFactory extends BaseTokenTestCase {
     args.put("tokenSeparator", "=BLAH=");
     ShingleFilterFactory factory = new ShingleFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "this", "this=BLAH=is", "is", "is=BLAH=a",
         "a", "a=BLAH=test", "test" });
@@ -159,7 +159,7 @@ public class TestShingleFilterFactory extends BaseTokenTestCase {
     args.put("outputUnigrams", "false");
     ShingleFilterFactory factory = new ShingleFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "this=BLAH=is", "is=BLAH=a", "a=BLAH=test" });
   }
@@ -173,7 +173,7 @@ public class TestShingleFilterFactory extends BaseTokenTestCase {
     args.put("tokenSeparator", "");
     ShingleFilterFactory factory = new ShingleFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "this", "thisis", "is", "isa", "a", "atest", "test" });
   }
@@ -190,7 +190,7 @@ public class TestShingleFilterFactory extends BaseTokenTestCase {
     args.put("tokenSeparator", "=BLAH=");
     ShingleFilterFactory factory = new ShingleFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "this", "this=BLAH=is=BLAH=a",
         "this=BLAH=is=BLAH=a=BLAH=test", "is",
@@ -211,7 +211,7 @@ public class TestShingleFilterFactory extends BaseTokenTestCase {
     args.put("outputUnigrams", "false");
     ShingleFilterFactory factory = new ShingleFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream,
         new String[] { "this=BLAH=is=BLAH=a", "this=BLAH=is=BLAH=a=BLAH=test",
         "is=BLAH=a=BLAH=test", });
@@ -232,7 +232,7 @@ public class TestShingleFilterFactory extends BaseTokenTestCase {
     args.put("outputUnigramsIfNoShingles", "true");
     ShingleFilterFactory factory = new ShingleFilterFactory();
     factory.init(args);
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "test" });
   }
 }
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Spanish Light stem factory is working.
@@ -30,7 +30,7 @@ public class TestSpanishLightStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("sociedades");
     SpanishLightStemFilterFactory factory = new SpanishLightStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
    assertTokenStreamContents(stream, new String[] { "sociedad" });
   }
 }
@@ -22,9 +22,9 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the standard lucene factories are working.
@@ -158,7 +158,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
    */
   public void testASCIIFolding() throws Exception {
     Reader reader = new StringReader("Česká");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     ASCIIFoldingFilterFactory factory = new ASCIIFoldingFilterFactory();
     factory.init(DEFAULT_VERSION_PARAM);
     TokenStream stream = factory.create(tokenizer);
@ -23,8 +23,8 @@ import java.io.StringReader;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
|
||||||
import org.apache.lucene.analysis.en.PorterStemFilter;
|
import org.apache.lucene.analysis.en.PorterStemFilter;
|
||||||
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.solr.common.ResourceLoader;
|
import org.apache.solr.common.ResourceLoader;
|
||||||
|
@ -37,7 +37,7 @@ public class TestStemmerOverrideFilterFactory extends BaseTokenTestCase {
|
||||||
public void testKeywords() throws IOException {
|
public void testKeywords() throws IOException {
|
||||||
// our stemdict stems dogs to 'cat'
|
// our stemdict stems dogs to 'cat'
|
||||||
Reader reader = new StringReader("testing dogs");
|
Reader reader = new StringReader("testing dogs");
|
||||||
Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
|
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||||
StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
|
StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
|
||||||
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
|
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
|
||||||
ResourceLoader loader = new SolrResourceLoader(null, null);
|
ResourceLoader loader = new SolrResourceLoader(null, null);
|
||||||
|
@ -51,7 +51,7 @@ public class TestStemmerOverrideFilterFactory extends BaseTokenTestCase {
|
||||||
|
|
||||||
public void testKeywordsCaseInsensitive() throws IOException {
|
public void testKeywordsCaseInsensitive() throws IOException {
|
||||||
Reader reader = new StringReader("testing DoGs");
|
Reader reader = new StringReader("testing DoGs");
|
||||||
Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
|
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||||
StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
|
StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
|
||||||
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
|
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
|
||||||
ResourceLoader loader = new SolrResourceLoader(null, null);
|
ResourceLoader loader = new SolrResourceLoader(null, null);
|

@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Swedish Light stem factory is working.
@@ -30,7 +30,7 @@ public class TestSwedishLightStemFilterFactory extends BaseTokenTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("äpplen äpple");
     SwedishLightStemFilterFactory factory = new SwedishLightStemFilterFactory();
-    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(stream, new String[] { "äppl", "äppl" });
   }
 }

@@ -20,9 +20,9 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.th.ThaiWordFilter;
 
 /**
@@ -35,7 +35,7 @@ public class TestThaiWordFilterFactory extends BaseTokenTestCase {
   public void testWordBreak() throws Exception {
     assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);
     Reader reader = new StringReader("การที่ได้ต้องแสดงว่างานดี");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     ThaiWordFilterFactory factory = new ThaiWordFilterFactory();
     factory.init(DEFAULT_VERSION_PARAM);
     TokenStream stream = factory.create(tokenizer);

@@ -21,8 +21,8 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
 
 /**
  * Simple tests to ensure this factory is working
@@ -33,7 +33,7 @@ public class TestTrimFilterFactory extends BaseTokenTestCase {
     Map<String,String> args = new HashMap<String,String>();
     args.put("updateOffsets", "false");
     factory.init(args);
-    TokenStream ts = factory.create(new KeywordTokenizer(new StringReader("trim me ")));
+    TokenStream ts = factory.create(new MockTokenizer(new StringReader("trim me "), MockTokenizer.KEYWORD, false));
     assertTokenStreamContents(ts, new String[] { "trim me" });
   }
 }

@@ -20,9 +20,9 @@ package org.apache.solr.analysis;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Simple tests to ensure the Turkish lowercase filter factory is working.
@@ -33,7 +33,7 @@ public class TestTurkishLowerCaseFilterFactory extends BaseTokenTestCase {
    */
   public void testCasing() throws Exception {
     Reader reader = new StringReader("AĞACI");
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     TurkishLowerCaseFilterFactory factory = new TurkishLowerCaseFilterFactory();
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "ağacı" });

@@ -21,8 +21,8 @@ import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.core.SolrResourceLoader;
@@ -210,12 +210,12 @@ public class TestWordDelimiterFilterFactory extends SolrTestCaseJ4 {
     factoryDefault.inform(loader);
 
     TokenStream ts = factoryDefault.create(
-        new WhitespaceTokenizer(BaseTokenTestCase.DEFAULT_VERSION, new StringReader(testText)));
+        new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false));
     BaseTokenTestCase.assertTokenStreamContents(ts,
         new String[] { "I", "borrowed", "5", "400", "00", "540000", "at", "25", "interest", "rate", "interestrate" });
 
     ts = factoryDefault.create(
-        new WhitespaceTokenizer(BaseTokenTestCase.DEFAULT_VERSION, new StringReader("foo\u200Dbar")));
+        new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
     BaseTokenTestCase.assertTokenStreamContents(ts,
         new String[] { "foo", "bar", "foobar" });
 
@@ -228,13 +228,13 @@ public class TestWordDelimiterFilterFactory extends SolrTestCaseJ4 {
     factoryCustom.inform(loader);
 
     ts = factoryCustom.create(
-        new WhitespaceTokenizer(BaseTokenTestCase.DEFAULT_VERSION, new StringReader(testText)));
+        new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false));
     BaseTokenTestCase.assertTokenStreamContents(ts,
         new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "rate", "interestrate" });
 
     /* test custom behavior with a char > 0x7F, because we had to make a larger byte[] */
     ts = factoryCustom.create(
-        new WhitespaceTokenizer(BaseTokenTestCase.DEFAULT_VERSION, new StringReader("foo\u200Dbar")));
+        new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
     BaseTokenTestCase.assertTokenStreamContents(ts,
         new String[] { "foo\u200Dbar" });
   }
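
Every hunk above applies the same mechanical substitution: a WhitespaceTokenizer or KeywordTokenizer built directly over a Reader becomes a MockTokenizer with the matching token pattern (MockTokenizer.WHITESPACE or MockTokenizer.KEYWORD) and lowercasing disabled, so each test sees the same tokens as before while MockTokenizer additionally checks that the test consumes the TokenStream correctly (reset before incrementToken, then end and close). A minimal sketch of the resulting test shape, assuming a JUnit 3-style class extending BaseTokenTestCase as in the files above; the class and method names are illustrative only:

import java.io.StringReader;

import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;

// Hypothetical condensed example of the post-change pattern.
public class MockTokenizerUsageSketch extends BaseTokenTestCase {
  public void testWhitespacePattern() throws Exception {
    // WHITESPACE mimics WhitespaceTokenizer's splitting; the 'false' flag
    // disables lowercasing, so old and new token output match exactly.
    Tokenizer tokenizer = new MockTokenizer(
        new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);
    assertTokenStreamContents(tokenizer, new String[] { "testing", "the", "factory" });
  }
}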