LUCENE-3456: more use of MockTokenizer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1175579 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-09-25 21:10:50 +00:00
parent 1e577e15b5
commit e7229ee7a4
14 changed files with 36 additions and 36 deletions

View File

@ -227,7 +227,7 @@
<!-- KeywordTokenizer does no actual tokenizing, so the entire <!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token input string is preserved as a single token
--> -->
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<!-- The LowerCase TokenFilter does what you expect, which can be <!-- The LowerCase TokenFilter does what you expect, which can be
useful when you want your sorting to be case insensitive useful when you want your sorting to be case insensitive
--> -->

View File

@ -208,7 +208,7 @@
<!-- KeywordTokenizer does no actual tokenizing, so the entire <!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token input string is preserved as a single token
--> -->
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<!-- The LowerCase TokenFilter does what you expect, which can be <!-- The LowerCase TokenFilter does what you expect, which can be
useful when you want your sorting to be case insensitive useful when you want your sorting to be case insensitive
--> -->

View File

@ -208,7 +208,7 @@
<!-- KeywordTokenizer does no actual tokenizing, so the entire <!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token input string is preserved as a single token
--> -->
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<!-- The LowerCase TokenFilter does what you expect, which can be <!-- The LowerCase TokenFilter does what you expect, which can be
useful when you want your sorting to be case insensitive useful when you want your sorting to be case insensitive
--> -->

View File

@ -143,7 +143,7 @@
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="keywordtok" class="solr.TextField"> <fieldtype name="keywordtok" class="solr.TextField">
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="standardtok" class="solr.TextField"> <fieldtype name="standardtok" class="solr.TextField">
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
@ -186,13 +186,13 @@
</fieldtype> </fieldtype>
<fieldtype name="patternreplacefilt" class="solr.TextField"> <fieldtype name="patternreplacefilt" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<filter class="solr.PatternReplaceFilterFactory" <filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-zA-Z])" replacement="_" replace="all" pattern="([^a-zA-Z])" replacement="_" replace="all"
/> />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
</analyzer> </analyzer>
</fieldtype> </fieldtype>
<fieldtype name="porterfilt" class="solr.TextField"> <fieldtype name="porterfilt" class="solr.TextField">

View File

@ -389,7 +389,7 @@
KeywordTokenizer does no actual tokenizing, so the entire KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token input string is preserved as a single token
--> -->
<tokenizer class="solr.KeywordTokenizerFactory" /> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<!-- <!--
The LowerCase TokenFilter does what you expect, which can be The LowerCase TokenFilter does what you expect, which can be
useful when you want your sorting to be case insensitive useful when you want your sorting to be case insensitive
@ -448,7 +448,7 @@
<fieldType name="lowercase" class="solr.TextField" <fieldType name="lowercase" class="solr.TextField"
positionIncrementGap="100"> positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.KeywordTokenizerFactory" /> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.LowerCaseFilterFactory" />
</analyzer> </analyzer>
</fieldType> </fieldType>

View File

@ -135,7 +135,7 @@
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="keywordtok" class="solr.TextField"> <fieldtype name="keywordtok" class="solr.TextField">
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="standardtok" class="solr.TextField"> <fieldtype name="standardtok" class="solr.TextField">
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
@ -178,13 +178,13 @@
</fieldtype> </fieldtype>
<fieldtype name="patternreplacefilt" class="solr.TextField"> <fieldtype name="patternreplacefilt" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<filter class="solr.PatternReplaceFilterFactory" <filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-zA-Z])" replacement="_" replace="all" pattern="([^a-zA-Z])" replacement="_" replace="all"
/> />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
</analyzer> </analyzer>
</fieldtype> </fieldtype>
<fieldtype name="porterfilt" class="solr.TextField"> <fieldtype name="porterfilt" class="solr.TextField">

View File

@ -126,7 +126,7 @@
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="keywordtok" class="solr.TextField"> <fieldtype name="keywordtok" class="solr.TextField">
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="standardtok" class="solr.TextField"> <fieldtype name="standardtok" class="solr.TextField">
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
@ -169,13 +169,13 @@
</fieldtype> </fieldtype>
<fieldtype name="patternreplacefilt" class="solr.TextField"> <fieldtype name="patternreplacefilt" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<filter class="solr.PatternReplaceFilterFactory" <filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-zA-Z])" replacement="_" replace="all" pattern="([^a-zA-Z])" replacement="_" replace="all"
/> />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
</analyzer> </analyzer>
</fieldtype> </fieldtype>
<fieldtype name="porterfilt" class="solr.TextField"> <fieldtype name="porterfilt" class="solr.TextField">

View File

@ -217,7 +217,7 @@
<!-- KeywordTokenizer does no actual tokenizing, so the entire <!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token input string is preserved as a single token
--> -->
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<!-- The LowerCase TokenFilter does what you expect, which can be <!-- The LowerCase TokenFilter does what you expect, which can be
useful when you want your sorting to be case insensitive useful when you want your sorting to be case insensitive
--> -->

View File

@ -156,7 +156,7 @@
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="keywordtok" class="solr.TextField"> <fieldtype name="keywordtok" class="solr.TextField">
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="standardtok" class="solr.TextField"> <fieldtype name="standardtok" class="solr.TextField">
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
@ -207,13 +207,13 @@
</fieldtype> </fieldtype>
<fieldtype name="patternreplacefilt" class="solr.TextField"> <fieldtype name="patternreplacefilt" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<filter class="solr.PatternReplaceFilterFactory" <filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-zA-Z])" replacement="_" replace="all" pattern="([^a-zA-Z])" replacement="_" replace="all"
/> />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
</analyzer> </analyzer>
</fieldtype> </fieldtype>
<fieldtype name="patterntok" class="solr.TextField"> <fieldtype name="patterntok" class="solr.TextField">

View File

@ -208,7 +208,7 @@
<!-- KeywordTokenizer does no actual tokenizing, so the entire <!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token input string is preserved as a single token
--> -->
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<!-- The LowerCase TokenFilter does what you expect, which can be <!-- The LowerCase TokenFilter does what you expect, which can be
useful when you want your sorting to be case insensitive useful when you want your sorting to be case insensitive
--> -->

View File

@ -191,7 +191,7 @@
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="keywordtok" class="solr.TextField"> <fieldtype name="keywordtok" class="solr.TextField">
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="standardtok" class="solr.TextField"> <fieldtype name="standardtok" class="solr.TextField">
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
@ -234,13 +234,13 @@
</fieldtype> </fieldtype>
<fieldtype name="patternreplacefilt" class="solr.TextField"> <fieldtype name="patternreplacefilt" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<filter class="solr.PatternReplaceFilterFactory" <filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-zA-Z])" replacement="_" replace="all" pattern="([^a-zA-Z])" replacement="_" replace="all"
/> />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
</analyzer> </analyzer>
</fieldtype> </fieldtype>
<fieldtype name="porterfilt" class="solr.TextField"> <fieldtype name="porterfilt" class="solr.TextField">

View File

@ -191,7 +191,7 @@
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="keywordtok" class="solr.TextField"> <fieldtype name="keywordtok" class="solr.TextField">
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="standardtok" class="solr.TextField"> <fieldtype name="standardtok" class="solr.TextField">
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
@ -234,13 +234,13 @@
</fieldtype> </fieldtype>
<fieldtype name="patternreplacefilt" class="solr.TextField"> <fieldtype name="patternreplacefilt" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<filter class="solr.PatternReplaceFilterFactory" <filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-zA-Z])" replacement="_" replace="all" pattern="([^a-zA-Z])" replacement="_" replace="all"
/> />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
</analyzer> </analyzer>
</fieldtype> </fieldtype>
<fieldtype name="porterfilt" class="solr.TextField"> <fieldtype name="porterfilt" class="solr.TextField">

View File

@ -291,18 +291,18 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
indexPart = keywordtok.get("index"); indexPart = keywordtok.get("index");
assertNotNull("expecting an index token analysis for field 'keywordtok'", indexPart); assertNotNull("expecting an index token analysis for field 'keywordtok'", indexPart);
assertEquals("expecting only KeywordTokenizer to be applied", 1, indexPart.size()); assertEquals("expecting only MockTokenizer to be applied", 1, indexPart.size());
tokenList = indexPart.get(KeywordTokenizer.class.getName()); tokenList = indexPart.get(MockTokenizer.class.getName());
assertNotNull("expecting only KeywordTokenizer to be applied", tokenList); assertNotNull("expecting only MockTokenizer to be applied", tokenList);
assertEquals("expecting KeywordTokenizer to produce 1 token", 1, tokenList.size()); assertEquals("expecting MockTokenizer to produce 1 token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("the quick red fox jumped over the lazy brown dogs", null, "word", 0, 49, 1, new int[]{1}, null, false)); assertToken(tokenList.get(0), new TokenInfo("the quick red fox jumped over the lazy brown dogs", null, "word", 0, 49, 1, new int[]{1}, null, false));
queryPart = keywordtok.get("query"); queryPart = keywordtok.get("query");
assertNotNull("expecting a query token analysis for field 'keywordtok'", queryPart); assertNotNull("expecting a query token analysis for field 'keywordtok'", queryPart);
assertEquals("expecting only KeywordTokenizer to be applied", 1, queryPart.size()); assertEquals("expecting only MockTokenizer to be applied", 1, queryPart.size());
tokenList = queryPart.get(KeywordTokenizer.class.getName()); tokenList = queryPart.get(MockTokenizer.class.getName());
assertNotNull("expecting only KeywordTokenizer to be applied", tokenList); assertNotNull("expecting only MockTokenizer to be applied", tokenList);
assertEquals("expecting KeywordTokenizer to produce 1 token", 1, tokenList.size()); assertEquals("expecting MockTokenizer to produce 1 token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox brown", null, "word", 0, 9, 1, new int[]{1}, null, false)); assertToken(tokenList.get(0), new TokenInfo("fox brown", null, "word", 0, 9, 1, new int[]{1}, null, false));
} }

View File

@ -156,7 +156,7 @@
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="keywordtok" class="solr.TextField"> <fieldtype name="keywordtok" class="solr.TextField">
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
</fieldtype> </fieldtype>
<fieldtype name="standardtok" class="solr.TextField"> <fieldtype name="standardtok" class="solr.TextField">
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer> <analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
@ -207,13 +207,13 @@
</fieldtype> </fieldtype>
<fieldtype name="patternreplacefilt" class="solr.TextField"> <fieldtype name="patternreplacefilt" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<filter class="solr.PatternReplaceFilterFactory" <filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-zA-Z])" replacement="_" replace="all" pattern="([^a-zA-Z])" replacement="_" replace="all"
/> />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
</analyzer> </analyzer>
</fieldtype> </fieldtype>
<fieldtype name="patterntok" class="solr.TextField"> <fieldtype name="patterntok" class="solr.TextField">