mirror of https://github.com/apache/lucene.git
LUCENE-3456: more use of MockTokenizer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1175579 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1e577e15b5
commit
e7229ee7a4
|
@ -227,7 +227,7 @@
|
|||
<!-- KeywordTokenizer does no actual tokenizing, so the entire
|
||||
input string is preserved as a single token
|
||||
-->
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<!-- The LowerCase TokenFilter does what you expect, which can be
|
||||
when you want your sorting to be case insensitive
|
||||
-->
|
||||
|
|
|
@ -208,7 +208,7 @@
|
|||
<!-- KeywordTokenizer does no actual tokenizing, so the entire
|
||||
input string is preserved as a single token
|
||||
-->
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<!-- The LowerCase TokenFilter does what you expect, which can be
|
||||
when you want your sorting to be case insensitive
|
||||
-->
|
||||
|
|
|
@ -208,7 +208,7 @@
|
|||
<!-- KeywordTokenizer does no actual tokenizing, so the entire
|
||||
input string is preserved as a single token
|
||||
-->
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<!-- The LowerCase TokenFilter does what you expect, which can be
|
||||
when you want your sorting to be case insensitive
|
||||
-->
|
||||
|
|
|
@ -143,7 +143,7 @@
|
|||
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="keywordtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer>
|
||||
<analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="standardtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
|
||||
|
@ -186,13 +186,13 @@
|
|||
</fieldtype>
|
||||
<fieldtype name="patternreplacefilt" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<filter class="solr.PatternReplaceFilterFactory"
|
||||
pattern="([^a-zA-Z])" replacement="_" replace="all"
|
||||
/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="porterfilt" class="solr.TextField">
|
||||
|
|
|
@ -389,7 +389,7 @@
|
|||
KeywordTokenizer does no actual tokenizing, so the entire
|
||||
input string is preserved as a single token
|
||||
-->
|
||||
<tokenizer class="solr.KeywordTokenizerFactory" />
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<!--
|
||||
The LowerCase TokenFilter does what you expect, which can be
|
||||
when you want your sorting to be case insensitive
|
||||
|
@ -448,7 +448,7 @@
|
|||
<fieldType name="lowercase" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.KeywordTokenizerFactory" />
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
|
|
@ -135,7 +135,7 @@
|
|||
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="keywordtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer>
|
||||
<analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="standardtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
|
||||
|
@ -178,13 +178,13 @@
|
|||
</fieldtype>
|
||||
<fieldtype name="patternreplacefilt" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<filter class="solr.PatternReplaceFilterFactory"
|
||||
pattern="([^a-zA-Z])" replacement="_" replace="all"
|
||||
/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="porterfilt" class="solr.TextField">
|
||||
|
|
|
@ -126,7 +126,7 @@
|
|||
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="keywordtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer>
|
||||
<analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="standardtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
|
||||
|
@ -169,13 +169,13 @@
|
|||
</fieldtype>
|
||||
<fieldtype name="patternreplacefilt" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<filter class="solr.PatternReplaceFilterFactory"
|
||||
pattern="([^a-zA-Z])" replacement="_" replace="all"
|
||||
/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="porterfilt" class="solr.TextField">
|
||||
|
|
|
@ -217,7 +217,7 @@
|
|||
<!-- KeywordTokenizer does no actual tokenizing, so the entire
|
||||
input string is preserved as a single token
|
||||
-->
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<!-- The LowerCase TokenFilter does what you expect, which can be
|
||||
when you want your sorting to be case insensitive
|
||||
-->
|
||||
|
|
|
@ -156,7 +156,7 @@
|
|||
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="keywordtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer>
|
||||
<analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="standardtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
|
||||
|
@ -207,13 +207,13 @@
|
|||
</fieldtype>
|
||||
<fieldtype name="patternreplacefilt" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<filter class="solr.PatternReplaceFilterFactory"
|
||||
pattern="([^a-zA-Z])" replacement="_" replace="all"
|
||||
/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="patterntok" class="solr.TextField">
|
||||
|
|
|
@ -208,7 +208,7 @@
|
|||
<!-- KeywordTokenizer does no actual tokenizing, so the entire
|
||||
input string is preserved as a single token
|
||||
-->
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<!-- The LowerCase TokenFilter does what you expect, which can be
|
||||
when you want your sorting to be case insensitive
|
||||
-->
|
||||
|
|
|
@ -191,7 +191,7 @@
|
|||
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="keywordtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer>
|
||||
<analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="standardtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
|
||||
|
@ -234,13 +234,13 @@
|
|||
</fieldtype>
|
||||
<fieldtype name="patternreplacefilt" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<filter class="solr.PatternReplaceFilterFactory"
|
||||
pattern="([^a-zA-Z])" replacement="_" replace="all"
|
||||
/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="porterfilt" class="solr.TextField">
|
||||
|
|
|
@ -191,7 +191,7 @@
|
|||
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="keywordtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer>
|
||||
<analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="standardtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
|
||||
|
@ -234,13 +234,13 @@
|
|||
</fieldtype>
|
||||
<fieldtype name="patternreplacefilt" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<filter class="solr.PatternReplaceFilterFactory"
|
||||
pattern="([^a-zA-Z])" replacement="_" replace="all"
|
||||
/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="porterfilt" class="solr.TextField">
|
||||
|
|
|
@ -291,18 +291,18 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
|
||||
indexPart = keywordtok.get("index");
|
||||
assertNotNull("expecting an index token analysis for field 'keywordtok'", indexPart);
|
||||
assertEquals("expecting only KeywordTokenizer to be applied", 1, indexPart.size());
|
||||
tokenList = indexPart.get(KeywordTokenizer.class.getName());
|
||||
assertNotNull("expecting only KeywordTokenizer to be applied", tokenList);
|
||||
assertEquals("expecting KeywordTokenizer to produce 1 token", 1, tokenList.size());
|
||||
assertEquals("expecting only MockTokenizer to be applied", 1, indexPart.size());
|
||||
tokenList = indexPart.get(MockTokenizer.class.getName());
|
||||
assertNotNull("expecting only MockTokenizer to be applied", tokenList);
|
||||
assertEquals("expecting MockTokenizer to produce 1 token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("the quick red fox jumped over the lazy brown dogs", null, "word", 0, 49, 1, new int[]{1}, null, false));
|
||||
|
||||
queryPart = keywordtok.get("query");
|
||||
assertNotNull("expecting a query token analysis for field 'keywordtok'", queryPart);
|
||||
assertEquals("expecting only KeywordTokenizer to be applied", 1, queryPart.size());
|
||||
tokenList = queryPart.get(KeywordTokenizer.class.getName());
|
||||
assertNotNull("expecting only KeywordTokenizer to be applied", tokenList);
|
||||
assertEquals("expecting KeywordTokenizer to produce 1 token", 1, tokenList.size());
|
||||
assertEquals("expecting only MockTokenizer to be applied", 1, queryPart.size());
|
||||
tokenList = queryPart.get(MockTokenizer.class.getName());
|
||||
assertNotNull("expecting only MockTokenizer to be applied", tokenList);
|
||||
assertEquals("expecting MockTokenizer to produce 1 token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox brown", null, "word", 0, 9, 1, new int[]{1}, null, false));
|
||||
|
||||
}
|
||||
|
|
|
@ -156,7 +156,7 @@
|
|||
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="keywordtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer>
|
||||
<analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="standardtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
|
||||
|
@ -207,13 +207,13 @@
|
|||
</fieldtype>
|
||||
<fieldtype name="patternreplacefilt" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<filter class="solr.PatternReplaceFilterFactory"
|
||||
pattern="([^a-zA-Z])" replacement="_" replace="all"
|
||||
/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="patterntok" class="solr.TextField">
|
||||
|
|
Loading…
Reference in New Issue