mirror of https://github.com/apache/lucene.git
SOLR-2921: enable multitermqueries for these filters too
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1304059 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c8cd6210cc
commit
6df50d5486
|
@ -37,7 +37,7 @@ import com.ibm.icu.text.Transliterator;
|
|||
* </ul>
|
||||
* @see Transliterator
|
||||
*/
|
||||
public class ICUTransformFilterFactory extends BaseTokenFilterFactory {
|
||||
public class ICUTransformFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
|
||||
private Transliterator transliterator;
|
||||
|
||||
// TODO: add support for custom rules
|
||||
|
@ -64,4 +64,9 @@ public class ICUTransformFilterFactory extends BaseTokenFilterFactory {
|
|||
public TokenStream create(TokenStream input) {
|
||||
return new ICUTransformFilter(input, transliterator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,6 +33,13 @@
|
|||
<filter class="solr.ICUNormalizer2FilterFactory" name="nfkc_cf" mode="compose"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="text_icutransform" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.ICUTransformFilterFactory" id="Cyrillic-Latin"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
</types>
|
||||
|
||||
|
@ -40,6 +47,7 @@
|
|||
<field name="id" type="string" indexed="true" stored="true" required="true"/>
|
||||
<field name="content_icufolding" type="text_icufolding" indexed="true" stored="true"/>
|
||||
<field name="content_icunormalizer2" type="text_icunormalizer2" indexed="true" stored="true"/>
|
||||
<field name="content_icutransform" type="text_icutransform" indexed="true" stored="true"/>
|
||||
|
||||
</fields>
|
||||
|
||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.solr.analysis;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@ -31,7 +30,6 @@ public class TestFoldingMultitermExtrasQuery extends SolrTestCaseJ4 {
|
|||
@BeforeClass
|
||||
public static void beforeTests() throws Exception {
|
||||
initCore("solrconfig-icucollate.xml","schema-folding-extra.xml", "analysis-extras/solr");
|
||||
IndexWriter iw;
|
||||
|
||||
int idx = 1;
|
||||
// ICUFoldingFilterFactory
|
||||
|
@ -55,7 +53,10 @@ public class TestFoldingMultitermExtrasQuery extends SolrTestCaseJ4 {
|
|||
assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "ELİF"));
|
||||
assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "eli\u0307f"));
|
||||
|
||||
assertU(optimize());
|
||||
// ICUTransformFilterFactory
|
||||
assertU(adoc("id", Integer.toString(idx++), "content_icutransform", "Российская"));
|
||||
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -74,4 +75,8 @@ public class TestFoldingMultitermExtrasQuery extends SolrTestCaseJ4 {
|
|||
assertQ(req("q", "content_icunormalizer2:re\u0301Su*"), "//result[@numFound='2']");
|
||||
assertQ(req("q", "content_icunormalizer2:eL*"), "//result[@numFound='2']");
|
||||
}
|
||||
|
||||
public void testICUTransform() {
|
||||
assertQ(req("q", "content_icutransform:Росс*"), "//result[@numFound='1']");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,9 +31,14 @@ import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
|
|||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory{
|
||||
public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
public ArabicNormalizationFilter create(TokenStream input) {
|
||||
return new ArabicNormalizationFilter(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,10 +33,15 @@ import org.apache.lucene.analysis.cjk.CJKWidthFilter;
|
|||
* </fieldType></pre>
|
||||
*/
|
||||
|
||||
public class CJKWidthFilterFactory extends BaseTokenFilterFactory {
|
||||
public class CJKWidthFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new CJKWidthFilter(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,9 +31,14 @@ import org.apache.lucene.analysis.de.GermanNormalizationFilter;
|
|||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class GermanNormalizationFilterFactory extends BaseTokenFilterFactory {
|
||||
public class GermanNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new GermanNormalizationFilter(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,8 +31,13 @@ import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
|
|||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory {
|
||||
public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new HindiNormalizationFilter(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,8 +31,13 @@ import org.apache.lucene.analysis.in.IndicNormalizationFilter;
|
|||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class IndicNormalizationFilterFactory extends BaseTokenFilterFactory {
|
||||
public class IndicNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new IndicNormalizationFilter(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,9 +34,14 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class PersianNormalizationFilterFactory extends BaseTokenFilterFactory {
|
||||
public class PersianNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public PersianNormalizationFilter create(TokenStream input) {
|
||||
return new PersianNormalizationFilter(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -169,6 +169,42 @@
|
|||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="text_persian" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.PersianNormalizationFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="text_arabic" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.ArabicNormalizationFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="text_hindi" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.IndicNormalizationFilterFactory"/>
|
||||
<filter class="solr.HindiNormalizationFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="text_german" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.GermanNormalizationFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="text_width" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.CJKWidthFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<fieldType name="int" class="solr.TrieIntField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
|
||||
|
@ -203,6 +239,11 @@
|
|||
<field name="content_greek" type="text_greek" indexed="true" stored="true"/>
|
||||
<field name="content_turkish" type="text_turkish" indexed="true" stored="true"/>
|
||||
<field name="content_russian" type="text_russian" indexed="true" stored="true"/>
|
||||
<field name="content_persian" type="text_persian" indexed="true" stored="true"/>
|
||||
<field name="content_arabic" type="text_arabic" indexed="true" stored="true"/>
|
||||
<field name="content_hindi" type="text_hindi" indexed="true" stored="true"/>
|
||||
<field name="content_german" type="text_german" indexed="true" stored="true"/>
|
||||
<field name="content_width" type="text_width" indexed="true" stored="true"/>
|
||||
|
||||
<dynamicField name="*_straight" type="text_straight" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_lower" type="text_lower" indexed="true" stored="true"/>
|
||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.solr.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@ -31,7 +30,6 @@ public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
|
|||
@BeforeClass
|
||||
public static void beforeTests() throws Exception {
|
||||
initCore("solrconfig-basic.xml", "schema-folding.xml");
|
||||
IndexWriter iw;
|
||||
|
||||
String docs[] = {
|
||||
"abcdefg1 finger",
|
||||
|
@ -86,7 +84,22 @@ public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
|
|||
assertU(adoc("id", Integer.toString(idx++), "content_russian", "Вместе"));
|
||||
assertU(adoc("id", Integer.toString(idx++), "content_russian", "силе"));
|
||||
|
||||
assertU(optimize());
|
||||
// persian normalization
|
||||
assertU(adoc("id", Integer.toString(idx++), "content_persian", "هاي"));
|
||||
|
||||
// arabic normalization
|
||||
assertU(adoc("id", Integer.toString(idx++), "content_arabic", "روبرت"));
|
||||
|
||||
// hindi normalization
|
||||
assertU(adoc("id", Integer.toString(idx++), "content_hindi", "हिंदी"));
|
||||
assertU(adoc("id", Integer.toString(idx++), "content_hindi", "अाअा"));
|
||||
|
||||
// german normalization
|
||||
assertU(adoc("id", Integer.toString(idx++), "content_german", "weissbier"));
|
||||
|
||||
// cjk width normalization
|
||||
assertU(adoc("id", Integer.toString(idx++), "content_width", "ヴィッツ"));
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -304,4 +317,25 @@ public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
|
|||
assertQ(req("q", "content_russian:Си*е"), "//result[@numFound='1']");
|
||||
assertQ(req("q", "content_russian:эЛектромагнИт*"), "//result[@numFound='1']");
|
||||
}
|
||||
|
||||
public void testPersian() {
|
||||
assertQ(req("q", "content_persian:های*"), "//result[@numFound='1']");
|
||||
}
|
||||
|
||||
public void testArabic() {
|
||||
assertQ(req("q", "content_arabic:روبرـــــــــــــــــــــــــــــــــت*"), "//result[@numFound='1']");
|
||||
}
|
||||
|
||||
public void testHindi() {
|
||||
assertQ(req("q", "content_hindi:हिन्दी*"), "//result[@numFound='1']");
|
||||
assertQ(req("q", "content_hindi:आआ*"), "//result[@numFound='1']");
|
||||
}
|
||||
|
||||
public void testGerman() {
|
||||
assertQ(req("q", "content_german:weiß*"), "//result[@numFound='1']");
|
||||
}
|
||||
|
||||
public void testCJKWidth() {
|
||||
assertQ(req("q", "content_width:ヴィ*"), "//result[@numFound='1']");
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue