SOLR-2921: enable multi-term queries for these filters too

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1304059 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-03-22 21:03:58 +00:00
parent c8cd6210cc
commit 6df50d5486
11 changed files with 136 additions and 13 deletions

View File

@ -37,7 +37,7 @@ import com.ibm.icu.text.Transliterator;
* </ul>
* @see Transliterator
*/
public class ICUTransformFilterFactory extends BaseTokenFilterFactory {
public class ICUTransformFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
private Transliterator transliterator;
// TODO: add support for custom rules
@ -64,4 +64,9 @@ public class ICUTransformFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new ICUTransformFilter(input, transliterator);
}
/**
 * Implements {@link MultiTermAwareComponent} by reporting this factory
 * itself as its multi-term component.
 *
 * @return this factory instance
 */
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -33,6 +33,13 @@
<filter class="solr.ICUNormalizer2FilterFactory" name="nfkc_cf" mode="compose"/>
</analyzer>
</fieldType>
<!-- Whitespace-tokenized text type whose tokens pass through
     ICUTransformFilterFactory configured with the "Cyrillic-Latin" transform id. -->
<fieldType name="text_icutransform" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ICUTransformFilterFactory" id="Cyrillic-Latin"/>
</analyzer>
</fieldType>
</types>
@ -40,6 +47,7 @@
<field name="id" type="string" indexed="true" stored="true" required="true"/>
<field name="content_icufolding" type="text_icufolding" indexed="true" stored="true"/>
<field name="content_icunormalizer2" type="text_icunormalizer2" indexed="true" stored="true"/>
<!-- Indexed and queried by TestFoldingMultitermExtrasQuery.testICUTransform. -->
<field name="content_icutransform" type="text_icutransform" indexed="true" stored="true"/>
</fields>

View File

@ -17,7 +17,6 @@ package org.apache.solr.analysis;
* limitations under the License.
*/
import org.apache.lucene.index.IndexWriter;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
import org.junit.Test;
@ -31,7 +30,6 @@ public class TestFoldingMultitermExtrasQuery extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeTests() throws Exception {
initCore("solrconfig-icucollate.xml","schema-folding-extra.xml", "analysis-extras/solr");
IndexWriter iw;
int idx = 1;
// ICUFoldingFilterFactory
@ -55,7 +53,10 @@ public class TestFoldingMultitermExtrasQuery extends SolrTestCaseJ4 {
assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "ELİF"));
assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "eli\u0307f"));
assertU(optimize());
// ICUTransformFilterFactory
assertU(adoc("id", Integer.toString(idx++), "content_icutransform", "Российская"));
assertU(commit());
}
@Test
@ -74,4 +75,8 @@ public class TestFoldingMultitermExtrasQuery extends SolrTestCaseJ4 {
assertQ(req("q", "content_icunormalizer2:re\u0301Su*"), "//result[@numFound='2']");
assertQ(req("q", "content_icunormalizer2:eL*"), "//result[@numFound='2']");
}
/**
 * Runs a prefix query against {@code content_icutransform} and expects
 * exactly one matching document (the one added in {@code beforeTests}).
 */
@Test
public void testICUTransform() {
  assertQ(req("q", "content_icutransform:Росс*"), "//result[@numFound='1']");
}
}

View File

@ -31,9 +31,14 @@ import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
* &lt;/fieldType&gt;</pre>
*
*/
public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory{
public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
/**
 * Wraps the given token stream in an {@link ArabicNormalizationFilter}.
 *
 * @param input the token stream to wrap
 * @return a new {@code ArabicNormalizationFilter} reading from {@code input}
 */
@Override
public ArabicNormalizationFilter create(TokenStream input) {
  return new ArabicNormalizationFilter(input);
}
/**
 * Implements {@link MultiTermAwareComponent} by reporting this factory
 * itself as its multi-term component.
 *
 * @return this factory instance
 */
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -33,10 +33,15 @@ import org.apache.lucene.analysis.cjk.CJKWidthFilter;
* &lt;/fieldType&gt;</pre>
*/
public class CJKWidthFilterFactory extends BaseTokenFilterFactory {
public class CJKWidthFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
/**
 * Wraps the given token stream in a {@link CJKWidthFilter}.
 *
 * @param input the token stream to wrap
 * @return a new {@code CJKWidthFilter} reading from {@code input}
 */
@Override
public TokenStream create(TokenStream input) {
return new CJKWidthFilter(input);
}
/**
 * Implements {@link MultiTermAwareComponent} by reporting this factory
 * itself as its multi-term component.
 *
 * @return this factory instance
 */
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -31,9 +31,14 @@ import org.apache.lucene.analysis.de.GermanNormalizationFilter;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*/
public class GermanNormalizationFilterFactory extends BaseTokenFilterFactory {
public class GermanNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
/**
 * Wraps the given token stream in a {@link GermanNormalizationFilter}.
 *
 * @param input the token stream to wrap
 * @return a new {@code GermanNormalizationFilter} reading from {@code input}
 */
@Override
public TokenStream create(TokenStream input) {
  return new GermanNormalizationFilter(input);
}
/**
 * Implements {@link MultiTermAwareComponent} by reporting this factory
 * itself as its multi-term component.
 *
 * @return this factory instance
 */
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -31,8 +31,13 @@ import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
* &lt;/fieldType&gt;</pre>
*
*/
public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory {
public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
/**
 * Wraps the given token stream in a {@link HindiNormalizationFilter}.
 *
 * @param input the token stream to wrap
 * @return a new {@code HindiNormalizationFilter} reading from {@code input}
 */
@Override
public TokenStream create(TokenStream input) {
  return new HindiNormalizationFilter(input);
}
/**
 * Implements {@link MultiTermAwareComponent} by reporting this factory
 * itself as its multi-term component.
 *
 * @return this factory instance
 */
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -31,8 +31,13 @@ import org.apache.lucene.analysis.in.IndicNormalizationFilter;
* &lt;/fieldType&gt;</pre>
*
*/
public class IndicNormalizationFilterFactory extends BaseTokenFilterFactory {
public class IndicNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
/**
 * Wraps the given token stream in an {@link IndicNormalizationFilter}.
 *
 * @param input the token stream to wrap
 * @return a new {@code IndicNormalizationFilter} reading from {@code input}
 */
@Override
public TokenStream create(TokenStream input) {
  return new IndicNormalizationFilter(input);
}
/**
 * Implements {@link MultiTermAwareComponent} by reporting this factory
 * itself as its multi-term component.
 *
 * @return this factory instance
 */
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -34,9 +34,14 @@ import org.apache.lucene.analysis.TokenStream;
* &lt;/fieldType&gt;</pre>
*
*/
public class PersianNormalizationFilterFactory extends BaseTokenFilterFactory {
public class PersianNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
/**
 * Wraps the given token stream in a {@link PersianNormalizationFilter}.
 *
 * @param input the token stream to wrap
 * @return a new {@code PersianNormalizationFilter} reading from {@code input}
 */
@Override
public PersianNormalizationFilter create(TokenStream input) {
  return new PersianNormalizationFilter(input);
}
/**
 * Implements {@link MultiTermAwareComponent} by reporting this factory
 * itself as its multi-term component.
 *
 * @return this factory instance
 */
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -169,6 +169,42 @@
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- Whitespace-tokenized text type whose tokens pass through PersianNormalizationFilterFactory. -->
<fieldType name="text_persian" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.PersianNormalizationFilterFactory"/>
</analyzer>
</fieldType>
<!-- Whitespace-tokenized text type whose tokens pass through ArabicNormalizationFilterFactory. -->
<fieldType name="text_arabic" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ArabicNormalizationFilterFactory"/>
</analyzer>
</fieldType>
<!-- Whitespace-tokenized text type that chains two filters in order:
     IndicNormalizationFilterFactory first, then HindiNormalizationFilterFactory. -->
<fieldType name="text_hindi" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.IndicNormalizationFilterFactory"/>
<filter class="solr.HindiNormalizationFilterFactory"/>
</analyzer>
</fieldType>
<!-- Whitespace-tokenized text type whose tokens pass through GermanNormalizationFilterFactory. -->
<fieldType name="text_german" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.GermanNormalizationFilterFactory"/>
</analyzer>
</fieldType>
<!-- Whitespace-tokenized text type whose tokens pass through CJKWidthFilterFactory. -->
<fieldType name="text_width" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.CJKWidthFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="int" class="solr.TrieIntField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
@ -203,6 +239,11 @@
<field name="content_greek" type="text_greek" indexed="true" stored="true"/>
<field name="content_turkish" type="text_turkish" indexed="true" stored="true"/>
<field name="content_russian" type="text_russian" indexed="true" stored="true"/>
<!-- Fields indexed and queried by TestFoldingMultitermQuery
     (testPersian, testArabic, testHindi, testGerman, testCJKWidth). -->
<field name="content_persian" type="text_persian" indexed="true" stored="true"/>
<field name="content_arabic" type="text_arabic" indexed="true" stored="true"/>
<field name="content_hindi" type="text_hindi" indexed="true" stored="true"/>
<field name="content_german" type="text_german" indexed="true" stored="true"/>
<field name="content_width" type="text_width" indexed="true" stored="true"/>
<dynamicField name="*_straight" type="text_straight" indexed="true" stored="true"/>
<dynamicField name="*_lower" type="text_lower" indexed="true" stored="true"/>

View File

@ -17,7 +17,6 @@ package org.apache.solr.search;
* limitations under the License.
*/
import org.apache.lucene.index.IndexWriter;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
import org.junit.Test;
@ -31,7 +30,6 @@ public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeTests() throws Exception {
initCore("solrconfig-basic.xml", "schema-folding.xml");
IndexWriter iw;
String docs[] = {
"abcdefg1 finger",
@ -86,7 +84,22 @@ public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
assertU(adoc("id", Integer.toString(idx++), "content_russian", "Вместе"));
assertU(adoc("id", Integer.toString(idx++), "content_russian", "силе"));
assertU(optimize());
// persian normalization
assertU(adoc("id", Integer.toString(idx++), "content_persian", "هاي"));
// arabic normalization
assertU(adoc("id", Integer.toString(idx++), "content_arabic", "روبرت"));
// hindi normalization
assertU(adoc("id", Integer.toString(idx++), "content_hindi", "हिंदी"));
assertU(adoc("id", Integer.toString(idx++), "content_hindi", "अाअा"));
// german normalization
assertU(adoc("id", Integer.toString(idx++), "content_german", "weissbier"));
// cjk width normalization
assertU(adoc("id", Integer.toString(idx++), "content_width", "ヴィッツ"));
assertU(commit());
}
@Test
@ -304,4 +317,25 @@ public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
assertQ(req("q", "content_russian:Си*е"), "//result[@numFound='1']");
assertQ(req("q", "content_russian:эЛектромагнИт*"), "//result[@numFound='1']");
}
/**
 * Runs a prefix query against {@code content_persian} and expects exactly
 * one matching document.
 *
 * NOTE(review): the query prefix appears to be spelled with a different
 * final letter than the value indexed in {@code beforeTests}, so the match
 * presumably relies on the prefix being normalized - confirm.
 */
@Test
public void testPersian() {
  assertQ(req("q", "content_persian:های*"), "//result[@numFound='1']");
}
/**
 * Runs a prefix query against {@code content_arabic} and expects exactly
 * one matching document.
 *
 * The query contains a long run of elongation characters that the value
 * indexed in {@code beforeTests} does not have; presumably the match relies
 * on the prefix being normalized by the field's filter - confirm.
 */
@Test
public void testArabic() {
  assertQ(req("q", "content_arabic:روبرـــــــــــــــــــــــــــــــــت*"), "//result[@numFound='1']");
}
/**
 * Runs two prefix queries against {@code content_hindi}; each must match
 * exactly one document.
 *
 * Both query prefixes are spelled differently from the values indexed in
 * {@code beforeTests}; presumably the matches rely on the prefixes being
 * normalized by the field's filters - confirm.
 */
@Test
public void testHindi() {
  assertQ(req("q", "content_hindi:हिन्दी*"), "//result[@numFound='1']");
  assertQ(req("q", "content_hindi:आआ*"), "//result[@numFound='1']");
}
/**
 * Runs a prefix query against {@code content_german} and expects exactly
 * one matching document.
 *
 * The query uses "weiß" while {@code beforeTests} indexes "weissbier";
 * presumably the match relies on the prefix being normalized by the
 * field's filter - confirm.
 */
@Test
public void testGerman() {
  assertQ(req("q", "content_german:weiß*"), "//result[@numFound='1']");
}
/**
 * Runs a prefix query against {@code content_width} and expects exactly
 * one matching document (the one added in {@code beforeTests}).
 */
@Test
public void testCJKWidth() {
  assertQ(req("q", "content_width:ヴィ*"), "//result[@numFound='1']");
}
}