mirror of https://github.com/apache/lucene.git
SOLR-89 new PatternReplaceFilter, TrimFilter, and corresponding Factories
git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@494675 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ae6c58f278
commit
a2beac1e38
|
@ -37,6 +37,11 @@ Detailed Change List
|
||||||
New Features
|
New Features
|
||||||
1. SOLR-82: Default field values can be specified in the schema.xml.
|
1. SOLR-82: Default field values can be specified in the schema.xml.
|
||||||
(Ryan McKinley via hossman)
|
(Ryan McKinley via hossman)
|
||||||
|
2. SOLR-89: Two new TokenFilters with corresponding Factories...
|
||||||
|
* TrimFilter - Trims leading and trailing whitespace from Tokens
|
||||||
|
* PatternReplaceFilter - applies a Pattern to each token in the
|
||||||
|
stream, replacing match occurrences with a specified replacement.
|
||||||
|
(hossman)
|
||||||
|
|
||||||
Changes in runtime behavior
|
Changes in runtime behavior
|
||||||
1. Highlighting using DisMax will only pick up terms from the main
|
1. Highlighting using DisMax will only pick up terms from the main
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
<add>
|
<add>
|
||||||
<doc>
|
<doc>
|
||||||
<field name="id">TWINX2048-3200PRO</field>
|
<field name="id">TWINX2048-3200PRO</field>
|
||||||
<field name="name">CORSAIR XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail</field>
|
<field name="name">CORSAIR XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail</field>
|
||||||
<field name="manu">Corsair Microsystems Inc.</field>
|
<field name="manu">Corsair Microsystems Inc.</field>
|
||||||
<field name="cat">electronics</field>
|
<field name="cat">electronics</field>
|
||||||
<field name="cat">memory</field>
|
<field name="cat">memory</field>
|
||||||
|
|
|
@ -182,6 +182,39 @@
|
||||||
</analyzer>
|
</analyzer>
|
||||||
</fieldtype>
|
</fieldtype>
|
||||||
|
|
||||||
|
<!-- This is an example of using the KeywordTokenizer along
|
||||||
|
With various TokenFilterFactories to produce a sortable field
|
||||||
|
that does not include some properties of the source text
|
||||||
|
-->
|
||||||
|
<fieldtype name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
||||||
|
<analyzer>
|
||||||
|
<!-- KeywordTokenizer does no actual tokenizing, so the entire
|
||||||
|
input string is preserved as a single token
|
||||||
|
-->
|
||||||
|
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||||
|
<!-- The LowerCase TokenFilter does what you expect, which can be
|
||||||
|
useful when you want your sorting to be case insensitive
|
||||||
|
-->
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
<!-- The TrimFilter removes any leading or trailing whitespace -->
|
||||||
|
<filter class="solr.TrimFilterFactory" />
|
||||||
|
<!-- The PatternReplaceFilter gives you the flexibility to use
|
||||||
|
Java regular expressions to replace any sequence of characters
|
||||||
|
matching a pattern with an arbitrary replacement string,
|
||||||
|
which may include back references to portions of the original
|
||||||
|
string matched by the pattern.
|
||||||
|
|
||||||
|
See the Java Regular Expression documentation for more
|
||||||
|
information on pattern and replacement string syntax.
|
||||||
|
|
||||||
|
http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
|
||||||
|
-->
|
||||||
|
<filter class="solr.PatternReplaceFilterFactory"
|
||||||
|
pattern="([^a-z])" replacement="" replace="all"
|
||||||
|
/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldtype>
|
||||||
|
|
||||||
</types>
|
</types>
|
||||||
|
|
||||||
|
|
||||||
|
@ -204,6 +237,8 @@
|
||||||
<field name="id" type="string" indexed="true" stored="true"/>
|
<field name="id" type="string" indexed="true" stored="true"/>
|
||||||
<field name="sku" type="textTight" indexed="true" stored="true" omitNorms="true"/>
|
<field name="sku" type="textTight" indexed="true" stored="true" omitNorms="true"/>
|
||||||
<field name="name" type="text" indexed="true" stored="true"/>
|
<field name="name" type="text" indexed="true" stored="true"/>
|
||||||
|
<field name="nameSort" type="string" indexed="true" stored="false"/>
|
||||||
|
<field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
|
||||||
<field name="manu" type="text" indexed="true" stored="true" omitNorms="true"/>
|
<field name="manu" type="text" indexed="true" stored="true" omitNorms="true"/>
|
||||||
<field name="cat" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
|
<field name="cat" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
|
||||||
<field name="features" type="text" indexed="true" stored="true" multiValued="true"/>
|
<field name="features" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
@ -264,6 +299,8 @@
|
||||||
|
|
||||||
<copyField source="cat" dest="text"/>
|
<copyField source="cat" dest="text"/>
|
||||||
<copyField source="name" dest="text"/>
|
<copyField source="name" dest="text"/>
|
||||||
|
<copyField source="name" dest="nameSort"/>
|
||||||
|
<copyField source="name" dest="alphaNameSort"/>
|
||||||
<copyField source="manu" dest="text"/>
|
<copyField source="manu" dest="text"/>
|
||||||
<copyField source="features" dest="text"/>
|
<copyField source="features" dest="text"/>
|
||||||
<copyField source="includes" dest="text"/>
|
<copyField source="includes" dest="text"/>
|
||||||
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A TokenFilter which applies a Pattern to each token in the stream,
|
||||||
|
* replacing match occurances with the specified replacement string.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* <b>Note:</b> Depending on the input and the pattern used and the input
|
||||||
|
* TokenStream, this TokenFilter may produce Tokens whose text is the empty
|
||||||
|
* string.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @version $Id:$
|
||||||
|
* @see Pattern
|
||||||
|
*/
|
||||||
|
public final class PatternReplaceFilter extends TokenFilter {
|
||||||
|
Pattern p;
|
||||||
|
String replacement;
|
||||||
|
boolean all = true;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs an instance to replace either the first, or all occurances
|
||||||
|
*
|
||||||
|
* @param in the TokenStream to process
|
||||||
|
* @param p the patterm to apply to each Token
|
||||||
|
* @param replacement the "replacement string" to substitute, if null a
|
||||||
|
* blank string will be used. Note that this is not the literal
|
||||||
|
* string that will be used, '$' and '\' have special meaning.
|
||||||
|
* @param all if true, all matches will be replaced otherwise just the first match.
|
||||||
|
* @see Matcher#quoteReplacement
|
||||||
|
*/
|
||||||
|
public PatternReplaceFilter(TokenStream in,
|
||||||
|
Pattern p,
|
||||||
|
String replacement,
|
||||||
|
boolean all) {
|
||||||
|
super(in);
|
||||||
|
this.p=p;
|
||||||
|
this.replacement = (null == replacement) ? "" : replacement;
|
||||||
|
this.all=all;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final Token next() throws IOException {
|
||||||
|
Token t = input.next();
|
||||||
|
if (t == null)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
Matcher m = p.matcher(t.termText());
|
||||||
|
if (all) {
|
||||||
|
t.setTermText(m.replaceAll(replacement));
|
||||||
|
} else {
|
||||||
|
t.setTermText(m.replaceFirst(replacement));
|
||||||
|
}
|
||||||
|
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,67 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.PatternSyntaxException;
|
||||||
|
|
||||||
|
/**
 * Factory for {@link PatternReplaceFilter}, configured from three arguments:
 * <ul>
 *  <li><code>pattern</code> (required) - the regular expression to apply</li>
 *  <li><code>replacement</code> (optional) - the replacement string; when
 *      absent the filter substitutes the empty string</li>
 *  <li><code>replace</code> (optional) - <code>"all"</code> (the default) or
 *      <code>"first"</code></li>
 * </ul>
 *
 * @version $Id:$
 * @see PatternReplaceFilter
 */
public class PatternReplaceFilterFactory extends BaseTokenFilterFactory {
  Pattern p;
  String replacement;
  boolean all = true;

  /**
   * Reads and validates the factory configuration.
   *
   * @param args the configuration arguments for this factory
   * @throws RuntimeException if 'pattern' is missing or is not a valid
   *         regular expression, or if 'replace' is neither 'first' nor 'all'
   */
  public void init(Map<String, String> args) {
    super.init(args);

    // Pattern.compile(null) would fail with a bare NullPointerException;
    // report a missing pattern as a configuration problem instead.
    String regex = args.get("pattern");
    if (null == regex) {
      throw new RuntimeException
        ("Configuration Error: 'pattern' must be specified in " +
         this.getClass().getName());
    }
    try {
      p = Pattern.compile(regex);
    } catch (PatternSyntaxException e) {
      throw new RuntimeException
        ("Configuration Error: 'pattern' can not be parsed in " +
         this.getClass().getName(), e);
    }

    // A null replacement is allowed; PatternReplaceFilter treats it as "".
    replacement = args.get("replacement");

    String r = args.get("replace");
    if (null != r) {
      if (r.equals("all")) {
        all = true;
      } else if (r.equals("first")) {
        all = false;
      } else {
        throw new RuntimeException
          ("Configuration Error: 'replace' must be 'first' or 'all' in "
           + this.getClass().getName());
      }
    }

  }

  /**
   * Wraps the given stream in a PatternReplaceFilter using the configured
   * pattern, replacement and replace mode.
   */
  public TokenStream create(TokenStream input) {
    return new PatternReplaceFilter(input, p, replacement, all);
  }
}
|
|
@ -0,0 +1,45 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Trims leading and trailing whitespace from Tokens in the stream.
|
||||||
|
*
|
||||||
|
* @version $Id:$
|
||||||
|
*/
|
||||||
|
public final class TrimFilter extends TokenFilter {
|
||||||
|
|
||||||
|
public TrimFilter(TokenStream in) {
|
||||||
|
super(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
public final Token next() throws IOException {
|
||||||
|
Token t = input.next();
|
||||||
|
if (null == t || null == t.termText())
|
||||||
|
return t;
|
||||||
|
|
||||||
|
t.setTermText(t.termText().trim());
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,30 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @version $Id:$
|
||||||
|
* @see TrimFilter
|
||||||
|
*/
|
||||||
|
public class TrimFilterFactory extends BaseTokenFilterFactory {
|
||||||
|
public TokenStream create(TokenStream input) {
|
||||||
|
return new TrimFilter(input);
|
||||||
|
}
|
||||||
|
}
|
|
@ -686,6 +686,26 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testPatternReplaceFilter() {
|
||||||
|
|
||||||
|
assertU(adoc("id", "1",
|
||||||
|
"patternreplacefilt", "My fine-feathered friend!"));
|
||||||
|
assertU(adoc("id", "2",
|
||||||
|
"patternreplacefilt", " What's Up Doc?"));
|
||||||
|
assertU(commit());
|
||||||
|
|
||||||
|
assertQ("don't find Up",
|
||||||
|
req("q", "patternreplacefilt:Up"),
|
||||||
|
"*[count(//doc)=0]");
|
||||||
|
|
||||||
|
assertQ("find doc",
|
||||||
|
req("q", "patternreplacefilt:__What_s_Up_Doc_"),
|
||||||
|
"*[count(//doc)=1]");
|
||||||
|
|
||||||
|
assertQ("find birds",
|
||||||
|
req("q", "patternreplacefilt:My__fine_feathered_friend_"),
|
||||||
|
"*[count(//doc)=1]");
|
||||||
|
}
|
||||||
|
|
||||||
// /** this doesn't work, but if it did, this is how we'd test it. */
|
// /** this doesn't work, but if it did, this is how we'd test it. */
|
||||||
// public void testOverwriteFalse() {
|
// public void testOverwriteFalse() {
|
||||||
|
|
|
@ -0,0 +1,96 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @version $Id:$
|
||||||
|
*/
|
||||||
|
public class TestPatternReplaceFilter extends TestCase {
|
||||||
|
|
||||||
|
public void testReplaceAll() throws Exception {
|
||||||
|
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
|
||||||
|
TokenStream ts = new PatternReplaceFilter
|
||||||
|
(new WhitespaceTokenizer(new StringReader(input)),
|
||||||
|
Pattern.compile("a*b"),
|
||||||
|
"-", true);
|
||||||
|
assertEquals("-foo-foo-foo-", ts.next().termText());
|
||||||
|
assertEquals("-", ts.next().termText());
|
||||||
|
assertEquals("c-", ts.next().termText());
|
||||||
|
assertNull(ts.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testReplaceFirst() throws Exception {
|
||||||
|
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
|
||||||
|
TokenStream ts = new PatternReplaceFilter
|
||||||
|
(new WhitespaceTokenizer(new StringReader(input)),
|
||||||
|
Pattern.compile("a*b"),
|
||||||
|
"-", false);
|
||||||
|
assertEquals("-fooaabfooabfoob", ts.next().termText());
|
||||||
|
assertEquals("-", ts.next().termText());
|
||||||
|
assertEquals("c-", ts.next().termText());
|
||||||
|
assertNull(ts.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testStripFirst() throws Exception {
|
||||||
|
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
|
||||||
|
TokenStream ts = new PatternReplaceFilter
|
||||||
|
(new WhitespaceTokenizer(new StringReader(input)),
|
||||||
|
Pattern.compile("a*b"),
|
||||||
|
null, false);
|
||||||
|
assertEquals("fooaabfooabfoob", ts.next().termText());
|
||||||
|
assertEquals("", ts.next().termText());
|
||||||
|
assertEquals("c", ts.next().termText());
|
||||||
|
assertNull(ts.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testStripAll() throws Exception {
|
||||||
|
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
|
||||||
|
TokenStream ts = new PatternReplaceFilter
|
||||||
|
(new WhitespaceTokenizer(new StringReader(input)),
|
||||||
|
Pattern.compile("a*b"),
|
||||||
|
null, true);
|
||||||
|
assertEquals("foofoofoo", ts.next().termText());
|
||||||
|
assertEquals("", ts.next().termText());
|
||||||
|
assertEquals("c", ts.next().termText());
|
||||||
|
assertNull(ts.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testReplaceAllWithBackRef() throws Exception {
|
||||||
|
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
|
||||||
|
TokenStream ts = new PatternReplaceFilter
|
||||||
|
(new WhitespaceTokenizer(new StringReader(input)),
|
||||||
|
Pattern.compile("(a*)b"),
|
||||||
|
"$1\\$", true);
|
||||||
|
assertEquals("aa$fooaa$fooa$foo$", ts.next().termText());
|
||||||
|
assertEquals("a$", ts.next().termText());
|
||||||
|
assertEquals("caaaaaaaaa$", ts.next().termText());
|
||||||
|
assertNull(ts.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,61 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @version $Id:$
|
||||||
|
*/
|
||||||
|
public class TestTrimFilter extends TestCase {
|
||||||
|
|
||||||
|
public void testTrim() throws Exception {
|
||||||
|
TokenStream ts = new TrimFilter
|
||||||
|
(new IterTokenStream(new Token(" a ", 1, 5),
|
||||||
|
new Token("b ",6,10),
|
||||||
|
new Token("cCc",11,15),
|
||||||
|
new Token(" ",16,20)));
|
||||||
|
|
||||||
|
assertEquals("a", ts.next().termText());
|
||||||
|
assertEquals("b", ts.next().termText());
|
||||||
|
assertEquals("cCc", ts.next().termText());
|
||||||
|
assertEquals("", ts.next().termText());
|
||||||
|
assertNull(ts.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class IterTokenStream extends TokenStream {
|
||||||
|
Iterator<Token> toks;
|
||||||
|
public IterTokenStream(Token... toks) {
|
||||||
|
this.toks = Arrays.asList(toks).iterator();
|
||||||
|
}
|
||||||
|
public Token next() {
|
||||||
|
if (toks.hasNext()) {
|
||||||
|
return toks.next();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -164,6 +164,17 @@
|
||||||
<filter class="solr.LowerCaseFilterFactory"/>
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
</analyzer>
|
</analyzer>
|
||||||
</fieldtype>
|
</fieldtype>
|
||||||
|
<fieldtype name="patternreplacefilt" class="solr.TextField">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||||
|
<filter class="solr.PatternReplaceFilterFactory"
|
||||||
|
pattern="([^a-zA-Z])" replacement="_" replace="all"
|
||||||
|
/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldtype>
|
||||||
<fieldtype name="porterfilt" class="solr.TextField">
|
<fieldtype name="porterfilt" class="solr.TextField">
|
||||||
<analyzer>
|
<analyzer>
|
||||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
@ -340,6 +351,7 @@
|
||||||
<field name="standardtokfilt" type="standardtokfilt" indexed="true" stored="true"/>
|
<field name="standardtokfilt" type="standardtokfilt" indexed="true" stored="true"/>
|
||||||
<field name="standardfilt" type="standardfilt" indexed="true" stored="true"/>
|
<field name="standardfilt" type="standardfilt" indexed="true" stored="true"/>
|
||||||
<field name="lowerfilt" type="lowerfilt" indexed="true" stored="true"/>
|
<field name="lowerfilt" type="lowerfilt" indexed="true" stored="true"/>
|
||||||
|
<field name="patternreplacefilt" type="patternreplacefilt" indexed="true" stored="true"/>
|
||||||
<field name="porterfilt" type="porterfilt" indexed="true" stored="true"/>
|
<field name="porterfilt" type="porterfilt" indexed="true" stored="true"/>
|
||||||
<field name="engporterfilt" type="engporterfilt" indexed="true" stored="true"/>
|
<field name="engporterfilt" type="engporterfilt" indexed="true" stored="true"/>
|
||||||
<field name="custengporterfilt" type="custengporterfilt" indexed="true" stored="true"/>
|
<field name="custengporterfilt" type="custengporterfilt" indexed="true" stored="true"/>
|
||||||
|
|
Loading…
Reference in New Issue