mirror of https://github.com/apache/lucene.git
LUCENE-2413: move high-level charfilters to contrib/analyzers
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@940676 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5b74a4ec61
commit
f9249e3a74
|
@ -11,6 +11,9 @@ Changes in backwards compatibility policy
|
|||
- o.a.l.analysis.LengthFilter -> o.a.l.analysis.miscellaneous.LengthFilter
|
||||
- o.a.l.analysis.PerFieldAnalyzerWrapper -> o.a.l.analysis.miscellaneous.PerFieldAnalyzerWrapper
|
||||
- o.a.l.analysis.TeeSinkTokenFilter -> o.a.l.analysis.sinks.TeeSinkTokenFilter
|
||||
- o.a.l.analysis.BaseCharFilter -> o.a.l.analysis.charfilter.BaseCharFilter
|
||||
- o.a.l.analysis.MappingCharFilter -> o.a.l.analysis.charfilter.MappingCharFilter
|
||||
- o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
|
||||
... (in progress)
|
||||
|
||||
* LUCENE-1458, LUCENE-2111, LUCENE-2354: Changes from flexible indexing:
|
||||
|
|
|
@ -1,61 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.analysis;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Holds a map of String input to String output, to be used
|
||||
* with {@link MappingCharFilter}.
|
||||
*/
|
||||
public class NormalizeCharMap {
|
||||
|
||||
Map<Character, NormalizeCharMap> submap;
|
||||
String normStr;
|
||||
int diff;
|
||||
|
||||
/** Records a replacement to be applied to the inputs
|
||||
* stream. Whenever <code>singleMatch</code> occurs in
|
||||
* the input, it will be replaced with
|
||||
* <code>replacement</code>.
|
||||
*
|
||||
* @param singleMatch input String to be replaced
|
||||
* @param replacement output String
|
||||
*/
|
||||
public void add(String singleMatch, String replacement) {
|
||||
NormalizeCharMap currMap = this;
|
||||
for(int i = 0; i < singleMatch.length(); i++) {
|
||||
char c = singleMatch.charAt(i);
|
||||
if (currMap.submap == null) {
|
||||
currMap.submap = new HashMap<Character, NormalizeCharMap>(1);
|
||||
}
|
||||
NormalizeCharMap map = currMap.submap.get(Character.valueOf(c));
|
||||
if (map == null) {
|
||||
map = new NormalizeCharMap();
|
||||
currMap.submap.put(Character.valueOf(c), map);
|
||||
}
|
||||
currMap = map;
|
||||
}
|
||||
if (currMap.normStr != null) {
|
||||
throw new RuntimeException("MappingCharFilter: there is already a mapping for " + singleMatch);
|
||||
}
|
||||
currMap.normStr = replacement;
|
||||
currMap.diff = singleMatch.length() - replacement.length();
|
||||
}
|
||||
}
|
|
@ -1,162 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.analysis;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.List;
|
||||
|
||||
public class TestMappingCharFilter extends BaseTokenStreamTestCase {
|
||||
|
||||
NormalizeCharMap normMap;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
normMap = new NormalizeCharMap();
|
||||
|
||||
normMap.add( "aa", "a" );
|
||||
normMap.add( "bbb", "b" );
|
||||
normMap.add( "cccc", "cc" );
|
||||
|
||||
normMap.add( "h", "i" );
|
||||
normMap.add( "j", "jj" );
|
||||
normMap.add( "k", "kkk" );
|
||||
normMap.add( "ll", "llll" );
|
||||
|
||||
normMap.add( "empty", "" );
|
||||
}
|
||||
|
||||
public void testReaderReset() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
|
||||
char[] buf = new char[10];
|
||||
int len = cs.read(buf, 0, 10);
|
||||
assertEquals( 1, len );
|
||||
assertEquals( 'x', buf[0]) ;
|
||||
len = cs.read(buf, 0, 10);
|
||||
assertEquals( -1, len );
|
||||
|
||||
// rewind
|
||||
cs.reset();
|
||||
len = cs.read(buf, 0, 10);
|
||||
assertEquals( 1, len );
|
||||
assertEquals( 'x', buf[0]) ;
|
||||
}
|
||||
|
||||
public void testNothingChange() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||
assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1});
|
||||
}
|
||||
|
||||
public void test1to1() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||
assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1});
|
||||
}
|
||||
|
||||
public void test1to2() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||
assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1});
|
||||
}
|
||||
|
||||
public void test1to3() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||
assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1});
|
||||
}
|
||||
|
||||
public void test2to4() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||
assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2});
|
||||
}
|
||||
|
||||
public void test2to1() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||
assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2});
|
||||
}
|
||||
|
||||
public void test3to1() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||
assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3});
|
||||
}
|
||||
|
||||
public void test4to2() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||
assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4});
|
||||
}
|
||||
|
||||
public void test5to0() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||
assertTokenStreamContents(ts, new String[0]);
|
||||
}
|
||||
|
||||
//
|
||||
// 1111111111222
|
||||
// 01234567890123456789012
|
||||
//(in) h i j k ll cccc bbb aa
|
||||
//
|
||||
// 1111111111222
|
||||
// 01234567890123456789012
|
||||
//(out) i i jj kkk llll cc b a
|
||||
//
|
||||
// h, 0, 1 => i, 0, 1
|
||||
// i, 2, 3 => i, 2, 3
|
||||
// j, 4, 5 => jj, 4, 5
|
||||
// k, 6, 7 => kkk, 6, 7
|
||||
// ll, 8,10 => llll, 8,10
|
||||
// cccc,11,15 => cc,11,15
|
||||
// bbb,16,19 => b,16,19
|
||||
// aa,20,22 => a,20,22
|
||||
//
|
||||
public void testTokenStream() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h i j k ll cccc bbb aa" ) ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||
assertTokenStreamContents(ts,
|
||||
new String[]{"i","i","jj","kkk","llll","cc","b","a"},
|
||||
new int[]{0,2,4,6,8,11,16,20},
|
||||
new int[]{1,3,5,7,10,15,19,22}
|
||||
);
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
// 0123456789
|
||||
//(in) aaaa ll h
|
||||
//(out-1) aa llll i
|
||||
//(out-2) a llllllll i
|
||||
//
|
||||
// aaaa,0,4 => a,0,4
|
||||
// ll,5,7 => llllllll,5,7
|
||||
// h,8,9 => i,8,9
|
||||
public void testChained() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap,
|
||||
new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) ) ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||
assertTokenStreamContents(ts,
|
||||
new String[]{"a","llllllll","i"},
|
||||
new int[]{0,5,8},
|
||||
new int[]{4,7,9}
|
||||
);
|
||||
}
|
||||
}
|
|
@ -15,11 +15,14 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.analysis;
|
||||
package org.apache.lucene.analysis.charfilter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.CharFilter;
|
||||
import org.apache.lucene.analysis.CharStream;
|
||||
|
||||
/**
|
||||
* Base utility class for implementing a {@link CharFilter}.
|
||||
* You subclass this, and then record mappings by calling
|
|
@ -15,12 +15,15 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.analysis;
|
||||
package org.apache.lucene.analysis.charfilter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import org.apache.lucene.analysis.CharReader;
|
||||
import org.apache.lucene.analysis.CharStream;
|
||||
|
||||
/**
|
||||
* Simplistic {@link CharFilter} that applies the mappings
|
||||
* contained in a {@link NormalizeCharMap} to the character
|
|
@ -15,7 +15,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.analysis;
|
||||
package org.apache.lucene.analysis.charfilter;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
|
@ -15,10 +15,16 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.analysis;
|
||||
package org.apache.lucene.analysis.charfilter;
|
||||
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharReader;
|
||||
import org.apache.lucene.analysis.CharStream;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
|
||||
public class TestMappingCharFilter extends BaseTokenStreamTestCase {
|
||||
|
||||
NormalizeCharMap normMap;
|
|
@ -1,93 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.analysis;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Base utility class for implementing a {@link CharFilter}.
|
||||
* You subclass this, and then record mappings by calling
|
||||
* {@link #addOffCorrectMap}, and then invoke the correct
|
||||
* method to correct an offset.
|
||||
*
|
||||
* <p><b>NOTE</b>: This class is not particularly efficient.
|
||||
* For example, a new class instance is created for every
|
||||
* call to {@link #addOffCorrectMap}, which is then appended
|
||||
* to a private list.
|
||||
*/
|
||||
public abstract class BaseCharFilter extends CharFilter {
|
||||
|
||||
private List<OffCorrectMap> pcmList;
|
||||
|
||||
public BaseCharFilter(CharStream in) {
|
||||
super(in);
|
||||
}
|
||||
|
||||
/** Retrieve the corrected offset. Note that this method
|
||||
* is slow, if you correct positions far before the most
|
||||
* recently added position, as it's a simple linear
|
||||
* search backwards through all offset corrections added
|
||||
* by {@link #addOffCorrectMap}. */
|
||||
@Override
|
||||
protected int correct(int currentOff) {
|
||||
if (pcmList == null || pcmList.isEmpty()) {
|
||||
return currentOff;
|
||||
}
|
||||
for (int i = pcmList.size() - 1; i >= 0; i--) {
|
||||
if (currentOff >= pcmList.get(i).off) {
|
||||
return currentOff + pcmList.get(i).cumulativeDiff;
|
||||
}
|
||||
}
|
||||
return currentOff;
|
||||
}
|
||||
|
||||
protected int getLastCumulativeDiff() {
|
||||
return pcmList == null || pcmList.isEmpty() ?
|
||||
0 : pcmList.get(pcmList.size() - 1).cumulativeDiff;
|
||||
}
|
||||
|
||||
protected void addOffCorrectMap(int off, int cumulativeDiff) {
|
||||
if (pcmList == null) {
|
||||
pcmList = new ArrayList<OffCorrectMap>();
|
||||
}
|
||||
pcmList.add(new OffCorrectMap(off, cumulativeDiff));
|
||||
}
|
||||
|
||||
static class OffCorrectMap {
|
||||
|
||||
int off;
|
||||
int cumulativeDiff;
|
||||
|
||||
OffCorrectMap(int off, int cumulativeDiff) {
|
||||
this.off = off;
|
||||
this.cumulativeDiff = cumulativeDiff;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append('(');
|
||||
sb.append(off);
|
||||
sb.append(',');
|
||||
sb.append(cumulativeDiff);
|
||||
sb.append(')');
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,137 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
* Simplistic {@link CharFilter} that applies the mappings
|
||||
* contained in a {@link NormalizeCharMap} to the character
|
||||
* stream, and correcting the resulting changes to the
|
||||
* offsets.
|
||||
*/
|
||||
public class MappingCharFilter extends BaseCharFilter {
|
||||
|
||||
private final NormalizeCharMap normMap;
|
||||
private LinkedList<Character> buffer;
|
||||
private String replacement;
|
||||
private int charPointer;
|
||||
private int nextCharCounter;
|
||||
|
||||
/** Default constructor that takes a {@link CharStream}. */
|
||||
public MappingCharFilter(NormalizeCharMap normMap, CharStream in) {
|
||||
super(in);
|
||||
this.normMap = normMap;
|
||||
}
|
||||
|
||||
/** Easy-use constructor that takes a {@link Reader}. */
|
||||
public MappingCharFilter(NormalizeCharMap normMap, Reader in) {
|
||||
super(CharReader.get(in));
|
||||
this.normMap = normMap;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
while(true) {
|
||||
if (replacement != null && charPointer < replacement.length()) {
|
||||
return replacement.charAt(charPointer++);
|
||||
}
|
||||
|
||||
int firstChar = nextChar();
|
||||
if (firstChar == -1) return -1;
|
||||
NormalizeCharMap nm = normMap.submap != null ?
|
||||
normMap.submap.get(Character.valueOf((char) firstChar)) : null;
|
||||
if (nm == null) return firstChar;
|
||||
NormalizeCharMap result = match(nm);
|
||||
if (result == null) return firstChar;
|
||||
replacement = result.normStr;
|
||||
charPointer = 0;
|
||||
if (result.diff != 0) {
|
||||
int prevCumulativeDiff = getLastCumulativeDiff();
|
||||
if (result.diff < 0) {
|
||||
for(int i = 0; i < -result.diff ; i++)
|
||||
addOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i);
|
||||
} else {
|
||||
addOffCorrectMap(nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int nextChar() throws IOException {
|
||||
nextCharCounter++;
|
||||
if (buffer != null && !buffer.isEmpty()) {
|
||||
return buffer.removeFirst().charValue();
|
||||
}
|
||||
return input.read();
|
||||
}
|
||||
|
||||
private void pushChar(int c) {
|
||||
nextCharCounter--;
|
||||
if(buffer == null)
|
||||
buffer = new LinkedList<Character>();
|
||||
buffer.addFirst(Character.valueOf((char) c));
|
||||
}
|
||||
|
||||
private void pushLastChar(int c) {
|
||||
if (buffer == null) {
|
||||
buffer = new LinkedList<Character>();
|
||||
}
|
||||
buffer.addLast(Character.valueOf((char) c));
|
||||
}
|
||||
|
||||
private NormalizeCharMap match(NormalizeCharMap map) throws IOException {
|
||||
NormalizeCharMap result = null;
|
||||
if (map.submap != null) {
|
||||
int chr = nextChar();
|
||||
if (chr != -1) {
|
||||
NormalizeCharMap subMap = map.submap.get(Character.valueOf((char) chr));
|
||||
if (subMap != null) {
|
||||
result = match(subMap);
|
||||
}
|
||||
if (result == null) {
|
||||
pushChar(chr);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (result == null && map.normStr != null) {
|
||||
result = map;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(char[] cbuf, int off, int len) throws IOException {
|
||||
char[] tmp = new char[len];
|
||||
int l = input.read(tmp, 0, len);
|
||||
if (l != -1) {
|
||||
for(int i = 0; i < l; i++)
|
||||
pushLastChar(tmp[i]);
|
||||
}
|
||||
l = 0;
|
||||
for(int i = off; i < off + len; i++) {
|
||||
int c = read();
|
||||
if (c == -1) break;
|
||||
cbuf[i] = (char) c;
|
||||
l++;
|
||||
}
|
||||
return l == 0 ? -1 : l;
|
||||
}
|
||||
}
|
|
@ -24,7 +24,7 @@ import java.io.Reader;
|
|||
import java.util.HashMap;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.BaseCharFilter;
|
||||
import org.apache.lucene.analysis.charfilter.BaseCharFilter;
|
||||
import org.apache.lucene.analysis.CharReader;
|
||||
import org.apache.lucene.analysis.CharStream;
|
||||
|
||||
|
|
|
@ -25,8 +25,8 @@ import java.util.regex.Matcher;
|
|||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.CharStream;
|
||||
import org.apache.lucene.analysis.MappingCharFilter;
|
||||
import org.apache.lucene.analysis.NormalizeCharMap;
|
||||
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
|
||||
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
|
||||
import org.apache.solr.common.ResourceLoader;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.util.plugin.ResourceLoaderAware;
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.util.LinkedList;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.BaseCharFilter;
|
||||
import org.apache.lucene.analysis.charfilter.BaseCharFilter;
|
||||
import org.apache.lucene.analysis.CharStream;
|
||||
|
||||
/**
|
||||
|
|
|
@ -26,8 +26,8 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.analysis.CharReader;
|
||||
import org.apache.lucene.analysis.CharStream;
|
||||
import org.apache.lucene.analysis.MappingCharFilter;
|
||||
import org.apache.lucene.analysis.NormalizeCharMap;
|
||||
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
|
||||
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
|
|
|
@ -312,7 +312,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
assertNotNull("expecting an index token analysis for field type 'charfilthtmlmap'", indexPart);
|
||||
|
||||
assertEquals(" whátëvêr ", indexPart.get("org.apache.solr.analysis.HTMLStripCharFilter"));
|
||||
assertEquals(" whatever ", indexPart.get("org.apache.lucene.analysis.MappingCharFilter"));
|
||||
assertEquals(" whatever ", indexPart.get("org.apache.lucene.analysis.charfilter.MappingCharFilter"));
|
||||
|
||||
List<NamedList> tokenList = (List<NamedList>)indexPart.get("org.apache.lucene.analysis.WhitespaceTokenizer");
|
||||
assertNotNull("Expecting WhitespaceTokenizer analysis breakdown", tokenList);
|
||||
|
|
Loading…
Reference in New Issue