mirror of https://github.com/apache/lucene.git
SOLR-1653: PatternReplaceCharFilter should receive a Pattern object instead of pattern String
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@897357 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
543395125a
commit
f325bcd692
|
@ -63,24 +63,24 @@ public class PatternReplaceCharFilter extends BaseCharFilter {
|
||||||
private String replaceBlockBuffer;
|
private String replaceBlockBuffer;
|
||||||
private int replaceBlockBufferOffset;
|
private int replaceBlockBufferOffset;
|
||||||
|
|
||||||
public PatternReplaceCharFilter( String pattern, String replacement, CharStream in ){
|
public PatternReplaceCharFilter( Pattern pattern, String replacement, CharStream in ){
|
||||||
this( pattern, replacement, DEFAULT_MAX_BLOCK_CHARS, null, in );
|
this( pattern, replacement, DEFAULT_MAX_BLOCK_CHARS, null, in );
|
||||||
}
|
}
|
||||||
|
|
||||||
public PatternReplaceCharFilter( String pattern, String replacement,
|
public PatternReplaceCharFilter( Pattern pattern, String replacement,
|
||||||
int maxBlockChars, CharStream in ){
|
int maxBlockChars, CharStream in ){
|
||||||
this( pattern, replacement, maxBlockChars, null, in );
|
this( pattern, replacement, maxBlockChars, null, in );
|
||||||
}
|
}
|
||||||
|
|
||||||
public PatternReplaceCharFilter( String pattern, String replacement,
|
public PatternReplaceCharFilter( Pattern pattern, String replacement,
|
||||||
String blockDelimiters, CharStream in ){
|
String blockDelimiters, CharStream in ){
|
||||||
this( pattern, replacement, DEFAULT_MAX_BLOCK_CHARS, blockDelimiters, in );
|
this( pattern, replacement, DEFAULT_MAX_BLOCK_CHARS, blockDelimiters, in );
|
||||||
}
|
}
|
||||||
|
|
||||||
public PatternReplaceCharFilter( String pattern, String replacement,
|
public PatternReplaceCharFilter( Pattern pattern, String replacement,
|
||||||
int maxBlockChars, String blockDelimiters, CharStream in ){
|
int maxBlockChars, String blockDelimiters, CharStream in ){
|
||||||
super( in );
|
super( in );
|
||||||
this.pattern = Pattern.compile( pattern );
|
this.pattern = pattern;
|
||||||
this.replacement = replacement;
|
this.replacement = replacement;
|
||||||
if( maxBlockChars < 1 )
|
if( maxBlockChars < 1 )
|
||||||
throw new IllegalArgumentException( "maxBlockChars should be greater than 0, but it is " + maxBlockChars );
|
throw new IllegalArgumentException( "maxBlockChars should be greater than 0, but it is " + maxBlockChars );
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
package org.apache.solr.analysis;
|
package org.apache.solr.analysis;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.regex.PatternSyntaxException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharStream;
|
import org.apache.lucene.analysis.CharStream;
|
||||||
|
|
||||||
|
@ -28,16 +30,20 @@ import org.apache.lucene.analysis.CharStream;
|
||||||
*/
|
*/
|
||||||
public class PatternReplaceCharFilterFactory extends BaseCharFilterFactory {
|
public class PatternReplaceCharFilterFactory extends BaseCharFilterFactory {
|
||||||
|
|
||||||
private String pattern;
|
private Pattern p;
|
||||||
private String replacement;
|
private String replacement;
|
||||||
private int maxBlockChars;
|
private int maxBlockChars;
|
||||||
private String blockDelimiters;
|
private String blockDelimiters;
|
||||||
|
|
||||||
public void init(Map<String, String> args) {
|
public void init(Map<String, String> args) {
|
||||||
super.init( args );
|
super.init( args );
|
||||||
pattern = args.get( "pattern" );
|
try {
|
||||||
if( pattern == null )
|
p = Pattern.compile(args.get("pattern"));
|
||||||
pattern = "";
|
} catch (PatternSyntaxException e) {
|
||||||
|
throw new RuntimeException
|
||||||
|
("Configuration Error: 'pattern' can not be parsed in " +
|
||||||
|
this.getClass().getName(), e);
|
||||||
|
}
|
||||||
replacement = args.get( "replacement" );
|
replacement = args.get( "replacement" );
|
||||||
if( replacement == null )
|
if( replacement == null )
|
||||||
replacement = "";
|
replacement = "";
|
||||||
|
@ -46,6 +52,6 @@ public class PatternReplaceCharFilterFactory extends BaseCharFilterFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
public CharStream create(CharStream input) {
|
public CharStream create(CharStream input) {
|
||||||
return new PatternReplaceCharFilter( pattern, replacement, maxBlockChars, blockDelimiters, input );
|
return new PatternReplaceCharFilter( p, replacement, maxBlockChars, blockDelimiters, input );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharReader;
|
import org.apache.lucene.analysis.CharReader;
|
||||||
import org.apache.lucene.analysis.CharStream;
|
import org.apache.lucene.analysis.CharStream;
|
||||||
|
@ -94,7 +95,7 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase {
|
||||||
// aa##bb###cc dd
|
// aa##bb###cc dd
|
||||||
public void test1block1matchLonger() throws IOException {
|
public void test1block1matchLonger() throws IOException {
|
||||||
final String BLOCK = "aa bb cc dd";
|
final String BLOCK = "aa bb cc dd";
|
||||||
CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)\\s+(cc)", "$1##$2###$3",
|
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1##$2###$3",
|
||||||
CharReader.get( new StringReader( BLOCK ) ) );
|
CharReader.get( new StringReader( BLOCK ) ) );
|
||||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||||
assertTokenStreamContents(ts,
|
assertTokenStreamContents(ts,
|
||||||
|
@ -109,7 +110,7 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase {
|
||||||
// aa aa
|
// aa aa
|
||||||
public void test1block2matchLonger() throws IOException {
|
public void test1block2matchLonger() throws IOException {
|
||||||
final String BLOCK = " a a";
|
final String BLOCK = " a a";
|
||||||
CharStream cs = new PatternReplaceCharFilter( "a", "aa",
|
CharStream cs = new PatternReplaceCharFilter( pattern("a"), "aa",
|
||||||
CharReader.get( new StringReader( BLOCK ) ) );
|
CharReader.get( new StringReader( BLOCK ) ) );
|
||||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||||
assertTokenStreamContents(ts,
|
assertTokenStreamContents(ts,
|
||||||
|
@ -125,7 +126,7 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase {
|
||||||
// aa#bb dd
|
// aa#bb dd
|
||||||
public void test1block1matchShorter() throws IOException {
|
public void test1block1matchShorter() throws IOException {
|
||||||
final String BLOCK = "aa bb cc dd";
|
final String BLOCK = "aa bb cc dd";
|
||||||
CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)\\s+(cc)", "$1#$2",
|
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2",
|
||||||
CharReader.get( new StringReader( BLOCK ) ) );
|
CharReader.get( new StringReader( BLOCK ) ) );
|
||||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||||
assertTokenStreamContents(ts,
|
assertTokenStreamContents(ts,
|
||||||
|
@ -141,7 +142,7 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase {
|
||||||
// aa bb cc --- aa bb aa bb cc
|
// aa bb cc --- aa bb aa bb cc
|
||||||
public void test1blockMultiMatches() throws IOException {
|
public void test1blockMultiMatches() throws IOException {
|
||||||
final String BLOCK = " aa bb cc --- aa bb aa bb cc";
|
final String BLOCK = " aa bb cc --- aa bb aa bb cc";
|
||||||
CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)\\s+(cc)", "$1 $2 $3",
|
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1 $2 $3",
|
||||||
CharReader.get( new StringReader( BLOCK ) ) );
|
CharReader.get( new StringReader( BLOCK ) ) );
|
||||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||||
assertTokenStreamContents(ts,
|
assertTokenStreamContents(ts,
|
||||||
|
@ -157,7 +158,7 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase {
|
||||||
// aa##bb cc --- aa##bb aa. bb aa##bb cc
|
// aa##bb cc --- aa##bb aa. bb aa##bb cc
|
||||||
public void test2blocksMultiMatches() throws IOException {
|
public void test2blocksMultiMatches() throws IOException {
|
||||||
final String BLOCK = " aa bb cc --- aa bb aa. bb aa bb cc";
|
final String BLOCK = " aa bb cc --- aa bb aa. bb aa bb cc";
|
||||||
CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)", "$1##$2", ".",
|
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)"), "$1##$2", ".",
|
||||||
CharReader.get( new StringReader( BLOCK ) ) );
|
CharReader.get( new StringReader( BLOCK ) ) );
|
||||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||||
assertTokenStreamContents(ts,
|
assertTokenStreamContents(ts,
|
||||||
|
@ -173,10 +174,10 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase {
|
||||||
// aa b - c . --- b aa . c c b
|
// aa b - c . --- b aa . c c b
|
||||||
public void testChain() throws IOException {
|
public void testChain() throws IOException {
|
||||||
final String BLOCK = " a bb - ccc . --- bb a . ccc ccc bb";
|
final String BLOCK = " a bb - ccc . --- bb a . ccc ccc bb";
|
||||||
CharStream cs = new PatternReplaceCharFilter( "a", "aa", ".",
|
CharStream cs = new PatternReplaceCharFilter( pattern("a"), "aa", ".",
|
||||||
CharReader.get( new StringReader( BLOCK ) ) );
|
CharReader.get( new StringReader( BLOCK ) ) );
|
||||||
cs = new PatternReplaceCharFilter( "bb", "b", ".", cs );
|
cs = new PatternReplaceCharFilter( pattern("bb"), "b", ".", cs );
|
||||||
cs = new PatternReplaceCharFilter( "ccc", "c", ".", cs );
|
cs = new PatternReplaceCharFilter( pattern("ccc"), "c", ".", cs );
|
||||||
TokenStream ts = new WhitespaceTokenizer( cs );
|
TokenStream ts = new WhitespaceTokenizer( cs );
|
||||||
assertTokenStreamContents(ts,
|
assertTokenStreamContents(ts,
|
||||||
new String[] { "aa", "b", "-", "c", ".", "---", "b", "aa", ".", "c", "c", "b" },
|
new String[] { "aa", "b", "-", "c", ".", "---", "b", "aa", ".", "c", "c", "b" },
|
||||||
|
@ -184,4 +185,8 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase {
|
||||||
new int[] { 2, 5, 7, 11, 13, 17, 20, 22, 24, 28, 32, 35 },
|
new int[] { 2, 5, 7, 11, 13, 17, 20, 22, 24, 28, 32, 35 },
|
||||||
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
|
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Pattern pattern( String p ){
|
||||||
|
return Pattern.compile( p );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue