diff --git a/src/java/org/apache/solr/analysis/PatternReplaceCharFilter.java b/src/java/org/apache/solr/analysis/PatternReplaceCharFilter.java index adc24636a0e..bb06f2c0115 100644 --- a/src/java/org/apache/solr/analysis/PatternReplaceCharFilter.java +++ b/src/java/org/apache/solr/analysis/PatternReplaceCharFilter.java @@ -63,24 +63,24 @@ public class PatternReplaceCharFilter extends BaseCharFilter { private String replaceBlockBuffer; private int replaceBlockBufferOffset; - public PatternReplaceCharFilter( String pattern, String replacement, CharStream in ){ + public PatternReplaceCharFilter( Pattern pattern, String replacement, CharStream in ){ this( pattern, replacement, DEFAULT_MAX_BLOCK_CHARS, null, in ); } - public PatternReplaceCharFilter( String pattern, String replacement, + public PatternReplaceCharFilter( Pattern pattern, String replacement, int maxBlockChars, CharStream in ){ this( pattern, replacement, maxBlockChars, null, in ); } - public PatternReplaceCharFilter( String pattern, String replacement, + public PatternReplaceCharFilter( Pattern pattern, String replacement, String blockDelimiters, CharStream in ){ this( pattern, replacement, DEFAULT_MAX_BLOCK_CHARS, blockDelimiters, in ); } - public PatternReplaceCharFilter( String pattern, String replacement, + public PatternReplaceCharFilter( Pattern pattern, String replacement, int maxBlockChars, String blockDelimiters, CharStream in ){ super( in ); - this.pattern = Pattern.compile( pattern ); + this.pattern = pattern; this.replacement = replacement; if( maxBlockChars < 1 ) throw new IllegalArgumentException( "maxBlockChars should be greater than 0, but it is " + maxBlockChars ); diff --git a/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java b/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java index 958279b7452..19b97ee992a 100644 --- a/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java +++ b/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java @@ -18,6 +18,8 @@ package org.apache.solr.analysis; import java.util.Map; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import org.apache.lucene.analysis.CharStream; @@ -28,16 +30,20 @@ import org.apache.lucene.analysis.CharStream; */ public class PatternReplaceCharFilterFactory extends BaseCharFilterFactory { - private String pattern; + private Pattern p; private String replacement; private int maxBlockChars; private String blockDelimiters; public void init(Map args) { super.init( args ); - pattern = args.get( "pattern" ); - if( pattern == null ) - pattern = ""; + try { + p = Pattern.compile(args.get("pattern")); + } catch (PatternSyntaxException e) { + throw new RuntimeException + ("Configuration Error: 'pattern' can not be parsed in " + + this.getClass().getName(), e); + } replacement = args.get( "replacement" ); if( replacement == null ) replacement = ""; @@ -46,6 +52,6 @@ public class PatternReplaceCharFilterFactory extends BaseCharFilterFactory { } public CharStream create(CharStream input) { - return new PatternReplaceCharFilter( pattern, replacement, maxBlockChars, blockDelimiters, input ); + return new PatternReplaceCharFilter( p, replacement, maxBlockChars, blockDelimiters, input ); } } diff --git a/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java b/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java index cc1f5ed3e76..3241da80914 100644 --- a/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java +++ b/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.StringReader; import java.util.HashMap; import java.util.Map; +import java.util.regex.Pattern; import org.apache.lucene.analysis.CharReader; import org.apache.lucene.analysis.CharStream; @@ -94,7 +95,7 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase { // aa##bb###cc dd public void test1block1matchLonger() throws IOException { final String BLOCK = "aa bb cc dd"; - CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)\\s+(cc)", "$1##$2###$3", + CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1##$2###$3", CharReader.get( new StringReader( BLOCK ) ) ); TokenStream ts = new WhitespaceTokenizer( cs ); assertTokenStreamContents(ts, @@ -109,7 +110,7 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase { // aa aa public void test1block2matchLonger() throws IOException { final String BLOCK = " a a"; - CharStream cs = new PatternReplaceCharFilter( "a", "aa", + CharStream cs = new PatternReplaceCharFilter( pattern("a"), "aa", CharReader.get( new StringReader( BLOCK ) ) ); TokenStream ts = new WhitespaceTokenizer( cs ); assertTokenStreamContents(ts, @@ -125,7 +126,7 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase { // aa#bb dd public void test1block1matchShorter() throws IOException { final String BLOCK = "aa bb cc dd"; - CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)\\s+(cc)", "$1#$2", + CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2", CharReader.get( new StringReader( BLOCK ) ) ); TokenStream ts = new WhitespaceTokenizer( cs ); assertTokenStreamContents(ts, @@ -141,7 +142,7 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase { // aa bb cc --- aa bb aa bb cc public void test1blockMultiMatches() throws IOException { final String BLOCK = " aa bb cc --- aa bb aa bb cc"; - CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)\\s+(cc)", "$1 $2 $3", + CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1 $2 $3", CharReader.get( new StringReader( BLOCK ) ) ); TokenStream ts = new WhitespaceTokenizer( cs ); assertTokenStreamContents(ts, @@ -157,7 +158,7 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase { // aa##bb cc --- aa##bb aa. bb aa##bb cc public void test2blocksMultiMatches() throws IOException { final String BLOCK = " aa bb cc --- aa bb aa. bb aa bb cc"; - CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)", "$1##$2", ".", + CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)"), "$1##$2", ".", CharReader.get( new StringReader( BLOCK ) ) ); TokenStream ts = new WhitespaceTokenizer( cs ); assertTokenStreamContents(ts, @@ -173,10 +174,10 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase { // aa b - c . --- b aa . c c b public void testChain() throws IOException { final String BLOCK = " a bb - ccc . --- bb a . ccc ccc bb"; - CharStream cs = new PatternReplaceCharFilter( "a", "aa", ".", + CharStream cs = new PatternReplaceCharFilter( pattern("a"), "aa", ".", CharReader.get( new StringReader( BLOCK ) ) ); - cs = new PatternReplaceCharFilter( "bb", "b", ".", cs ); - cs = new PatternReplaceCharFilter( "ccc", "c", ".", cs ); + cs = new PatternReplaceCharFilter( pattern("bb"), "b", ".", cs ); + cs = new PatternReplaceCharFilter( pattern("ccc"), "c", ".", cs ); TokenStream ts = new WhitespaceTokenizer( cs ); assertTokenStreamContents(ts, new String[] { "aa", "b", "-", "c", ".", "---", "b", "aa", ".", "c", "c", "b" }, @@ -184,4 +185,8 @@ public class TestPatternReplaceCharFilter extends BaseTokenTestCase { new int[] { 2, 5, 7, 11, 13, 17, 20, 22, 24, 28, 32, 35 }, new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }); } + + private Pattern pattern( String p ){ + return Pattern.compile( p ); + } }