SOLR-1850: don't copy word set for each KeepWordFilter instance

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@928133 13f79535-47bb-0310-9956-ffa450edef68
2010-03-27 03:44:06 +00:00 · 2010-03-27 03:44:06 +00:00 · fc98d80345
parent b83700b51e
commit fc98d80345
4 changed files with 16 additions and 2 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -235,6 +235,9 @@ Bug Fixes
 * SOLR-1797: fix ConcurrentModificationException and potential memory
  leaks in ResourceLoader. (yonik)

+* SOLR-1850: change KeepWordFilter so a new word set is not created for
+  each instance (John Wang via yonik)
+
 Other Changes
 ----------------------

--- a/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java
@ -39,8 +39,14 @@ public final class KeepWordFilter extends TokenFilter {
  private final TermAttribute termAtt;

  public KeepWordFilter(TokenStream in, Set<String> words, boolean ignoreCase ) {
+    this(in, new CharArraySet(words, ignoreCase));
+  }
+
+  /** The words set passed to this constructor will be directly used by this filter
+   * and should not be modified, */
+  public KeepWordFilter(TokenStream in, CharArraySet words) {
    super(in);
-    this.words = new CharArraySet(words, ignoreCase);
+    this.words = words;
    this.termAtt = (TermAttribute)addAttribute(TermAttribute.class);
  }

--- a/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java
@ -72,10 +72,13 @@ public class KeepWordFilterFactory extends BaseTokenFilterFactory implements Res

  public void setIgnoreCase(boolean ignoreCase) {
    this.ignoreCase = ignoreCase;
+    if (words != null) {
+      words = new CharArraySet(words, ignoreCase);
+    }
  }

  public KeepWordFilter create(TokenStream input) {
-    return new KeepWordFilter(input, (Set)words, ignoreCase);
+    return new KeepWordFilter(input, words);
  }

  public CharArraySet getWords() {
--- a/solr/src/test/org/apache/solr/analysis/TestKeepWordFilter.java
+++ b/solr/src/test/org/apache/solr/analysis/TestKeepWordFilter.java
@ -66,10 +66,12 @@ public class TestKeepWordFilter extends BaseTokenTestCase {
    assertTokenStreamContents(stream, new String[] { "aaa", "BBB" });
    
    // Now force case
+    factory = new KeepWordFilterFactory();
    args = new HashMap<String, String>();
    args.put( "ignoreCase", "false" );
    factory.init( args );
    factory.inform( loader );
+    factory.setWords( words );    
    assertFalse(factory.isIgnoreCase());
    stream = factory.create(new WhitespaceTokenizer(new StringReader(input)));
    assertTokenStreamContents(stream, new String[] { "aaa" });