SOLR-466: add CharArrayMap, update SynonymMap to use char[] rather than String Tokens

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@614793 13f79535-47bb-0310-9956-ffa450edef68
Yonik Seeley 2008-01-24 04:41:38 +00:00
parent fdf340d59a
commit 8348498d08
6 changed files with 692 additions and 59 deletions

SynonymFilter.java

@@ -39,13 +39,11 @@ import java.util.LinkedList;
public class SynonymFilter extends TokenFilter {
private final SynonymMap map; // Map<String, SynonymMap>
private final boolean ignoreCase;
private Iterator replacement; // iterator over generated tokens
private Iterator<Token> replacement; // iterator over generated tokens
public SynonymFilter(TokenStream in, SynonymMap map, boolean ignoreCase) {
public SynonymFilter(TokenStream in, SynonymMap map) {
super(in);
this.map = map;
this.ignoreCase = ignoreCase;
}
@@ -66,26 +64,26 @@ public class SynonymFilter extends TokenFilter {
* merging token streams to preserve token positions.
* - preserve original positionIncrement of first matched token
*/
public Token next() throws IOException {
@Override
public Token next(Token target) throws IOException {
while (true) {
// if there are any generated tokens, return them... don't try any
// matches against them, as we specifically don't want recursion.
if (replacement!=null && replacement.hasNext()) {
return (Token)replacement.next();
return replacement.next();
}
// common case fast-path of first token not matching anything
Token firstTok = nextTok();
Token firstTok = nextTok(target);
if (firstTok == null) return null;
String str = ignoreCase ? firstTok.termText().toLowerCase() : firstTok.termText();
Object o = map.submap!=null ? map.submap.get(str) : null;
if (o == null) return firstTok;
SynonymMap result = map.submap!=null ? map.submap.get(firstTok.termBuffer(), 0, firstTok.termLength()) : null;
if (result == null) return firstTok;
// OK, we matched a token, so find the longest match.
matched = new LinkedList();
matched = new LinkedList<Token>();
SynonymMap result = match((SynonymMap)o);
result = match(result);
if (result==null) {
// no match, simply return the first token read.
@@ -93,13 +91,13 @@ public class SynonymFilter extends TokenFilter {
}
// reuse, or create new one each time?
ArrayList generated = new ArrayList(result.synonyms.length + matched.size() + 1);
ArrayList<Token> generated = new ArrayList<Token>(result.synonyms.length + matched.size() + 1);
//
// there was a match... let's generate the new tokens, merging
// in the matched tokens (position increments need adjusting)
//
Token lastTok = matched.isEmpty() ? firstTok : (Token)matched.getLast();
Token lastTok = matched.isEmpty() ? firstTok : matched.getLast();
boolean includeOrig = result.includeOrig();
Token origTok = includeOrig ? firstTok : null;
@@ -109,7 +107,8 @@ public class SynonymFilter extends TokenFilter {
for (int i=0; i<result.synonyms.length; i++) {
Token repTok = result.synonyms[i];
Token newTok = new Token(repTok.termText(), firstTok.startOffset(), lastTok.endOffset(), firstTok.type());
Token newTok = new Token(firstTok.startOffset(), lastTok.endOffset(), firstTok.type());
newTok.setTermBuffer(repTok.termBuffer(), 0, repTok.termLength());
repPos += repTok.getPositionIncrement();
if (i==0) repPos=origPos; // make position of first token equal to original
@@ -118,7 +117,7 @@ public class SynonymFilter extends TokenFilter {
origTok.setPositionIncrement(origPos-pos);
generated.add(origTok);
pos += origTok.getPositionIncrement();
origTok = matched.isEmpty() ? null : (Token)matched.removeFirst();
origTok = matched.isEmpty() ? null : matched.removeFirst();
if (origTok != null) origPos += origTok.getPositionIncrement();
}
@@ -132,7 +131,7 @@ public class SynonymFilter extends TokenFilter {
origTok.setPositionIncrement(origPos-pos);
generated.add(origTok);
pos += origTok.getPositionIncrement();
origTok = matched.isEmpty() ? null : (Token)matched.removeFirst();
origTok = matched.isEmpty() ? null : matched.removeFirst();
if (origTok != null) origPos += origTok.getPositionIncrement();
}
@@ -152,21 +151,27 @@ public class SynonymFilter extends TokenFilter {
// Defer creation of the buffer until the first time it is used to
// optimize short fields with no matches.
//
private LinkedList buffer;
private LinkedList matched;
// TODO: use ArrayList for better performance?
private LinkedList<Token> buffer;
private LinkedList<Token> matched;
private Token nextTok() throws IOException {
if (buffer!=null && !buffer.isEmpty()) {
return (Token)buffer.removeFirst();
return buffer.removeFirst();
} else {
return input.next();
}
}
private Token nextTok(Token target) throws IOException {
if (buffer!=null && !buffer.isEmpty()) {
return buffer.removeFirst();
} else {
return input.next(target);
}
}
private void pushTok(Token t) {
if (buffer==null) buffer=new LinkedList();
if (buffer==null) buffer=new LinkedList<Token>();
buffer.addFirst(t);
}
@@ -177,9 +182,7 @@ public class SynonymFilter extends TokenFilter {
Token tok = nextTok();
if (tok != null) {
// check for positionIncrement!=1? if>1, should not match, if==0, check multiple at this level?
String str = ignoreCase ? tok.termText().toLowerCase() : tok.termText();
SynonymMap subMap = (SynonymMap)map.submap.get(str);
SynonymMap subMap = map.submap.get(tok.termBuffer(), 0, tok.termLength());
if (subMap != null) {
// recurse
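The heart of this change is visible in the two lookups above: instead of materializing a (possibly lowercased) String for every token, the filter now probes the submap with the token's term buffer. A minimal sketch of that path, assuming the Lucene 2.3-era Token API used in this file; the helper class below is illustrative, not part of the commit, and sits in org.apache.solr.analysis only so the package-private submap field is visible.

// Illustrative sketch -- not part of the commit.
package org.apache.solr.analysis;

import org.apache.lucene.analysis.Token;

class SynonymLookupSketch {
  /**
   * New lookup path: probe the CharArrayMap-backed submap with the token's
   * term buffer directly. The old code built a String per token (lowercasing
   * it when ignoreCase was set); case folding now happens inside CharArrayMap
   * when the SynonymMap was constructed with ignoreCase=true.
   */
  static SynonymMap lookup(SynonymMap map, Token tok) {
    return map.submap != null
        ? map.submap.get(tok.termBuffer(), 0, tok.termLength())
        : null;
  }
}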

SynonymFilterFactory.java

@@ -35,8 +35,8 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
public void inform(ResourceLoader loader) {
String synonyms = args.get("synonyms");
ignoreCase = getBoolean("ignoreCase",false);
expand = getBoolean("expand",true);
boolean ignoreCase = getBoolean("ignoreCase", false);
boolean expand = getBoolean("expand", true);
if (synonyms != null) {
List<String> wlist=null;
@@ -45,8 +45,8 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
} catch (IOException e) {
throw new RuntimeException(e);
}
synMap = new SynonymMap();
parseRules(wlist, synMap, "=>", ",", ignoreCase,expand);
synMap = new SynonymMap(ignoreCase);
parseRules(wlist, synMap, "=>", ",", expand);
if (wlist.size()<=20) {
SolrCore.log.fine("SynonymMap "+synonyms +":"+synMap);
}
@@ -54,10 +54,8 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
}
private SynonymMap synMap;
private boolean ignoreCase;
private boolean expand;
private static void parseRules(List<String> rules, SynonymMap map, String mappingSep, String synSep, boolean ignoreCase, boolean expansion) {
private static void parseRules(List<String> rules, SynonymMap map, String mappingSep, String synSep, boolean expansion) {
int count=0;
for (String rule : rules) {
// To use regexes, we need an expression that specifies an odd number of chars.
@@ -91,10 +89,11 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
for (List<String> fromToks : source) {
count++;
for (List<String> toToks : target) {
map.add(ignoreCase ? StrUtils.toLower(fromToks) : fromToks,
map.add(fromToks,
SynonymMap.makeTokens(toToks),
includeOrig,
true);
true
);
}
}
}
@@ -114,7 +113,7 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
public SynonymFilter create(TokenStream input) {
return new SynonymFilter(input,synMap,ignoreCase);
return new SynonymFilter(input,synMap);
}
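To make the parseRules loop above concrete, here is a hand-built equivalent of a single explicit rule such as "sea biscuit => seabiscuit", written against the SynonymMap API introduced in this commit; the rule text, class name, and method are illustrative, not taken from the factory's test data.

// Illustrative sketch -- not part of the commit.
package org.apache.solr.analysis;

import java.util.Arrays;
import java.util.List;

class SynonymRuleSketch {
  static SynonymMap buildExample() {
    SynonymMap map = new SynonymMap(true);                // ignoreCase now lives in the map
    List<String> from = Arrays.asList("sea", "biscuit");  // token sequence to match
    List<String> to = Arrays.asList("seabiscuit");        // replacement token sequence
    map.add(from, SynonymMap.makeTokens(to),
            false /* includeOrig */, true /* mergeExisting */);
    return map;
  }
}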

SynonymMap.java

@@ -18,6 +18,7 @@
package org.apache.solr.analysis;
import org.apache.lucene.analysis.Token;
import org.apache.solr.util.CharArrayMap;
import java.util.*;
@@ -26,13 +27,20 @@ import java.util.*;
* @version $Id$
*/
public class SynonymMap {
Map submap; // recursive: Map<String, SynonymMap>
CharArrayMap<SynonymMap> submap; // recursive: Map<String, SynonymMap>
Token[] synonyms;
int flags;
static final int INCLUDE_ORIG=0x01;
static final int IGNORE_CASE=0x02;
public SynonymMap() {}
public SynonymMap(boolean ignoreCase) {
if (ignoreCase) flags |= IGNORE_CASE;
}
public boolean includeOrig() { return (flags & INCLUDE_ORIG) != 0; }
public boolean ignoreCase() { return (flags & IGNORE_CASE) != 0; }
/**
* @param singleMatch List<String>, the sequence of strings to match
@@ -40,17 +48,17 @@ public class SynonymMap {
* @param includeOrig sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
* @param mergeExisting merge the replacement tokens with any other mappings that exist
*/
public void add(List singleMatch, List replacement, boolean includeOrig, boolean mergeExisting) {
public void add(List<String> singleMatch, List<Token> replacement, boolean includeOrig, boolean mergeExisting) {
SynonymMap currMap = this;
for (Iterator iter = singleMatch.iterator(); iter.hasNext();) {
String str = (String)iter.next();
for (String str : singleMatch) {
if (currMap.submap==null) {
currMap.submap = new HashMap(1);
currMap.submap = new CharArrayMap<SynonymMap>(1, ignoreCase());
}
SynonymMap map = (SynonymMap)currMap.submap.get(str);
SynonymMap map = currMap.submap.get(str);
if (map==null) {
map = new SynonymMap();
map.flags |= flags & IGNORE_CASE;
currMap.submap.put(str, map);
}
@@ -68,7 +76,7 @@ public class SynonymMap {
public String toString() {
StringBuffer sb = new StringBuffer("<");
StringBuilder sb = new StringBuilder("<");
if (synonyms!=null) {
sb.append("[");
for (int i=0; i<synonyms.length; i++) {
@@ -88,10 +96,12 @@ public class SynonymMap {
/** Produces a List<Token> from a List<String> */
public static List makeTokens(List strings) {
List ret = new ArrayList(strings.size());
for (Iterator iter = strings.iterator(); iter.hasNext();) {
Token newTok = new Token((String)iter.next(),0,0,"SYNONYM");
public static List<Token> makeTokens(List<String> strings) {
List<Token> ret = new ArrayList<Token>(strings.size());
for (String str : strings) {
//Token newTok = new Token(str,0,0,"SYNONYM");
Token newTok = new Token(0,0,"SYNONYM");
newTok.setTermBuffer(str.toCharArray(), 0, str.length());
ret.add(newTok);
}
return ret;
@@ -106,8 +116,8 @@ public class SynonymMap {
* Example: [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2] (a,n means a has posInc=n)
*
*/
public static List mergeTokens(List lst1, List lst2) {
ArrayList result = new ArrayList();
public static List<Token> mergeTokens(List<Token> lst1, List<Token> lst2) {
ArrayList<Token> result = new ArrayList<Token>();
if (lst1 ==null || lst2 ==null) {
if (lst2 != null) result.addAll(lst2);
if (lst1 != null) result.addAll(lst1);
@@ -115,27 +125,29 @@ public class SynonymMap {
}
int pos=0;
Iterator iter1=lst1.iterator();
Iterator iter2=lst2.iterator();
Token tok1 = iter1.hasNext() ? (Token)iter1.next() : null;
Token tok2 = iter2.hasNext() ? (Token)iter2.next() : null;
Iterator<Token> iter1=lst1.iterator();
Iterator<Token> iter2=lst2.iterator();
Token tok1 = iter1.hasNext() ? iter1.next() : null;
Token tok2 = iter2.hasNext() ? iter2.next() : null;
int pos1 = tok1!=null ? tok1.getPositionIncrement() : 0;
int pos2 = tok2!=null ? tok2.getPositionIncrement() : 0;
while(tok1!=null || tok2!=null) {
while (tok1 != null && (pos1 <= pos2 || tok2==null)) {
Token tok = new Token(tok1.termText(), tok1.startOffset(), tok1.endOffset(), tok1.type());
Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
tok.setTermBuffer(tok1.termBuffer(), 0, tok1.termLength());
tok.setPositionIncrement(pos1-pos);
result.add(tok);
pos=pos1;
tok1 = iter1.hasNext() ? (Token)iter1.next() : null;
tok1 = iter1.hasNext() ? iter1.next() : null;
pos1 += tok1!=null ? tok1.getPositionIncrement() : 0;
}
while (tok2 != null && (pos2 <= pos1 || tok1==null)) {
Token tok = new Token(tok2.termText(), tok2.startOffset(), tok2.endOffset(), tok2.type());
Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
tok.setTermBuffer(tok2.termBuffer(), 0, tok2.termLength());
tok.setPositionIncrement(pos2-pos);
result.add(tok);
pos=pos2;
tok2 = iter2.hasNext() ? (Token)iter2.next() : null;
tok2 = iter2.hasNext() ? iter2.next() : null;
pos2 += tok2!=null ? tok2.getPositionIncrement() : 0;
}
}
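The mergeTokens example in the comment above ([a,5 b,2] merged with [c d,4 e,4]) can be exercised directly. A minimal runnable sketch, assuming the Lucene 2.3-era Token API used in this file; the helper class and the expected output are worked out from the merge loop, not taken from the commit.

// Illustrative sketch -- not part of the commit.
package org.apache.solr.analysis;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Token;

class MergeTokensSketch {
  static Token tok(String text, int posInc) {
    Token t = new Token(0, 0, "SYNONYM");
    t.setTermBuffer(text.toCharArray(), 0, text.length());
    t.setPositionIncrement(posInc);
    return t;
  }

  public static void main(String[] args) {
    List<Token> lst1 = new ArrayList<Token>();
    lst1.add(tok("a", 5));
    lst1.add(tok("b", 2));

    List<Token> lst2 = new ArrayList<Token>();
    lst2.add(tok("c", 1));
    lst2.add(tok("d", 4));
    lst2.add(tok("e", 4));

    // Expected: c,1  d,4  a,0  b,2  e,2
    // (absolute positions: c@1, then d and a sharing position 5, b@7, e@9)
    for (Token t : SynonymMap.mergeTokens(lst1, lst2)) {
      System.out.println(new String(t.termBuffer(), 0, t.termLength())
          + "," + t.getPositionIncrement());
    }
  }
}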

CharArrayMap.java (new file)

@@ -0,0 +1,411 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import java.util.*;
import java.io.Serializable;
/**
* A simple class that stores key Strings as char[]'s in a
* hash table. Note that this is not a general purpose
* class. For example, it cannot remove items from the
* map, nor does it resize its hash table to be smaller,
* etc. It is designed to be quick to retrieve items
* by char[] keys without the necessity of converting
* to a String first.
*/
public class CharArrayMap<V> extends AbstractMap<String, V>
implements Map<String, V>, Cloneable, Serializable
{
private final static int INIT_SIZE = 2;
private char[][] keys;
private Object[] values;
private int count;
private final boolean ignoreCase;
/** Create a map with enough capacity to hold initialCapacity
* entries */
public CharArrayMap(int initialCapacity, boolean ignoreCase) {
this.ignoreCase = ignoreCase;
int size = INIT_SIZE;
// pad by 25% (x+x/4) so initialCapacity entries stay under the rehash threshold
initialCapacity = initialCapacity + (initialCapacity >>2);
while(size <= initialCapacity)
size <<= 1;
keys = new char[size][];
values = new Object[size];
}
public boolean ignoreCase() {
return ignoreCase;
}
public V get(char[] key) {
return get(key, 0, key.length);
}
public V get(char[] key, int off, int len) {
return (V)values[getSlot(key, off, len)];
}
public V get(CharSequence key) {
return (V)values[getSlot(key)];
}
@Override
public V get(Object key) {
return (V)values[getSlot(key)];
}
@Override
public boolean containsKey(Object s) {
return keys[getSlot(s)] != null;
}
@Override
public boolean containsValue(Object value) {
if (value == null) {
// search for key with a null value
for (int i=0; i<keys.length; i++) {
if (keys[i] != null && values[i] == null) return true;
}
return false;
}
for (int i=0; i<values.length; i++) {
Object val = values[i];
if (val != null && value.equals(val)) return true;
}
return false;
}
private int getSlot(Object key) {
if (key instanceof char[]) {
char[] keyc = (char[])key;
return getSlot(keyc, 0, keyc.length);
}
return getSlot((CharSequence)key);
}
private int getSlot(char[] key, int off, int len) {
int code = getHashCode(key, off, len);
int pos = code & (keys.length-1);
char[] key2 = keys[pos];
if (key2 != null && !equals(key, off, len, key2)) {
final int inc = ((code>>8)+code)|1;
do {
code += inc;
pos = code & (keys.length-1);
key2 = keys[pos];
} while (key2 != null && !equals(key, off, len, key2));
}
return pos;
}
/** Returns the hash slot for the given key (the slot where it is stored, or an empty slot if absent) */
private int getSlot(CharSequence key) {
int code = getHashCode(key);
int pos = code & (keys.length-1);
char[] key2 = keys[pos];
if (key2 != null && !equals(key, key2)) {
final int inc = ((code>>8)+code)|1;
do {
code += inc;
pos = code & (keys.length-1);
key2 = keys[pos];
} while (key2 != null && !equals(key, key2));
}
return pos;
}
public V put(CharSequence key, V val) {
return put(key.toString(), val); // could be more efficient
}
@Override
public V put(String key, V val) {
return put(key.toCharArray(), val);
}
/** Add this key,val pair to the map.
* The char[] key is directly used, no copy is made.
* If ignoreCase is true for this Map, the key array will be directly modified.
* The user should never modify the key after calling this method.
*/
public V put(char[] key, Object val) {
if (ignoreCase)
for(int i=0;i< key.length;i++)
key[i] = Character.toLowerCase(key[i]);
int slot = getSlot(key, 0, key.length);
if (keys[slot] == null) count++;
Object prev = values[slot];
keys[slot] = key;
values[slot] = val;
if (count + (count>>2) >= keys.length) {
rehash();
}
return (V)prev;
}
private boolean equals(char[] text1, int off, int len, char[] text2) {
if (len != text2.length)
return false;
if (ignoreCase) {
for(int i=0;i<len;i++) {
if (Character.toLowerCase(text1[off+i]) != text2[i])
return false;
}
} else {
for(int i=0;i<len;i++) {
if (text1[off+i] != text2[i])
return false;
}
}
return true;
}
private boolean equals(CharSequence text1, char[] text2) {
int len = text1.length();
if (len != text2.length)
return false;
if (ignoreCase) {
for(int i=0;i<len;i++) {
if (Character.toLowerCase(text1.charAt(i)) != text2[i])
return false;
}
} else {
for(int i=0;i<len;i++) {
if (text1.charAt(i) != text2[i])
return false;
}
}
return true;
}
private void rehash() {
final int newSize = 2* keys.length;
char[][] oldEntries = keys;
Object[] oldValues = values;
keys = new char[newSize][];
values = new Object[newSize];
for(int i=0;i<oldEntries.length;i++) {
char[] key = oldEntries[i];
if (key != null) {
// todo: could be faster... no need to compare keys on collision
// since they are unique
int newSlot = getSlot(key,0,key.length);
keys[newSlot] = key;
values[newSlot] = oldValues[i];
}
}
}
private int getHashCode(char[] text, int off, int len) {
int code = 0;
if (ignoreCase) {
for (int i=0; i<len; i++) {
code = code*31 + Character.toLowerCase(text[off+i]);
}
} else {
for (int i=0; i<len; i++) {
code = code*31 + text[off+i];
}
}
return code;
}
private int getHashCode(CharSequence text) {
int code;
if (ignoreCase) {
code = 0;
int len = text.length();
for (int i=0; i<len; i++) {
code = code*31 + Character.toLowerCase(text.charAt(i));
}
} else {
if (false && text instanceof String) {
code = text.hashCode();
} else {
code = 0;
int len = text.length();
for (int i=0; i<len; i++) {
code = code*31 + text.charAt(i);
}
}
}
return code;
}
@Override
public int size() {
return count;
}
@Override
public boolean isEmpty() {
return count==0;
}
@Override
public void clear() {
count = 0;
Arrays.fill(keys,null);
Arrays.fill(values,null);
}
@Override
public Set<Entry<String, V>> entrySet() {
return new EntrySet();
}
/** Returns an EntryIterator over this Map. */
public EntryIterator iterator() {
return new EntryIterator();
}
/** public iterator class so efficient methods are exposed to users */
public class EntryIterator implements Iterator<Map.Entry<String,V>> {
int pos=-1;
int lastPos;
EntryIterator() {
goNext();
}
private void goNext() {
lastPos = pos;
pos++;
while (pos < keys.length && keys[pos] == null) pos++;
}
public boolean hasNext() {
return pos < keys.length;
}
/** gets the next key... do not modify the returned char[] */
public char[] nextKey() {
goNext();
return keys[lastPos];
}
/** gets the next key as a newly created String object */
public String nextKeyString() {
return new String(nextKey());
}
/** returns the value associated with the last key returned */
public V currentValue() {
return (V)values[lastPos];
}
/** sets the value associated with the last key returned */
public V setValue(V value) {
V old = (V)values[lastPos];
values[lastPos] = value;
return old;
}
/** Returns an Entry<String,V> object created on the fly...
* use nextKey() + currentValue() for better efficiency. */
public Map.Entry<String,V> next() {
goNext();
return new MapEntry(lastPos);
}
public void remove() {
throw new UnsupportedOperationException();
}
}
private class MapEntry implements Map.Entry<String,V> {
final int pos;
MapEntry(int pos) {
this.pos = pos;
}
public char[] getCharArr() {
return keys[pos];
}
public String getKey() {
return new String(getCharArr());
}
public V getValue() {
return (V)values[pos];
}
public V setValue(V value) {
V old = (V)values[pos];
values[pos] = value;
return old;
}
public String toString() {
return getKey() + '=' + getValue();
}
}
private class EntrySet extends AbstractSet<Map.Entry<String, V>> {
public EntryIterator iterator() {
return new EntryIterator();
}
public boolean contains(Object o) {
if (!(o instanceof Map.Entry))
return false;
Map.Entry e = (Map.Entry)o;
Object key = e.getKey();
if (key==null) return false; // we don't support null keys
Object val = e.getValue();
Object v = get(key);
return v==null ? val==null : v.equals(val);
}
public boolean remove(Object o) {
throw new UnsupportedOperationException();
}
public int size() {
return count;
}
public void clear() {
CharArrayMap.this.clear();
}
}
@Override
public Object clone() {
CharArrayMap<V> map = null;
try {
map = (CharArrayMap<V>)super.clone();
map.keys = keys.clone();
map.values = values.clone();
} catch (CloneNotSupportedException e) {
// impossible
}
return map;
}
}
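For reference, a small usage sketch of the map defined above, assuming only the API in this file; the class name and sample values are illustrative.

// Illustrative sketch -- not part of the commit.
package org.apache.solr.util;

public class CharArrayMapUsageSketch {
  public static void main(String[] args) {
    // ignoreCase=true: keys are folded to lower case on insert and during lookup
    CharArrayMap<Integer> map = new CharArrayMap<Integer>(16, true);

    map.put("Foo", 1);               // String keys are converted to char[] internally
    map.put("bar".toCharArray(), 2); // char[] keys are stored directly (and lowercased in place)

    System.out.println(map.get("FOO"));               // 1
    System.out.println(map.get("bar".toCharArray())); // 2
    System.out.println(map.containsKey("baz"));       // false
    System.out.println(map.size());                   // 2
  }
}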

TestSynonymFilter.java

@@ -48,7 +48,7 @@ public class TestSynonymFilter extends BaseTokenTestCase {
}
};
SynonymFilter sf = new SynonymFilter(ts, dict, true);
SynonymFilter sf = new SynonymFilter(ts, dict);
while(true) {
Token t = sf.next();

TestCharArrayMap.java (new file)

@@ -0,0 +1,208 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import junit.framework.TestCase;
import java.util.*;
import org.apache.lucene.analysis.StopAnalyzer;
public class TestCharArrayMap extends TestCase {
Random r = new Random(0);
public void doRandom(int iter, boolean ignoreCase) {
CharArrayMap map = new CharArrayMap(1,ignoreCase);
HashMap hmap = new HashMap();
char[] key;
for (int i=0; i<iter; i++) {
int len = r.nextInt(5);
key = new char[len];
for (int j=0; j<key.length; j++) {
key[j] = (char)r.nextInt(127);
}
String keyStr = new String(key);
String hmapKey = ignoreCase ? keyStr.toLowerCase() : keyStr;
int val = r.nextInt();
Object o1 = map.put(key, val);
Object o2 = hmap.put(hmapKey,val);
assertEquals(o1,o2);
// add it again with the string method
assertEquals(val, map.put(keyStr,val));
assertEquals(val, map.get(key,0,key.length));
assertEquals(val, map.get(key));
assertEquals(val, map.get(keyStr));
assertEquals(hmap.size(), map.size());
}
assertEquals(map,hmap);
assertEquals(hmap, map);
}
public void testCharArrayMap() {
for (int i=0; i<5; i++) { // pump this up for more random testing
doRandom(1000,false);
doRandom(1000,true);
}
}
public void testMethods() {
CharArrayMap<Integer> cm = new CharArrayMap<Integer>(2,false);
HashMap<String,Integer> hm = new HashMap<String,Integer>();
hm.put("foo",1);
hm.put("bar",2);
cm.putAll(hm);
assertEquals(hm, cm);
assertEquals(cm, hm);
hm.put("baz", 3);
assertFalse(hm.equals(cm));
assertFalse(cm.equals(hm));
assertTrue(cm.equals(cm));
cm.putAll(hm);
assertEquals(hm, cm);
Iterator<Map.Entry<String,Integer>> iter1 = cm.entrySet().iterator();
int n=0;
while (iter1.hasNext()) {
Map.Entry<String,Integer> entry = iter1.next();
String key = entry.getKey();
Integer val = entry.getValue();
assertEquals(hm.get(key), val);
entry.setValue(val*100);
assertEquals(val*100, (int)cm.get(key));
n++;
}
assertEquals(hm.size(), n);
cm.clear();
cm.putAll(hm);
CharArrayMap<Integer>.EntryIterator iter2 = cm.iterator();
n=0;
while (iter2.hasNext()) {
char[] keyc = iter2.nextKey();
Integer val = iter2.currentValue();
assertEquals(hm.get(new String(keyc)), val);
iter2.setValue(val*100);
assertEquals(val*100, (int)cm.get(keyc));
n++;
}
assertEquals(hm.size(), n);
cm.clear();
assertEquals(0, cm.size());
assertTrue(cm.isEmpty());
}
// performance test vs HashMap<String,Object>
// HashMap will have an edge because we are testing with
// non-dynamically created keys and String caches hashCode
public static void main(String[] args) {
int a=0;
String impl = args[a++].intern(); // hash OR chars OR char
int iter1 = Integer.parseInt(args[a++]); // iterations of put()
int iter2 = Integer.parseInt(args[a++]); // iterations of get()
int ret=0;
long start = System.currentTimeMillis();
String[] stopwords = StopAnalyzer.ENGLISH_STOP_WORDS;
// words = "this is a different test to see what is really going on here... I hope it works well but I'm not sure it will".split(" ");
char[][] stopwordschars = new char[stopwords.length][];
for (int i=0; i<stopwords.length; i++) {
stopwordschars[i] = stopwords[i].toCharArray();
}
String[] testwords = "now is the time for all good men to come to the aid of their country".split(" ");
// testwords = "this is a different test to see what is really going on here... I hope it works well but I'm not sure it will".split(" ");
char[][] testwordchars = new char[testwords.length][];
for (int i=0; i<testwordchars.length; i++) {
testwordchars[i] = testwords[i].toCharArray();
}
HashMap<String,Integer> hm=null;
CharArrayMap<Integer> cm=null;
if (impl=="hash") {
for (int i=0; i<iter1; i++) {
hm = new HashMap<String,Integer>();
int v=0;
for (String word : stopwords) {
hm.put(word, ++v);
}
ret += hm.size();
}
} else if (impl=="chars") {
for (int i=0; i<iter1; i++) {
cm = new CharArrayMap<Integer>(2,false);
int v=0;
for (String s : stopwords) {
cm.put(s,++v);
}
ret += cm.size();
}
} else if (impl=="char") {
for (int i=0; i<iter1; i++) {
cm = new CharArrayMap<Integer>(2,false);
int v=0;
for (char[] s : stopwordschars) {
cm.put(s,++v);
}
ret += cm.size();
}
}
if (impl=="hash") {
for (int i=0; i<iter2; i++) {
for (String word : testwords) {
Integer v = hm.get(word);
ret += v==null ? 0 : v;
}
}
} else if (impl=="chars") {
for (int i=0; i<iter2; i++) {
for (String word : testwords) {
Integer v = cm.get(word);
ret += v==null ? 0 : v;
}
}
} else if (impl=="char") {
for (int i=0; i<iter2; i++) {
for (char[] word : testwordchars) {
Integer v = cm.get(word);
ret += v==null ? 0 : v;
}
}
}
long end = System.currentTimeMillis();
System.out.println("result=" + ret);
System.out.println("time=" +(end-start));
}
}