SOLR-466: add CharArrayMap, update SynonymMap to use char[] rather than String Tokens

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@614793 13f79535-47bb-0310-9956-ffa450edef68
Yonik Seeley 2008-01-24 04:41:38 +00:00
parent fdf340d59a
commit 8348498d08
6 changed files with 692 additions and 59 deletions

SynonymFilter.java

@@ -39,13 +39,11 @@ import java.util.LinkedList;
public class SynonymFilter extends TokenFilter {
private final SynonymMap map; // Map<String, SynonymMap>
private final boolean ignoreCase;
private Iterator replacement; // iterator over generated tokens
private Iterator<Token> replacement; // iterator over generated tokens
public SynonymFilter(TokenStream in, SynonymMap map, boolean ignoreCase) {
public SynonymFilter(TokenStream in, SynonymMap map) {
super(in);
this.map = map;
this.ignoreCase = ignoreCase;
}
@@ -66,26 +64,26 @@ public class SynonymFilter extends TokenFilter {
* merging token streams to preserve token positions.
* - preserve original positionIncrement of first matched token
*/
public Token next() throws IOException {
@Override
public Token next(Token target) throws IOException {
while (true) {
// if there are any generated tokens, return them... don't try any
// matches against them, as we specifically don't want recursion.
if (replacement!=null && replacement.hasNext()) {
return (Token)replacement.next();
return replacement.next();
}
// common case fast-path of first token not matching anything
Token firstTok = nextTok();
Token firstTok = nextTok(target);
if (firstTok == null) return null;
String str = ignoreCase ? firstTok.termText().toLowerCase() : firstTok.termText();
Object o = map.submap!=null ? map.submap.get(str) : null;
if (o == null) return firstTok;
SynonymMap result = map.submap!=null ? map.submap.get(firstTok.termBuffer(), 0, firstTok.termLength()) : null;
if (result == null) return firstTok;
// OK, we matched a token, so find the longest match.
matched = new LinkedList();
matched = new LinkedList<Token>();
SynonymMap result = match((SynonymMap)o);
result = match(result);
if (result==null) {
// no match, simply return the first token read.
@@ -93,13 +91,13 @@ public class SynonymFilter extends TokenFilter {
}
// reuse, or create new one each time?
ArrayList generated = new ArrayList(result.synonyms.length + matched.size() + 1);
ArrayList<Token> generated = new ArrayList<Token>(result.synonyms.length + matched.size() + 1);
//
// there was a match... let's generate the new tokens, merging
// in the matched tokens (position increments need adjusting)
//
Token lastTok = matched.isEmpty() ? firstTok : (Token)matched.getLast();
Token lastTok = matched.isEmpty() ? firstTok : matched.getLast();
boolean includeOrig = result.includeOrig();
Token origTok = includeOrig ? firstTok : null;
@@ -109,7 +107,8 @@ public class SynonymFilter extends TokenFilter {
for (int i=0; i<result.synonyms.length; i++) {
Token repTok = result.synonyms[i];
Token newTok = new Token(repTok.termText(), firstTok.startOffset(), lastTok.endOffset(), firstTok.type());
Token newTok = new Token(firstTok.startOffset(), lastTok.endOffset(), firstTok.type());
newTok.setTermBuffer(repTok.termBuffer(), 0, repTok.termLength());
repPos += repTok.getPositionIncrement();
if (i==0) repPos=origPos; // make position of first token equal to original
@@ -118,7 +117,7 @@ public class SynonymFilter extends TokenFilter {
origTok.setPositionIncrement(origPos-pos);
generated.add(origTok);
pos += origTok.getPositionIncrement();
origTok = matched.isEmpty() ? null : (Token)matched.removeFirst();
origTok = matched.isEmpty() ? null : matched.removeFirst();
if (origTok != null) origPos += origTok.getPositionIncrement();
}
@@ -132,7 +131,7 @@ public class SynonymFilter extends TokenFilter {
origTok.setPositionIncrement(origPos-pos);
generated.add(origTok);
pos += origTok.getPositionIncrement();
origTok = matched.isEmpty() ? null : (Token)matched.removeFirst();
origTok = matched.isEmpty() ? null : matched.removeFirst();
if (origTok != null) origPos += origTok.getPositionIncrement();
}
@@ -152,21 +151,27 @@ public class SynonymFilter extends TokenFilter {
// Defer creation of the buffer until the first time it is used to
// optimize short fields with no matches.
//
private LinkedList buffer;
private LinkedList matched;
// TODO: use ArrayList for better performance?
private LinkedList<Token> buffer;
private LinkedList<Token> matched;
private Token nextTok() throws IOException {
if (buffer!=null && !buffer.isEmpty()) {
return (Token)buffer.removeFirst();
return buffer.removeFirst();
} else {
return input.next();
}
}
private Token nextTok(Token target) throws IOException {
if (buffer!=null && !buffer.isEmpty()) {
return buffer.removeFirst();
} else {
return input.next(target);
}
}
private void pushTok(Token t) {
if (buffer==null) buffer=new LinkedList();
if (buffer==null) buffer=new LinkedList<Token>();
buffer.addFirst(t);
}
@@ -177,9 +182,7 @@ public class SynonymFilter extends TokenFilter {
Token tok = nextTok();
if (tok != null) {
// check for positionIncrement!=1? if>1, should not match, if==0, check multiple at this level?
String str = ignoreCase ? tok.termText().toLowerCase() : tok.termText();
SynonymMap subMap = (SynonymMap)map.submap.get(str);
SynonymMap subMap = map.submap.get(tok.termBuffer(), 0, tok.termLength());
if (subMap != null) {
// recurse
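The heart of this change is visible in the two lookups above: instead of materializing a (possibly lowercased) String for every token, the filter now probes the submap with the token's term buffer. A minimal sketch of that path, assuming the Lucene 2.3-era Token API used in this file; the helper class below is illustrative, not part of the commit, and sits in org.apache.solr.analysis only so the package-private submap field is visible.

// Illustrative sketch -- not part of the commit.
package org.apache.solr.analysis;

import org.apache.lucene.analysis.Token;

class SynonymLookupSketch {
  /**
   * New lookup path: probe the CharArrayMap-backed submap with the token's
   * term buffer directly. The old code built a String per token (lowercasing
   * it when ignoreCase was set); case folding now happens inside CharArrayMap
   * when the SynonymMap was constructed with ignoreCase=true.
   */
  static SynonymMap lookup(SynonymMap map, Token tok) {
    return map.submap != null
        ? map.submap.get(tok.termBuffer(), 0, tok.termLength())
        : null;
  }
}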

SynonymFilterFactory.java

@@ -35,8 +35,8 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
public void inform(ResourceLoader loader) {
String synonyms = args.get("synonyms");
ignoreCase = getBoolean("ignoreCase",false);
expand = getBoolean("expand",true);
boolean ignoreCase = getBoolean("ignoreCase", false);
boolean expand = getBoolean("expand", true);
if (synonyms != null) {
List<String> wlist=null;
@@ -45,8 +45,8 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
} catch (IOException e) {
throw new RuntimeException(e);
}
synMap = new SynonymMap();
parseRules(wlist, synMap, "=>", ",", ignoreCase,expand);
synMap = new SynonymMap(ignoreCase);
parseRules(wlist, synMap, "=>", ",", expand);
if (wlist.size()<=20) {
SolrCore.log.fine("SynonymMap "+synonyms +":"+synMap);
}
@@ -54,10 +54,8 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
}
private SynonymMap synMap;
private boolean ignoreCase;
private boolean expand;
private static void parseRules(List<String> rules, SynonymMap map, String mappingSep, String synSep, boolean ignoreCase, boolean expansion) {
private static void parseRules(List<String> rules, SynonymMap map, String mappingSep, String synSep, boolean expansion) {
int count=0;
for (String rule : rules) {
// To use regexes, we need an expression that specifies an odd number of chars.
@@ -91,10 +89,11 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
for (List<String> fromToks : source) {
count++;
for (List<String> toToks : target) {
map.add(ignoreCase ? StrUtils.toLower(fromToks) : fromToks,
map.add(fromToks,
SynonymMap.makeTokens(toToks),
includeOrig,
true);
true
);
}
}
}
@@ -114,7 +113,7 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
public SynonymFilter create(TokenStream input) {
return new SynonymFilter(input,synMap,ignoreCase);
return new SynonymFilter(input,synMap);
}
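To make the parseRules loop above concrete, here is a hand-built equivalent of a single explicit rule such as "sea biscuit => seabiscuit", written against the SynonymMap API introduced in this commit; the rule text, class name, and method are illustrative, not taken from the factory's test data.

// Illustrative sketch -- not part of the commit.
package org.apache.solr.analysis;

import java.util.Arrays;
import java.util.List;

class SynonymRuleSketch {
  static SynonymMap buildExample() {
    SynonymMap map = new SynonymMap(true);                // ignoreCase now lives in the map
    List<String> from = Arrays.asList("sea", "biscuit");  // token sequence to match
    List<String> to = Arrays.asList("seabiscuit");        // replacement token sequence
    map.add(from, SynonymMap.makeTokens(to),
            false /* includeOrig */, true /* mergeExisting */);
    return map;
  }
}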

SynonymMap.java

@@ -18,6 +18,7 @@
package org.apache.solr.analysis;
import org.apache.lucene.analysis.Token;
import org.apache.solr.util.CharArrayMap;
import java.util.*;
@@ -26,13 +27,20 @@ import java.util.*;
* @version $Id$
*/
public class SynonymMap {
Map submap; // recursive: Map<String, SynonymMap>
CharArrayMap<SynonymMap> submap; // recursive: Map<String, SynonymMap>
Token[] synonyms;
int flags;
static final int INCLUDE_ORIG=0x01;
static final int IGNORE_CASE=0x02;
public SynonymMap() {}
public SynonymMap(boolean ignoreCase) {
if (ignoreCase) flags |= IGNORE_CASE;
}
public boolean includeOrig() { return (flags & INCLUDE_ORIG) != 0; }
public boolean ignoreCase() { return (flags & IGNORE_CASE) != 0; }
/**
* @param singleMatch List<String>, the sequence of strings to match
@@ -40,17 +48,17 @@ public class SynonymMap {
* @param includeOrig sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
* @param mergeExisting merge the replacement tokens with any other mappings that exist
*/
public void add(List singleMatch, List replacement, boolean includeOrig, boolean mergeExisting) {
public void add(List<String> singleMatch, List<Token> replacement, boolean includeOrig, boolean mergeExisting) {
SynonymMap currMap = this;
for (Iterator iter = singleMatch.iterator(); iter.hasNext();) {
String str = (String)iter.next();
for (String str : singleMatch) {
if (currMap.submap==null) {
currMap.submap = new HashMap(1);
currMap.submap = new CharArrayMap<SynonymMap>(1, ignoreCase());
}
SynonymMap map = (SynonymMap)currMap.submap.get(str);
SynonymMap map = currMap.submap.get(str);
if (map==null) {
map = new SynonymMap();
map.flags |= flags & IGNORE_CASE;
currMap.submap.put(str, map);
}
@@ -68,7 +76,7 @@ public class SynonymMap {
public String toString() {
StringBuffer sb = new StringBuffer("<");
StringBuilder sb = new StringBuilder("<");
if (synonyms!=null) {
sb.append("[");
for (int i=0; i<synonyms.length; i++) {
@@ -88,10 +96,12 @@ public class SynonymMap {
/** Produces a List<Token> from a List<String> */
public static List makeTokens(List strings) {
List ret = new ArrayList(strings.size());
for (Iterator iter = strings.iterator(); iter.hasNext();) {
Token newTok = new Token((String)iter.next(),0,0,"SYNONYM");
public static List<Token> makeTokens(List<String> strings) {
List<Token> ret = new ArrayList<Token>(strings.size());
for (String str : strings) {
//Token newTok = new Token(str,0,0,"SYNONYM");
Token newTok = new Token(0,0,"SYNONYM");
newTok.setTermBuffer(str.toCharArray(), 0, str.length());
ret.add(newTok);
}
return ret;
@@ -106,8 +116,8 @@ public class SynonymMap {
* Example: [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2] (a,n means a has posInc=n)
*
*/
public static List mergeTokens(List lst1, List lst2) {
ArrayList result = new ArrayList();
public static List<Token> mergeTokens(List<Token> lst1, List<Token> lst2) {
ArrayList<Token> result = new ArrayList<Token>();
if (lst1 ==null || lst2 ==null) {
if (lst2 != null) result.addAll(lst2);
if (lst1 != null) result.addAll(lst1);
@@ -115,27 +125,29 @@ public class SynonymMap {
}
int pos=0;
Iterator iter1=lst1.iterator();
Iterator iter2=lst2.iterator();
Token tok1 = iter1.hasNext() ? (Token)iter1.next() : null;
Token tok2 = iter2.hasNext() ? (Token)iter2.next() : null;
Iterator<Token> iter1=lst1.iterator();
Iterator<Token> iter2=lst2.iterator();
Token tok1 = iter1.hasNext() ? iter1.next() : null;
Token tok2 = iter2.hasNext() ? iter2.next() : null;
int pos1 = tok1!=null ? tok1.getPositionIncrement() : 0;
int pos2 = tok2!=null ? tok2.getPositionIncrement() : 0;
while(tok1!=null || tok2!=null) {
while (tok1 != null && (pos1 <= pos2 || tok2==null)) {
Token tok = new Token(tok1.termText(), tok1.startOffset(), tok1.endOffset(), tok1.type());
Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
tok.setTermBuffer(tok1.termBuffer(), 0, tok1.termLength());
tok.setPositionIncrement(pos1-pos);
result.add(tok);
pos=pos1;
tok1 = iter1.hasNext() ? (Token)iter1.next() : null;
tok1 = iter1.hasNext() ? iter1.next() : null;
pos1 += tok1!=null ? tok1.getPositionIncrement() : 0;
}
while (tok2 != null && (pos2 <= pos1 || tok1==null)) {
Token tok = new Token(tok2.termText(), tok2.startOffset(), tok2.endOffset(), tok2.type());
Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
tok.setTermBuffer(tok2.termBuffer(), 0, tok2.termLength());
tok.setPositionIncrement(pos2-pos);
result.add(tok);
pos=pos2;
tok2 = iter2.hasNext() ? (Token)iter2.next() : null;
tok2 = iter2.hasNext() ? iter2.next() : null;
pos2 += tok2!=null ? tok2.getPositionIncrement() : 0;
}
}
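The mergeTokens example in the comment above ([a,5 b,2] merged with [c d,4 e,4]) can be exercised directly. A minimal runnable sketch, assuming the Lucene 2.3-era Token API used in this file; the helper class and the expected output are worked out from the merge loop, not taken from the commit.

// Illustrative sketch -- not part of the commit.
package org.apache.solr.analysis;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Token;

class MergeTokensSketch {
  static Token tok(String text, int posInc) {
    Token t = new Token(0, 0, "SYNONYM");
    t.setTermBuffer(text.toCharArray(), 0, text.length());
    t.setPositionIncrement(posInc);
    return t;
  }

  public static void main(String[] args) {
    List<Token> lst1 = new ArrayList<Token>();
    lst1.add(tok("a", 5));
    lst1.add(tok("b", 2));

    List<Token> lst2 = new ArrayList<Token>();
    lst2.add(tok("c", 1));
    lst2.add(tok("d", 4));
    lst2.add(tok("e", 4));

    // Expected: c,1  d,4  a,0  b,2  e,2
    // (absolute positions: c@1, then d and a sharing position 5, b@7, e@9)
    for (Token t : SynonymMap.mergeTokens(lst1, lst2)) {
      System.out.println(new String(t.termBuffer(), 0, t.termLength())
          + "," + t.getPositionIncrement());
    }
  }
}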

CharArrayMap.java (new file)

@@ -0,0 +1,411 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import java.util.*;
import java.io.Serializable;
/**
* A simple class that stores key Strings as char[]'s in a
* hash table. Note that this is not a general purpose
* class. For example, it cannot remove items from the
* map, nor does it resize its hash table to be smaller,
* etc. It is designed to be quick to retrieve items
* by char[] keys without the necessity of converting
* to a String first.
*/
public class CharArrayMap<V> extends AbstractMap<String, V>
implements Map<String, V>, Cloneable, Serializable
{
private final static int INIT_SIZE = 2;
private char[][] keys;
private Object[] values;
private int count;
private final boolean ignoreCase;
/** Create a map with enough capacity to hold initialCapacity
* entries */
public CharArrayMap(int initialCapacity, boolean ignoreCase) {
this.ignoreCase = ignoreCase;
int size = INIT_SIZE;
// pad by 25% (x+x/4) so initialCapacity entries stay under the rehash threshold
initialCapacity = initialCapacity + (initialCapacity >>2);
while(size <= initialCapacity)
size <<= 1;
keys = new char[size][];
values = new Object[size];
}
public boolean ignoreCase() {
return ignoreCase;
}
public V get(char[] key) {
return get(key, 0, key.length);
}
public V get(char[] key, int off, int len) {
return (V)values[getSlot(key, off, len)];
}
public V get(CharSequence key) {
return (V)values[getSlot(key)];
}
@Override
public V get(Object key) {
return (V)values[getSlot(key)];
}
@Override
public boolean containsKey(Object s) {
return keys[getSlot(s)] != null;
}
@Override
public boolean containsValue(Object value) {
if (value == null) {
// search for key with a null value
for (int i=0; i<keys.length; i++) {
if (keys[i] != null && values[i] == null) return true;
}
return false;
}
for (int i=0; i<values.length; i++) {
Object val = values[i];
if (val != null && value.equals(val)) return true;
}
return false;
}
private int getSlot(Object key) {
if (key instanceof char[]) {
char[] keyc = (char[])key;
return getSlot(keyc, 0, keyc.length);
}
return getSlot((CharSequence)key);
}
private int getSlot(char[] key, int off, int len) {
int code = getHashCode(key, off, len);
int pos = code & (keys.length-1);
char[] key2 = keys[pos];
if (key2 != null && !equals(key, off, len, key2)) {
final int inc = ((code>>8)+code)|1;
do {
code += inc;
pos = code & (keys.length-1);
key2 = keys[pos];
} while (key2 != null && !equals(key, off, len, key2));
}
return pos;
}
/** Returns the hash slot for the given key (the slot where it is stored, or an empty slot if absent) */
private int getSlot(CharSequence key) {
int code = getHashCode(key);
int pos = code & (keys.length-1);
char[] key2 = keys[pos];
if (key2 != null && !equals(key, key2)) {
final int inc = ((code>>8)+code)|1;
do {
code += inc;
pos = code & (keys.length-1);
key2 = keys[pos];
} while (key2 != null && !equals(key, key2));
}
return pos;
}
public V put(CharSequence key, V val) {
return put(key.toString(), val); // could be more efficient
}
@Override
public V put(String key, V val) {
return put(key.toCharArray(), val);
}
/** Add this key,val pair to the map.
* The char[] key is directly used, no copy is made.
* If ignoreCase is true for this Map, the key array will be directly modified.
* The user should never modify the key after calling this method.
*/
public V put(char[] key, Object val) {
if (ignoreCase)
for(int i=0;i< key.length;i++)
key[i] = Character.toLowerCase(key[i]);
int slot = getSlot(key, 0, key.length);
if (keys[slot] == null) count++;
Object prev = values[slot];
keys[slot] = key;
values[slot] = val;
if (count + (count>>2) >= keys.length) {
rehash();
}
return (V)prev;
}
private boolean equals(char[] text1, int off, int len, char[] text2) {
if (len != text2.length)
return false;
if (ignoreCase) {
for(int i=0;i<len;i++) {
if (Character.toLowerCase(text1[off+i]) != text2[i])
return false;
}
} else {
for(int i=0;i<len;i++) {
if (text1[off+i] != text2[i])
return false;
}
}
return true;
}
private boolean equals(CharSequence text1, char[] text2) {
int len = text1.length();
if (len != text2.length)
return false;
if (ignoreCase) {
for(int i=0;i<len;i++) {
if (Character.toLowerCase(text1.charAt(i)) != text2[i])
return false;
}
} else {
for(int i=0;i<len;i++) {
if (text1.charAt(i) != text2[i])
return false;
}
}
return true;
}
private void rehash() {
final int newSize = 2* keys.length;
char[][] oldEntries = keys;
Object[] oldValues = values;
keys = new char[newSize][];
values = new Object[newSize];
for(int i=0;i<oldEntries.length;i++) {
char[] key = oldEntries[i];
if (key != null) {
// todo: could be faster... no need to compare keys on collision
// since they are unique
int newSlot = getSlot(key,0,key.length);
keys[newSlot] = key;
values[newSlot] = oldValues[i];
}
}
}
private int getHashCode(char[] text, int off, int len) {
int code = 0;
if (ignoreCase) {
for (int i=0; i<len; i++) {
code = code*31 + Character.toLowerCase(text[off+i]);
}
} else {
for (int i=0; i<len; i++) {
code = code*31 + text[off+i];
}
}
return code;
}
private int getHashCode(CharSequence text) {
int code;
if (ignoreCase) {
code = 0;
int len = text.length();
for (int i=0; i<len; i++) {
code = code*31 + Character.toLowerCase(text.charAt(i));
}
} else {
if (false && text instanceof String) {
code = text.hashCode();
} else {
code = 0;
int len = text.length();
for (int i=0; i<len; i++) {
code = code*31 + text.charAt(i);
}
}
}
return code;
}
@Override
public int size() {
return count;
}
@Override
public boolean isEmpty() {
return count==0;
}
@Override
public void clear() {
count = 0;
Arrays.fill(keys,null);
Arrays.fill(values,null);
}
@Override
public Set<Entry<String, V>> entrySet() {
return new EntrySet();
}
/** Returns an EntryIterator over this Map. */
public EntryIterator iterator() {
return new EntryIterator();
}
/** public iterator class so efficient methods are exposed to users */
public class EntryIterator implements Iterator<Map.Entry<String,V>> {
int pos=-1;
int lastPos;
EntryIterator() {
goNext();
}
private void goNext() {
lastPos = pos;
pos++;
while (pos < keys.length && keys[pos] == null) pos++;
}
public boolean hasNext() {
return pos < keys.length;
}
/** gets the next key... do not modify the returned char[] */
public char[] nextKey() {
goNext();
return keys[lastPos];
}
/** gets the next key as a newly created String object */
public String nextKeyString() {
return new String(nextKey());
}
/** returns the value associated with the last key returned */
public V currentValue() {
return (V)values[lastPos];
}
/** sets the value associated with the last key returned */
public V setValue(V value) {
V old = (V)values[lastPos];
values[lastPos] = value;
return old;
}
/** Returns an Entry<String,V> object created on the fly...
* use nextKey() + currentValue() for better efficiency. */
public Map.Entry<String,V> next() {
goNext();
return new MapEntry(lastPos);
}
public void remove() {
throw new UnsupportedOperationException();
}
}
private class MapEntry implements Map.Entry<String,V> {
final int pos;
MapEntry(int pos) {
this.pos = pos;
}
public char[] getCharArr() {
return keys[pos];
}
public String getKey() {
return new String(getCharArr());
}
public V getValue() {
return (V)values[pos];
}
public V setValue(V value) {
V old = (V)values[pos];
values[pos] = value;
return old;
}
public String toString() {
return getKey() + '=' + getValue();
}
}
private class EntrySet extends AbstractSet<Map.Entry<String, V>> {
public EntryIterator iterator() {
return new EntryIterator();
}
public boolean contains(Object o) {
if (!(o instanceof Map.Entry))
return false;
Map.Entry e = (Map.Entry)o;
Object key = e.getKey();
if (key==null) return false; // we don't support null keys
Object val = e.getValue();
Object v = get(key);
return v==null ? val==null : v.equals(val);
}
public boolean remove(Object o) {
throw new UnsupportedOperationException();
}
public int size() {
return count;
}
public void clear() {
CharArrayMap.this.clear();
}
}
@Override
public Object clone() {
CharArrayMap<V> map = null;
try {
map = (CharArrayMap<V>)super.clone();
map.keys = keys.clone();
map.values = values.clone();
} catch (CloneNotSupportedException e) {
// impossible
}
return map;
}
}
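For reference, a small usage sketch of the map defined above, assuming only the API in this file; the class name and sample values are illustrative.

// Illustrative sketch -- not part of the commit.
package org.apache.solr.util;

public class CharArrayMapUsageSketch {
  public static void main(String[] args) {
    // ignoreCase=true: keys are folded to lower case on insert and during lookup
    CharArrayMap<Integer> map = new CharArrayMap<Integer>(16, true);

    map.put("Foo", 1);               // String keys are converted to char[] internally
    map.put("bar".toCharArray(), 2); // char[] keys are stored directly (and lowercased in place)

    System.out.println(map.get("FOO"));               // 1
    System.out.println(map.get("bar".toCharArray())); // 2
    System.out.println(map.containsKey("baz"));       // false
    System.out.println(map.size());                   // 2
  }
}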

TestSynonymFilter.java

@@ -48,7 +48,7 @@ public class TestSynonymFilter extends BaseTokenTestCase {
}
};
SynonymFilter sf = new SynonymFilter(ts, dict, true);
SynonymFilter sf = new SynonymFilter(ts, dict);
while(true) {
Token t = sf.next();

TestCharArrayMap.java (new file)

@@ -0,0 +1,208 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import junit.framework.TestCase;
import java.util.*;
import org.apache.lucene.analysis.StopAnalyzer;
public class TestCharArrayMap extends TestCase {
Random r = new Random(0);
public void doRandom(int iter, boolean ignoreCase) {
CharArrayMap map = new CharArrayMap(1,ignoreCase);
HashMap hmap = new HashMap();
char[] key;
for (int i=0; i<iter; i++) {
int len = r.nextInt(5);
key = new char[len];
for (int j=0; j<key.length; j++) {
key[j] = (char)r.nextInt(127);
}
String keyStr = new String(key);
String hmapKey = ignoreCase ? keyStr.toLowerCase() : keyStr;
int val = r.nextInt();
Object o1 = map.put(key, val);
Object o2 = hmap.put(hmapKey,val);
assertEquals(o1,o2);
// add it again with the string method
assertEquals(val, map.put(keyStr,val));
assertEquals(val, map.get(key,0,key.length));
assertEquals(val, map.get(key));
assertEquals(val, map.get(keyStr));
assertEquals(hmap.size(), map.size());
}
assertEquals(map,hmap);
assertEquals(hmap, map);
}
public void testCharArrayMap() {
for (int i=0; i<5; i++) { // pump this up for more random testing
doRandom(1000,false);
doRandom(1000,true);
}
}
public void testMethods() {
CharArrayMap<Integer> cm = new CharArrayMap<Integer>(2,false);
HashMap<String,Integer> hm = new HashMap<String,Integer>();
hm.put("foo",1);
hm.put("bar",2);
cm.putAll(hm);
assertEquals(hm, cm);
assertEquals(cm, hm);
hm.put("baz", 3);
assertFalse(hm.equals(cm));
assertFalse(cm.equals(hm));
assertTrue(cm.equals(cm));
cm.putAll(hm);
assertEquals(hm, cm);
Iterator<Map.Entry<String,Integer>> iter1 = cm.entrySet().iterator();
int n=0;
while (iter1.hasNext()) {
Map.Entry<String,Integer> entry = iter1.next();
String key = entry.getKey();
Integer val = entry.getValue();
assertEquals(hm.get(key), val);
entry.setValue(val*100);
assertEquals(val*100, (int)cm.get(key));
n++;
}
assertEquals(hm.size(), n);
cm.clear();
cm.putAll(hm);
CharArrayMap<Integer>.EntryIterator iter2 = cm.iterator();
n=0;
while (iter2.hasNext()) {
char[] keyc = iter2.nextKey();
Integer val = iter2.currentValue();
assertEquals(hm.get(new String(keyc)), val);
iter2.setValue(val*100);
assertEquals(val*100, (int)cm.get(keyc));
n++;
}
assertEquals(hm.size(), n);
cm.clear();
assertEquals(0, cm.size());
assertTrue(cm.isEmpty());
}
// performance test vs HashMap<String,Object>
// HashMap will have an edge because we are testing with
// non-dynamically created keys and String caches hashCode
public static void main(String[] args) {
int a=0;
String impl = args[a++].intern(); // hash OR chars OR char
int iter1 = Integer.parseInt(args[a++]); // iterations of put()
int iter2 = Integer.parseInt(args[a++]); // iterations of get()
int ret=0;
long start = System.currentTimeMillis();
String[] stopwords = StopAnalyzer.ENGLISH_STOP_WORDS;
// words = "this is a different test to see what is really going on here... I hope it works well but I'm not sure it will".split(" ");
char[][] stopwordschars = new char[stopwords.length][];
for (int i=0; i<stopwords.length; i++) {
stopwordschars[i] = stopwords[i].toCharArray();
}
String[] testwords = "now is the time for all good men to come to the aid of their country".split(" ");
// testwords = "this is a different test to see what is really going on here... I hope it works well but I'm not sure it will".split(" ");
char[][] testwordchars = new char[testwords.length][];
for (int i=0; i<testwordchars.length; i++) {
testwordchars[i] = testwords[i].toCharArray();
}
HashMap<String,Integer> hm=null;
CharArrayMap<Integer> cm=null;
if (impl=="hash") {
for (int i=0; i<iter1; i++) {
hm = new HashMap<String,Integer>();
int v=0;
for (String word : stopwords) {
hm.put(word, ++v);
}
ret += hm.size();
}
} else if (impl=="chars") {
for (int i=0; i<iter1; i++) {
cm = new CharArrayMap<Integer>(2,false);
int v=0;
for (String s : stopwords) {
cm.put(s,++v);
}
ret += cm.size();
}
} else if (impl=="char") {
for (int i=0; i<iter1; i++) {
cm = new CharArrayMap<Integer>(2,false);
int v=0;
for (char[] s : stopwordschars) {
cm.put(s,++v);
}
ret += cm.size();
}
}
if (impl=="hash") {
for (int i=0; i<iter2; i++) {
for (String word : testwords) {
Integer v = hm.get(word);
ret += v==null ? 0 : v;
}
}
} else if (impl=="chars") {
for (int i=0; i<iter2; i++) {
for (String word : testwords) {
Integer v = cm.get(word);
ret += v==null ? 0 : v;
}
}
} else if (impl=="char") {
for (int i=0; i<iter2; i++) {
for (char[] word : testwordchars) {
Integer v = cm.get(word);
ret += v==null ? 0 : v;
}
}
}
long end = System.currentTimeMillis();
System.out.println("result=" + ret);
System.out.println("time=" +(end-start));
}
}