mirror of https://github.com/apache/lucene.git
LUCENE-2247: Add a CharArrayMap<V> for performance improvements in some stemmers and synonym filters
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@906032 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5ad7974d3f
commit
924d7faa68
|
@ -137,6 +137,9 @@ New features
|
|||
on the provided Version. Version < 3.1 will use the char-API.
|
||||
(Simon Willnauer via Uwe Schindler)
|
||||
|
||||
* LUCENE-2247: Added a CharArrayMap<V> for performance improvements
|
||||
in some stemmers and synonym filters. (Uwe Schindler)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-2086: When resolving deleted terms, do so in term sort order
|
||||
|
|
|
@ -0,0 +1,700 @@
|
|||
package org.apache.lucene.analysis;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.AbstractMap;
|
||||
import java.util.AbstractSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.CharacterUtils;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* A simple class that stores key Strings as char[]'s in a
|
||||
* hash table. Note that this is not a general purpose
|
||||
* class. For example, it cannot remove items from the
|
||||
* map, nor does it resize its hash table to be smaller,
|
||||
* etc. It is designed to be quick to retrieve items
|
||||
* by char[] keys without the necessity of converting
|
||||
* to a String first.
|
||||
* <p>You must specify the required {@link Version}
|
||||
* compatibility when creating {@link CharArrayMap}:
|
||||
* <ul>
|
||||
* <li> As of 3.1, supplementary characters are
|
||||
* properly lowercased.</li>
|
||||
* </ul>
|
||||
* Before 3.1 supplementary characters could not be
|
||||
* lowercased correctly due to the lack of Unicode 4
|
||||
* support in JDK 1.4. To use instances of
|
||||
* {@link CharArrayMap} with the behavior before Lucene
|
||||
* 3.1 pass a {@link Version} < 3.1 to the constructors.
|
||||
*/
|
||||
public class CharArrayMap<V> extends AbstractMap<Object,V> {
|
||||
// private only because missing generics
|
||||
private static final CharArrayMap<?> EMPTY_MAP = new EmptyCharArrayMap<Object>();
|
||||
|
||||
private final static int INIT_SIZE = 8;
|
||||
private final CharacterUtils charUtils;
|
||||
private boolean ignoreCase;
|
||||
private int count;
|
||||
final Version matchVersion; // package private because used in CharArraySet
|
||||
char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
|
||||
V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
|
||||
|
||||
/**
|
||||
* Create map with enough capacity to hold startSize terms
|
||||
*
|
||||
* @param matchVersion
|
||||
* compatibility match version see <a href="#version">Version
|
||||
* note</a> above for details.
|
||||
* @param startSize
|
||||
* the initial capacity
|
||||
* @param ignoreCase
|
||||
* <code>false</code> if and only if the set should be case sensitive
|
||||
* otherwise <code>true</code>.
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public CharArrayMap(Version matchVersion, int startSize, boolean ignoreCase) {
|
||||
this.ignoreCase = ignoreCase;
|
||||
int size = INIT_SIZE;
|
||||
while(startSize + (startSize>>2) > size)
|
||||
size <<= 1;
|
||||
keys = new char[size][];
|
||||
values = (V[]) new Object[size];
|
||||
this.charUtils = CharacterUtils.getInstance(matchVersion);
|
||||
this.matchVersion = matchVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a map from the mappings in another map.
|
||||
*
|
||||
* @param matchVersion
|
||||
* compatibility match version see <a href="#version">Version
|
||||
* note</a> above for details.
|
||||
* @param c
|
||||
* a map whose mappings to be copied
|
||||
* @param ignoreCase
|
||||
* <code>false</code> if and only if the set should be case sensitive
|
||||
* otherwise <code>true</code>.
|
||||
*/
|
||||
public CharArrayMap(Version matchVersion, Map<?,? extends V> c, boolean ignoreCase) {
|
||||
this(matchVersion, c.size(), ignoreCase);
|
||||
putAll(c);
|
||||
}
|
||||
|
||||
/** Create set from the supplied map (used internally for readonly maps...) */
|
||||
private CharArrayMap(CharArrayMap<V> toCopy){
|
||||
this.keys = toCopy.keys;
|
||||
this.values = toCopy.values;
|
||||
this.ignoreCase = toCopy.ignoreCase;
|
||||
this.count = toCopy.count;
|
||||
this.charUtils = toCopy.charUtils;
|
||||
this.matchVersion = toCopy.matchVersion;
|
||||
}
|
||||
|
||||
/** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. */
|
||||
@Override
|
||||
public void clear() {
|
||||
count = 0;
|
||||
Arrays.fill(keys, null);
|
||||
Arrays.fill(values, null);
|
||||
}
|
||||
|
||||
/** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
|
||||
* are in the {@link #keySet} */
|
||||
public boolean containsKey(char[] text, int off, int len) {
|
||||
return keys[getSlot(text, off, len)] != null;
|
||||
}
|
||||
|
||||
/** true if the <code>CharSequence</code> is in the {@link #keySet} */
|
||||
public boolean containsKey(CharSequence cs) {
|
||||
return keys[getSlot(cs)] != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean containsKey(Object o) {
|
||||
if (o instanceof char[]) {
|
||||
final char[] text = (char[])o;
|
||||
return containsKey(text, 0, text.length);
|
||||
}
|
||||
return containsKey(o.toString());
|
||||
}
|
||||
|
||||
/** returns the value of the mapping of <code>len</code> chars of <code>text</code>
|
||||
* starting at <code>off</code> */
|
||||
public V get(char[] text, int off, int len) {
|
||||
return values[getSlot(text, off, len)];
|
||||
}
|
||||
|
||||
/** returns the value of the mapping of the chars inside this {@code CharSequence} */
|
||||
public V get(CharSequence cs) {
|
||||
return values[getSlot(cs)];
|
||||
}
|
||||
|
||||
@Override
|
||||
public V get(Object o) {
|
||||
if (o instanceof char[]) {
|
||||
final char[] text = (char[])o;
|
||||
return get(text, 0, text.length);
|
||||
}
|
||||
return get(o.toString());
|
||||
}
|
||||
|
||||
private int getSlot(char[] text, int off, int len) {
|
||||
int code = getHashCode(text, off, len);
|
||||
int pos = code & (keys.length-1);
|
||||
char[] text2 = keys[pos];
|
||||
if (text2 != null && !equals(text, off, len, text2)) {
|
||||
final int inc = ((code>>8)+code)|1;
|
||||
do {
|
||||
code += inc;
|
||||
pos = code & (keys.length-1);
|
||||
text2 = keys[pos];
|
||||
} while (text2 != null && !equals(text, off, len, text2));
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
/** Returns true if the String is in the set */
|
||||
private int getSlot(CharSequence text) {
|
||||
int code = getHashCode(text);
|
||||
int pos = code & (keys.length-1);
|
||||
char[] text2 = keys[pos];
|
||||
if (text2 != null && !equals(text, text2)) {
|
||||
final int inc = ((code>>8)+code)|1;
|
||||
do {
|
||||
code += inc;
|
||||
pos = code & (keys.length-1);
|
||||
text2 = keys[pos];
|
||||
} while (text2 != null && !equals(text, text2));
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
/** Add the given mapping. */
|
||||
public V put(CharSequence text, V value) {
|
||||
return put(text.toString(), value); // could be more efficient
|
||||
}
|
||||
|
||||
@Override
|
||||
public V put(Object o, V value) {
|
||||
if (o instanceof char[]) {
|
||||
return put((char[])o, value);
|
||||
}
|
||||
return put(o.toString(), value);
|
||||
}
|
||||
|
||||
/** Add the given mapping. */
|
||||
public V put(String text, V value) {
|
||||
return put(text.toCharArray(), value);
|
||||
}
|
||||
|
||||
/** Add the given mapping.
|
||||
* If ignoreCase is true for this Set, the text array will be directly modified.
|
||||
* The user should never modify this text array after calling this method.
|
||||
*/
|
||||
public V put(char[] text, V value) {
|
||||
if (ignoreCase)
|
||||
for(int i=0;i<text.length;){
|
||||
i += Character.toChars(
|
||||
Character.toLowerCase(
|
||||
charUtils.codePointAt(text, i)), text, i);
|
||||
}
|
||||
int slot = getSlot(text, 0, text.length);
|
||||
if (keys[slot] != null) {
|
||||
final V oldValue = values[slot];
|
||||
values[slot] = value;
|
||||
return oldValue;
|
||||
}
|
||||
keys[slot] = text;
|
||||
values[slot] = value;
|
||||
count++;
|
||||
|
||||
if (count + (count>>2) > keys.length) {
|
||||
rehash();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private void rehash() {
|
||||
assert keys.length == values.length;
|
||||
final int newSize = 2*keys.length;
|
||||
final char[][] oldkeys = keys;
|
||||
final V[] oldvalues = values;
|
||||
keys = new char[newSize][];
|
||||
values = (V[]) new Object[newSize];
|
||||
|
||||
for(int i=0; i<oldkeys.length; i++) {
|
||||
char[] text = oldkeys[i];
|
||||
if (text != null) {
|
||||
// todo: could be faster... no need to compare strings on collision
|
||||
final int slot = getSlot(text,0,text.length);
|
||||
keys[slot] = text;
|
||||
values[slot] = oldvalues[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean equals(char[] text1, int off, int len, char[] text2) {
|
||||
if (len != text2.length)
|
||||
return false;
|
||||
final int limit = off+len;
|
||||
if (ignoreCase) {
|
||||
for(int i=0;i<len;) {
|
||||
final int codePointAt = charUtils.codePointAt(text1, off+i, limit);
|
||||
if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
|
||||
return false;
|
||||
i += Character.charCount(codePointAt);
|
||||
}
|
||||
} else {
|
||||
for(int i=0;i<len;i++) {
|
||||
if (text1[off+i] != text2[i])
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean equals(CharSequence text1, char[] text2) {
|
||||
int len = text1.length();
|
||||
if (len != text2.length)
|
||||
return false;
|
||||
if (ignoreCase) {
|
||||
for(int i=0;i<len;) {
|
||||
final int codePointAt = charUtils.codePointAt(text1, i);
|
||||
if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
|
||||
return false;
|
||||
i += Character.charCount(codePointAt);
|
||||
}
|
||||
} else {
|
||||
for(int i=0;i<len;i++) {
|
||||
if (text1.charAt(i) != text2[i])
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private int getHashCode(char[] text, int offset, int len) {
|
||||
if (text == null)
|
||||
throw new NullPointerException();
|
||||
int code = 0;
|
||||
final int stop = offset + len;
|
||||
if (ignoreCase) {
|
||||
for (int i=offset; i<stop;) {
|
||||
final int codePointAt = charUtils.codePointAt(text, i, stop);
|
||||
code = code*31 + Character.toLowerCase(codePointAt);
|
||||
i += Character.charCount(codePointAt);
|
||||
}
|
||||
} else {
|
||||
for (int i=offset; i<stop; i++) {
|
||||
code = code*31 + text[i];
|
||||
}
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
private int getHashCode(CharSequence text) {
|
||||
if (text == null)
|
||||
throw new NullPointerException();
|
||||
int code = 0;
|
||||
int len = text.length();
|
||||
if (ignoreCase) {
|
||||
for (int i=0; i<len;) {
|
||||
int codePointAt = charUtils.codePointAt(text, i);
|
||||
code = code*31 + Character.toLowerCase(codePointAt);
|
||||
i += Character.charCount(codePointAt);
|
||||
}
|
||||
} else {
|
||||
for (int i=0; i<len; i++) {
|
||||
code = code*31 + text.charAt(i);
|
||||
}
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
@Override
|
||||
public V remove(Object key) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("{");
|
||||
for (Map.Entry<Object,V> entry : entrySet()) {
|
||||
if (sb.length()>1) sb.append(", ");
|
||||
sb.append(entry);
|
||||
}
|
||||
return sb.append('}').toString();
|
||||
}
|
||||
|
||||
private EntrySet entrySet = null;
|
||||
private CharArraySet keySet = null;
|
||||
|
||||
EntrySet createEntrySet() {
|
||||
return new EntrySet(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final EntrySet entrySet() {
|
||||
if (entrySet == null) {
|
||||
entrySet = createEntrySet();
|
||||
}
|
||||
return entrySet;
|
||||
}
|
||||
|
||||
// helper for CharArraySet to not produce endless recursion
|
||||
final Set<Object> originalKeySet() {
|
||||
return super.keySet();
|
||||
}
|
||||
|
||||
/** Returns an {@link CharArraySet} view on the map's keys.
|
||||
* The set will use the same {@code matchVersion} as this map. */
|
||||
@Override @SuppressWarnings("unchecked")
|
||||
public final CharArraySet keySet() {
|
||||
if (keySet == null) {
|
||||
// prevent adding of entries
|
||||
keySet = new CharArraySet((CharArrayMap) this) {
|
||||
@Override
|
||||
public boolean add(Object o) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
@Override
|
||||
public boolean add(CharSequence text) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
@Override
|
||||
public boolean add(String text) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
@Override
|
||||
public boolean add(char[] text) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
};
|
||||
}
|
||||
return keySet;
|
||||
}
|
||||
|
||||
/** public iterator class so efficient methods are exposed to users */
|
||||
public class EntryIterator implements Iterator<Map.Entry<Object,V>> {
|
||||
private int pos=-1;
|
||||
private int lastPos;
|
||||
private final boolean allowModify;
|
||||
|
||||
private EntryIterator(boolean allowModify) {
|
||||
this.allowModify = allowModify;
|
||||
goNext();
|
||||
}
|
||||
|
||||
private void goNext() {
|
||||
lastPos = pos;
|
||||
pos++;
|
||||
while (pos < keys.length && keys[pos] == null) pos++;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return pos < keys.length;
|
||||
}
|
||||
|
||||
/** gets the next key... do not modify the returned char[] */
|
||||
public char[] nextKey() {
|
||||
goNext();
|
||||
return keys[lastPos];
|
||||
}
|
||||
|
||||
/** gets the next key as a newly created String object */
|
||||
public String nextKeyString() {
|
||||
return new String(nextKey());
|
||||
}
|
||||
|
||||
/** returns the value associated with the last key returned */
|
||||
public V currentValue() {
|
||||
return values[lastPos];
|
||||
}
|
||||
|
||||
/** sets the value associated with the last key returned */
|
||||
public V setValue(V value) {
|
||||
if (!allowModify)
|
||||
throw new UnsupportedOperationException();
|
||||
V old = values[lastPos];
|
||||
values[lastPos] = value;
|
||||
return old;
|
||||
}
|
||||
|
||||
/** use nextCharArray() + currentValue() for better efficiency. */
|
||||
public Map.Entry<Object,V> next() {
|
||||
goNext();
|
||||
return new MapEntry(lastPos, allowModify);
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
private final class MapEntry implements Map.Entry<Object,V> {
|
||||
private final int pos;
|
||||
private final boolean allowModify;
|
||||
|
||||
private MapEntry(int pos, boolean allowModify) {
|
||||
this.pos = pos;
|
||||
this.allowModify = allowModify;
|
||||
}
|
||||
|
||||
public Object getKey() {
|
||||
// we must clone here, as putAll to another CharArrayMap
|
||||
// with other case sensitivity flag would corrupt the keys
|
||||
return keys[pos].clone();
|
||||
}
|
||||
|
||||
public V getValue() {
|
||||
return values[pos];
|
||||
}
|
||||
|
||||
public V setValue(V value) {
|
||||
if (!allowModify)
|
||||
throw new UnsupportedOperationException();
|
||||
final V old = values[pos];
|
||||
values[pos] = value;
|
||||
return old;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new StringBuilder().append(keys[pos]).append('=')
|
||||
.append(((Object) values[pos] == (Object) CharArrayMap.this) ? "(this Map)" : values[pos])
|
||||
.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/** public EntrySet class so efficient methods are exposed to users */
|
||||
public final class EntrySet extends AbstractSet<Map.Entry<Object,V>> {
|
||||
private final boolean allowModify;
|
||||
|
||||
private EntrySet(boolean allowModify) {
|
||||
this.allowModify = allowModify;
|
||||
}
|
||||
|
||||
@Override
|
||||
public EntryIterator iterator() {
|
||||
return new EntryIterator(allowModify);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(Object o) {
|
||||
if (!(o instanceof Map.Entry))
|
||||
return false;
|
||||
final Map.Entry e = (Map.Entry)o;
|
||||
final Object key = e.getKey();
|
||||
final Object val = e.getValue();
|
||||
final Object v = get(key);
|
||||
return v == null ? val == null : v.equals(val);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean remove(Object o) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
if (!allowModify)
|
||||
throw new UnsupportedOperationException();
|
||||
CharArrayMap.this.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an unmodifiable {@link CharArrayMap}. This allows to provide
|
||||
* unmodifiable views of internal map for "read-only" use.
|
||||
*
|
||||
* @param map
|
||||
* a map for which the unmodifiable map is returned.
|
||||
* @return an new unmodifiable {@link CharArrayMap}.
|
||||
* @throws NullPointerException
|
||||
* if the given map is <code>null</code>.
|
||||
*/
|
||||
public static <V> CharArrayMap<V> unmodifiableMap(CharArrayMap<V> map) {
|
||||
if (map == null)
|
||||
throw new NullPointerException("Given map is null");
|
||||
if (map == emptyMap() || map.isEmpty())
|
||||
return emptyMap();
|
||||
if (map instanceof UnmodifiableCharArrayMap)
|
||||
return map;
|
||||
return new UnmodifiableCharArrayMap<V>(map);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a copy of the given map as a {@link CharArrayMap}. If the given map
|
||||
* is a {@link CharArrayMap} the ignoreCase property will be preserved.
|
||||
* <p>
|
||||
* <b>Note:</b> If you intend to create a copy of another {@link CharArrayMap} where
|
||||
* the {@link Version} of the source map differs from its copy
|
||||
* {@link #CharArrayMap(Version, Map, boolean)} should be used instead.
|
||||
* The {@link #copy(Version, Map)} will preserve the {@link Version} of the
|
||||
* source map it is an instance of {@link CharArrayMap}.
|
||||
* </p>
|
||||
*
|
||||
* @param matchVersion
|
||||
* compatibility match version see <a href="#version">Version
|
||||
* note</a> above for details. This argument will be ignored if the
|
||||
* given map is a {@link CharArrayMap}.
|
||||
* @param map
|
||||
* a map to copy
|
||||
* @return a copy of the given map as a {@link CharArrayMap}. If the given map
|
||||
* is a {@link CharArrayMap} the ignoreCase property as well as the
|
||||
* matchVersion will be of the given map will be preserved.
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public static <V> CharArrayMap<V> copy(final Version matchVersion, final Map<?,? extends V> map) {
|
||||
if(map == EMPTY_MAP)
|
||||
return emptyMap();
|
||||
if(map instanceof CharArrayMap) {
|
||||
CharArrayMap<V> m = (CharArrayMap<V>) map;
|
||||
// use fast path instead of iterating all values
|
||||
// this is even on very small sets ~10 times faster than iterating
|
||||
final char[][] keys = new char[m.keys.length][];
|
||||
System.arraycopy(m.keys, 0, keys, 0, keys.length);
|
||||
final V[] values = (V[]) new Object[m.values.length];
|
||||
System.arraycopy(m.values, 0, values, 0, values.length);
|
||||
m = new CharArrayMap<V>(m);
|
||||
m.keys = keys;
|
||||
m.values = values;
|
||||
return m;
|
||||
}
|
||||
return new CharArrayMap<V>(matchVersion, map, false);
|
||||
}
|
||||
|
||||
/** Returns an empty, unmodifiable map. */
|
||||
@SuppressWarnings("unchecked")
|
||||
public static <V> CharArrayMap<V> emptyMap() {
|
||||
return (CharArrayMap<V>) EMPTY_MAP;
|
||||
}
|
||||
|
||||
// package private CharArraySet instanceof check in CharArraySet
|
||||
static class UnmodifiableCharArrayMap<V> extends CharArrayMap<V> {
|
||||
|
||||
UnmodifiableCharArrayMap(CharArrayMap<V> map) {
|
||||
super(map);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public V put(Object o, V val){
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public V put(char[] text, V val) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public V put(CharSequence text, V val) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public V put(String text, V val) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public V remove(Object key) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
EntrySet createEntrySet() {
|
||||
return new EntrySet(false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Empty {@link UnmodifiableCharArrayMap} optimized for speed.
|
||||
* Contains checks will always return <code>false</code> or throw
|
||||
* NPE if necessary.
|
||||
*/
|
||||
private static final class EmptyCharArrayMap<V> extends UnmodifiableCharArrayMap<V> {
|
||||
EmptyCharArrayMap() {
|
||||
super(new CharArrayMap<V>(Version.LUCENE_CURRENT, 0, false));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean containsKey(char[] text, int off, int len) {
|
||||
if(text == null)
|
||||
throw new NullPointerException();
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean containsKey(CharSequence cs) {
|
||||
if(cs == null)
|
||||
throw new NullPointerException();
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean containsKey(Object o) {
|
||||
if(o == null)
|
||||
throw new NullPointerException();
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public V get(char[] text, int off, int len) {
|
||||
if(text == null)
|
||||
throw new NullPointerException();
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public V get(CharSequence cs) {
|
||||
if(cs == null)
|
||||
throw new NullPointerException();
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public V get(Object o) {
|
||||
if(o == null)
|
||||
throw new NullPointerException();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,15 +1,5 @@
|
|||
package org.apache.lucene.analysis;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.AbstractSet;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.CharacterUtils;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -27,6 +17,13 @@ import org.apache.lucene.util.Version;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.AbstractSet;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* A simple class that stores Strings as char[]'s in a
|
||||
|
@ -58,15 +55,11 @@ import org.apache.lucene.util.Version;
|
|||
* For type safety also {@link #stringIterator()} is provided.
|
||||
*/
|
||||
public class CharArraySet extends AbstractSet<Object> {
|
||||
private final static int INIT_SIZE = 8;
|
||||
private char[][] entries;
|
||||
private int count;
|
||||
private final boolean ignoreCase;
|
||||
public static final CharArraySet EMPTY_SET = new EmptyCharArraySet();
|
||||
public static final CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.<Object>emptyMap());
|
||||
private static final Object PLACEHOLDER = new Object();
|
||||
|
||||
private final CharArrayMap<Object> map;
|
||||
|
||||
private final CharacterUtils charUtils;
|
||||
private final Version matchVersion;
|
||||
|
||||
/**
|
||||
* Create set with enough capacity to hold startSize terms
|
||||
*
|
||||
|
@ -80,13 +73,7 @@ public class CharArraySet extends AbstractSet<Object> {
|
|||
* otherwise <code>true</code>.
|
||||
*/
|
||||
public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) {
|
||||
this.ignoreCase = ignoreCase;
|
||||
int size = INIT_SIZE;
|
||||
while(startSize + (startSize>>2) > size)
|
||||
size <<= 1;
|
||||
entries = new char[size][];
|
||||
this.charUtils = CharacterUtils.getInstance(matchVersion);
|
||||
this.matchVersion = matchVersion;
|
||||
this(new CharArrayMap<Object>(matchVersion, startSize, ignoreCase));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -101,7 +88,7 @@ public class CharArraySet extends AbstractSet<Object> {
|
|||
* <code>false</code> if and only if the set should be case sensitive
|
||||
* otherwise <code>true</code>.
|
||||
*/
|
||||
public CharArraySet(Version matchVersion, Collection<? extends Object> c, boolean ignoreCase) {
|
||||
public CharArraySet(Version matchVersion, Collection<?> c, boolean ignoreCase) {
|
||||
this(matchVersion, c.size(), ignoreCase);
|
||||
addAll(c);
|
||||
}
|
||||
|
@ -132,77 +119,51 @@ public class CharArraySet extends AbstractSet<Object> {
|
|||
* @deprecated use {@link #CharArraySet(Version, Collection, boolean)} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public CharArraySet(Collection<? extends Object> c, boolean ignoreCase) {
|
||||
public CharArraySet(Collection<?> c, boolean ignoreCase) {
|
||||
this(Version.LUCENE_30, c.size(), ignoreCase);
|
||||
addAll(c);
|
||||
}
|
||||
|
||||
/** Create set from entries */
|
||||
private CharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase, int count){
|
||||
this.entries = entries;
|
||||
this.ignoreCase = ignoreCase;
|
||||
this.count = count;
|
||||
this.charUtils = CharacterUtils.getInstance(matchVersion);
|
||||
this.matchVersion = matchVersion;
|
||||
/** Create set from the specified map (internal only), used also by {@link CharArrayMap#keySet()} */
|
||||
CharArraySet(final CharArrayMap<Object> map){
|
||||
this.map = map;
|
||||
}
|
||||
|
||||
/** Clears all entries in this set. This method is supported for reusing, but not {@link Set#remove}. */
|
||||
@Override
|
||||
public void clear() {
|
||||
count = 0;
|
||||
Arrays.fill(entries, null);
|
||||
map.clear();
|
||||
}
|
||||
|
||||
/** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
|
||||
* are in the set */
|
||||
public boolean contains(char[] text, int off, int len) {
|
||||
return entries[getSlot(text, off, len)] != null;
|
||||
return map.containsKey(text, off, len);
|
||||
}
|
||||
|
||||
/** true if the <code>CharSequence</code> is in the set */
|
||||
public boolean contains(CharSequence cs) {
|
||||
return entries[getSlot(cs)] != null;
|
||||
return map.containsKey(cs);
|
||||
}
|
||||
|
||||
private int getSlot(char[] text, int off, int len) {
|
||||
int code = getHashCode(text, off, len);
|
||||
int pos = code & (entries.length-1);
|
||||
char[] text2 = entries[pos];
|
||||
if (text2 != null && !equals(text, off, len, text2)) {
|
||||
final int inc = ((code>>8)+code)|1;
|
||||
do {
|
||||
code += inc;
|
||||
pos = code & (entries.length-1);
|
||||
text2 = entries[pos];
|
||||
} while (text2 != null && !equals(text, off, len, text2));
|
||||
}
|
||||
return pos;
|
||||
@Override
|
||||
public boolean contains(Object o) {
|
||||
return map.containsKey(o);
|
||||
}
|
||||
|
||||
/** Returns true if the String is in the set */
|
||||
private int getSlot(CharSequence text) {
|
||||
int code = getHashCode(text);
|
||||
int pos = code & (entries.length-1);
|
||||
char[] text2 = entries[pos];
|
||||
if (text2 != null && !equals(text, text2)) {
|
||||
final int inc = ((code>>8)+code)|1;
|
||||
do {
|
||||
code += inc;
|
||||
pos = code & (entries.length-1);
|
||||
text2 = entries[pos];
|
||||
} while (text2 != null && !equals(text, text2));
|
||||
}
|
||||
return pos;
|
||||
@Override
|
||||
public boolean add(Object o) {
|
||||
return map.put(o, PLACEHOLDER) == null;
|
||||
}
|
||||
|
||||
/** Add this CharSequence into the set */
|
||||
public boolean add(CharSequence text) {
|
||||
return add(text.toString()); // could be more efficient
|
||||
return map.put(text, PLACEHOLDER) == null;
|
||||
}
|
||||
|
||||
/** Add this String into the set */
|
||||
public boolean add(String text) {
|
||||
return add(text.toCharArray());
|
||||
return map.put(text, PLACEHOLDER) == null;
|
||||
}
|
||||
|
||||
/** Add this char[] directly to the set.
|
||||
|
@ -210,140 +171,12 @@ public class CharArraySet extends AbstractSet<Object> {
|
|||
* The user should never modify this text array after calling this method.
|
||||
*/
|
||||
public boolean add(char[] text) {
|
||||
if (ignoreCase)
|
||||
for(int i=0;i<text.length;){
|
||||
i += Character.toChars(
|
||||
Character.toLowerCase(
|
||||
charUtils.codePointAt(text, i)), text, i);
|
||||
}
|
||||
int slot = getSlot(text, 0, text.length);
|
||||
if (entries[slot] != null) return false;
|
||||
entries[slot] = text;
|
||||
count++;
|
||||
|
||||
if (count + (count>>2) > entries.length) {
|
||||
rehash();
|
||||
}
|
||||
|
||||
return true;
|
||||
return map.put(text, PLACEHOLDER) == null;
|
||||
}
|
||||
|
||||
private boolean equals(char[] text1, int off, int len, char[] text2) {
|
||||
if (len != text2.length)
|
||||
return false;
|
||||
final int limit = off+len;
|
||||
if (ignoreCase) {
|
||||
for(int i=0;i<len;) {
|
||||
final int codePointAt = charUtils.codePointAt(text1, off+i, limit);
|
||||
if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
|
||||
return false;
|
||||
i += Character.charCount(codePointAt);
|
||||
}
|
||||
} else {
|
||||
for(int i=0;i<len;i++) {
|
||||
if (text1[off+i] != text2[i])
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean equals(CharSequence text1, char[] text2) {
|
||||
int len = text1.length();
|
||||
if (len != text2.length)
|
||||
return false;
|
||||
if (ignoreCase) {
|
||||
for(int i=0;i<len;) {
|
||||
final int codePointAt = charUtils.codePointAt(text1, i);
|
||||
if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
|
||||
return false;
|
||||
i += Character.charCount(codePointAt);
|
||||
}
|
||||
} else {
|
||||
for(int i=0;i<len;i++) {
|
||||
if (text1.charAt(i) != text2[i])
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void rehash() {
|
||||
final int newSize = 2*entries.length;
|
||||
char[][] oldEntries = entries;
|
||||
entries = new char[newSize][];
|
||||
|
||||
for(int i=0;i<oldEntries.length;i++) {
|
||||
char[] text = oldEntries[i];
|
||||
if (text != null) {
|
||||
// todo: could be faster... no need to compare strings on collision
|
||||
entries[getSlot(text,0,text.length)] = text;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int getHashCode(char[] text, int offset, int len) {
|
||||
int code = 0;
|
||||
final int stop = offset + len;
|
||||
if (ignoreCase) {
|
||||
for (int i=offset; i<stop;) {
|
||||
final int codePointAt = charUtils.codePointAt(text, i, stop);
|
||||
code = code*31 + Character.toLowerCase(codePointAt);
|
||||
i += Character.charCount(codePointAt);
|
||||
}
|
||||
} else {
|
||||
for (int i=offset; i<stop; i++) {
|
||||
code = code*31 + text[i];
|
||||
}
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
private int getHashCode(CharSequence text) {
|
||||
int code = 0;
|
||||
int len = text.length();
|
||||
if (ignoreCase) {
|
||||
for (int i=0; i<len;) {
|
||||
int codePointAt = charUtils.codePointAt(text, i);
|
||||
code = code*31 + Character.toLowerCase(codePointAt);
|
||||
i += Character.charCount(codePointAt);
|
||||
}
|
||||
} else {
|
||||
for (int i=0; i<len; i++) {
|
||||
code = code*31 + text.charAt(i);
|
||||
}
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return count==0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(Object o) {
|
||||
if (o instanceof char[]) {
|
||||
final char[] text = (char[])o;
|
||||
return contains(text, 0, text.length);
|
||||
}
|
||||
return contains(o.toString());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean add(Object o) {
|
||||
if (o instanceof char[]) {
|
||||
return add((char[])o);
|
||||
}
|
||||
return add(o.toString());
|
||||
return map.size();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -361,14 +194,9 @@ public class CharArraySet extends AbstractSet<Object> {
|
|||
throw new NullPointerException("Given set is null");
|
||||
if (set == EMPTY_SET)
|
||||
return EMPTY_SET;
|
||||
if (set instanceof UnmodifiableCharArraySet)
|
||||
if (set.map instanceof CharArrayMap.UnmodifiableCharArrayMap)
|
||||
return set;
|
||||
|
||||
/*
|
||||
* Instead of delegating calls to the given set copy the low-level values to
|
||||
* the unmodifiable Subclass
|
||||
*/
|
||||
return new UnmodifiableCharArraySet(set.matchVersion, set.entries, set.ignoreCase, set.count);
|
||||
return new CharArraySet(CharArrayMap.unmodifiableMap(set.map));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -415,29 +243,27 @@ public class CharArraySet extends AbstractSet<Object> {
|
|||
return EMPTY_SET;
|
||||
if(set instanceof CharArraySet) {
|
||||
final CharArraySet source = (CharArraySet) set;
|
||||
// use fast path instead of iterating all values
|
||||
// this is even on very small sets ~10 times faster than iterating
|
||||
final char[][] entries = new char[source.entries.length][];
|
||||
System.arraycopy(source.entries, 0, entries, 0, entries.length);
|
||||
return new CharArraySet(source.matchVersion, entries, source.ignoreCase, source.count);
|
||||
return new CharArraySet(CharArrayMap.copy(source.map.matchVersion, source.map));
|
||||
}
|
||||
return new CharArraySet(matchVersion, set, false);
|
||||
}
|
||||
|
||||
|
||||
/** The Iterator<String> for this set. Strings are constructed on the fly, so
|
||||
* use <code>nextCharArray</code> for more efficient access. */
|
||||
* use <code>nextCharArray</code> for more efficient access.
|
||||
* @deprecated Use the standard iterator, which returns {@code char[]} instances.
|
||||
*/
|
||||
@Deprecated
|
||||
public class CharArraySetIterator implements Iterator<String> {
|
||||
int pos=-1;
|
||||
char[] next;
|
||||
CharArraySetIterator() {
|
||||
private CharArraySetIterator() {
|
||||
goNext();
|
||||
}
|
||||
|
||||
private void goNext() {
|
||||
next = null;
|
||||
pos++;
|
||||
while (pos < entries.length && (next=entries[pos]) == null) pos++;
|
||||
while (pos < map.keys.length && (next=map.keys[pos]) == null) pos++;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
|
@ -462,93 +288,41 @@ public class CharArraySet extends AbstractSet<Object> {
|
|||
}
|
||||
}
|
||||
|
||||
/** returns an iterator of new allocated Strings */
|
||||
/** returns an iterator of new allocated Strings (an instance of {@link CharArraySetIterator}).
|
||||
* @deprecated Use {@link #iterator}, which returns {@code char[]} instances.
|
||||
*/
|
||||
@Deprecated
|
||||
public Iterator<String> stringIterator() {
|
||||
return new CharArraySetIterator();
|
||||
}
|
||||
|
||||
/** returns an iterator of new allocated Strings, this method violates the Set interface */
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
/** Returns an {@link Iterator} depending on the version used:
|
||||
* <ul>
|
||||
* <li>if {@code matchVersion} ≥ 3.1, it returns {@code char[]} instances in this set.</li>
|
||||
* <li>if {@code matchVersion} is 3.0 or older, it returns new
|
||||
* allocated Strings, so this method violates the Set interface.
|
||||
* It is kept this way for backwards compatibility, normally it should
|
||||
* return {@code char[]} on {@code next()}</li>
|
||||
* </ul>
|
||||
*/
|
||||
@Override @SuppressWarnings("unchecked")
|
||||
public Iterator<Object> iterator() {
|
||||
return (Iterator) stringIterator();
|
||||
// use the AbstractSet#keySet()'s iterator (to not produce endless recursion)
|
||||
return map.matchVersion.onOrAfter(Version.LUCENE_31) ?
|
||||
map.originalKeySet().iterator() : (Iterator) stringIterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Efficient unmodifiable {@link CharArraySet}. This implementation does not
|
||||
* delegate calls to a give {@link CharArraySet} like
|
||||
* {@link Collections#unmodifiableSet(java.util.Set)} does. Instead is passes
|
||||
* the internal representation of a {@link CharArraySet} to a super
|
||||
* constructor and overrides all mutators.
|
||||
*/
|
||||
private static class UnmodifiableCharArraySet extends CharArraySet {
|
||||
|
||||
private UnmodifiableCharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase,
|
||||
int count) {
|
||||
super(matchVersion, entries, ignoreCase, count);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean add(Object o){
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean addAll(Collection<? extends Object> coll) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean add(char[] text) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean add(CharSequence text) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean add(String text) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Empty {@link UnmodifiableCharArraySet} optimized for speed.
|
||||
* Contains checks will always return <code>false</code> or throw
|
||||
* NPE if necessary.
|
||||
*/
|
||||
private static final class EmptyCharArraySet extends UnmodifiableCharArraySet {
|
||||
|
||||
private EmptyCharArraySet() {
|
||||
super(Version.LUCENE_CURRENT, new char[0][], false, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(char[] text, int off, int len) {
|
||||
if(text == null)
|
||||
throw new NullPointerException();
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(CharSequence cs) {
|
||||
if(cs == null)
|
||||
throw new NullPointerException();
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(Object o) {
|
||||
if(o == null)
|
||||
throw new NullPointerException();
|
||||
return false;
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("[");
|
||||
for (Object item : this) {
|
||||
if (sb.length()>1) sb.append(", ");
|
||||
if (item instanceof char[]) {
|
||||
sb.append((char[]) item);
|
||||
} else {
|
||||
sb.append(item);
|
||||
}
|
||||
}
|
||||
return sb.append(']').toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,243 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.analysis;
|
||||
|
||||
import java.util.*;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
public class TestCharArrayMap extends LuceneTestCase {
|
||||
Random r = newRandom();
|
||||
|
||||
public void doRandom(int iter, boolean ignoreCase) {
|
||||
CharArrayMap<Integer> map = new CharArrayMap<Integer>(Version.LUCENE_CURRENT, 1, ignoreCase);
|
||||
HashMap<String,Integer> hmap = new HashMap<String,Integer>();
|
||||
|
||||
char[] key;
|
||||
for (int i=0; i<iter; i++) {
|
||||
int len = r.nextInt(5);
|
||||
key = new char[len];
|
||||
for (int j=0; j<key.length; j++) {
|
||||
key[j] = (char)r.nextInt(127);
|
||||
}
|
||||
String keyStr = new String(key);
|
||||
String hmapKey = ignoreCase ? keyStr.toLowerCase() : keyStr;
|
||||
|
||||
int val = r.nextInt();
|
||||
|
||||
Object o1 = map.put(key, val);
|
||||
Object o2 = hmap.put(hmapKey,val);
|
||||
assertEquals(o1,o2);
|
||||
|
||||
// add it again with the string method
|
||||
assertEquals(val, map.put(keyStr,val).intValue());
|
||||
|
||||
assertEquals(val, map.get(key,0,key.length).intValue());
|
||||
assertEquals(val, map.get(key).intValue());
|
||||
assertEquals(val, map.get(keyStr).intValue());
|
||||
|
||||
assertEquals(hmap.size(), map.size());
|
||||
}
|
||||
}
|
||||
|
||||
public void testCharArrayMap() {
|
||||
for (int i=0; i<5; i++) { // pump this up for more random testing
|
||||
doRandom(1000,false);
|
||||
doRandom(1000,true);
|
||||
}
|
||||
}
|
||||
|
||||
public void testMethods() {
|
||||
CharArrayMap<Integer> cm = new CharArrayMap<Integer>(Version.LUCENE_CURRENT, 2, false);
|
||||
HashMap<String,Integer> hm = new HashMap<String,Integer>();
|
||||
hm.put("foo",1);
|
||||
hm.put("bar",2);
|
||||
cm.putAll(hm);
|
||||
assertEquals(hm.size(), cm.size());
|
||||
hm.put("baz", 3);
|
||||
cm.putAll(hm);
|
||||
assertEquals(hm.size(), cm.size());
|
||||
|
||||
CharArraySet cs = cm.keySet();
|
||||
int n=0;
|
||||
for (Object o : cs) {
|
||||
assertTrue(cm.containsKey(o));
|
||||
assertTrue(cm.containsKey((char[]) o));
|
||||
n++;
|
||||
}
|
||||
assertEquals(hm.size(), n);
|
||||
assertEquals(hm.size(), cs.size());
|
||||
assertEquals(cm.size(), cs.size());
|
||||
cs.clear();
|
||||
assertEquals(0, cs.size());
|
||||
assertEquals(0, cm.size());
|
||||
try {
|
||||
cs.add("test");
|
||||
fail("keySet() allows adding new keys");
|
||||
} catch (UnsupportedOperationException ue) {
|
||||
// pass
|
||||
}
|
||||
cm.putAll(hm);
|
||||
assertEquals(hm.size(), cs.size());
|
||||
assertEquals(cm.size(), cs.size());
|
||||
|
||||
Iterator<Map.Entry<Object,Integer>> iter1 = cm.entrySet().iterator();
|
||||
n=0;
|
||||
while (iter1.hasNext()) {
|
||||
Map.Entry<Object,Integer> entry = iter1.next();
|
||||
Object key = entry.getKey();
|
||||
Integer val = entry.getValue();
|
||||
assertEquals(cm.get(key), val);
|
||||
entry.setValue(val*100);
|
||||
assertEquals(val*100, (int)cm.get(key));
|
||||
n++;
|
||||
}
|
||||
assertEquals(hm.size(), n);
|
||||
cm.clear();
|
||||
cm.putAll(hm);
|
||||
assertEquals(cm.size(), n);
|
||||
|
||||
CharArrayMap<Integer>.EntryIterator iter2 = cm.entrySet().iterator();
|
||||
n=0;
|
||||
while (iter2.hasNext()) {
|
||||
char[] keyc = iter2.nextKey();
|
||||
Integer val = iter2.currentValue();
|
||||
assertEquals(hm.get(new String(keyc)), val);
|
||||
iter2.setValue(val*100);
|
||||
assertEquals(val*100, (int)cm.get(keyc));
|
||||
n++;
|
||||
}
|
||||
assertEquals(hm.size(), n);
|
||||
|
||||
cm.entrySet().clear();
|
||||
assertEquals(0, cm.size());
|
||||
assertEquals(0, cm.entrySet().size());
|
||||
assertTrue(cm.isEmpty());
|
||||
}
|
||||
|
||||
public void testModifyOnUnmodifiable(){
|
||||
CharArrayMap<Integer> map = new CharArrayMap<Integer>(Version.LUCENE_CURRENT, 2, false);
|
||||
map.put("foo",1);
|
||||
map.put("bar",2);
|
||||
final int size = map.size();
|
||||
assertEquals(2, size);
|
||||
assertTrue(map.containsKey("foo"));
|
||||
assertEquals(1, map.get("foo").intValue());
|
||||
assertTrue(map.containsKey("bar"));
|
||||
assertEquals(2, map.get("bar").intValue());
|
||||
|
||||
map = CharArrayMap.unmodifiableMap(map);
|
||||
assertEquals("Map size changed due to unmodifiableMap call" , size, map.size());
|
||||
String NOT_IN_MAP = "SirGallahad";
|
||||
assertFalse("Test String already exists in map", map.containsKey(NOT_IN_MAP));
|
||||
assertNull("Test String already exists in map", map.get(NOT_IN_MAP));
|
||||
|
||||
try{
|
||||
map.put(NOT_IN_MAP.toCharArray(), 3);
|
||||
fail("Modified unmodifiable map");
|
||||
}catch (UnsupportedOperationException e) {
|
||||
// expected
|
||||
assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
|
||||
assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
|
||||
assertEquals("Size of unmodifiable map has changed", size, map.size());
|
||||
}
|
||||
|
||||
try{
|
||||
map.put(NOT_IN_MAP, 3);
|
||||
fail("Modified unmodifiable map");
|
||||
}catch (UnsupportedOperationException e) {
|
||||
// expected
|
||||
assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
|
||||
assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
|
||||
assertEquals("Size of unmodifiable map has changed", size, map.size());
|
||||
}
|
||||
|
||||
try{
|
||||
map.put(new StringBuilder(NOT_IN_MAP), 3);
|
||||
fail("Modified unmodifiable map");
|
||||
}catch (UnsupportedOperationException e) {
|
||||
// expected
|
||||
assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
|
||||
assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
|
||||
assertEquals("Size of unmodifiable map has changed", size, map.size());
|
||||
}
|
||||
|
||||
try{
|
||||
map.clear();
|
||||
fail("Modified unmodifiable map");
|
||||
}catch (UnsupportedOperationException e) {
|
||||
// expected
|
||||
assertEquals("Size of unmodifiable map has changed", size, map.size());
|
||||
}
|
||||
|
||||
try{
|
||||
map.entrySet().clear();
|
||||
fail("Modified unmodifiable map");
|
||||
}catch (UnsupportedOperationException e) {
|
||||
// expected
|
||||
assertEquals("Size of unmodifiable map has changed", size, map.size());
|
||||
}
|
||||
|
||||
try{
|
||||
map.keySet().clear();
|
||||
fail("Modified unmodifiable map");
|
||||
}catch (UnsupportedOperationException e) {
|
||||
// expected
|
||||
assertEquals("Size of unmodifiable map has changed", size, map.size());
|
||||
}
|
||||
|
||||
try{
|
||||
map.put((Object) NOT_IN_MAP, 3);
|
||||
fail("Modified unmodifiable map");
|
||||
}catch (UnsupportedOperationException e) {
|
||||
// expected
|
||||
assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
|
||||
assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
|
||||
assertEquals("Size of unmodifiable map has changed", size, map.size());
|
||||
}
|
||||
|
||||
try{
|
||||
map.putAll(Collections.singletonMap(NOT_IN_MAP, 3));
|
||||
fail("Modified unmodifiable map");
|
||||
}catch (UnsupportedOperationException e) {
|
||||
// expected
|
||||
assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
|
||||
assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
|
||||
assertEquals("Size of unmodifiable map has changed", size, map.size());
|
||||
}
|
||||
|
||||
assertTrue(map.containsKey("foo"));
|
||||
assertEquals(1, map.get("foo").intValue());
|
||||
assertTrue(map.containsKey("bar"));
|
||||
assertEquals(2, map.get("bar").intValue());
|
||||
}
|
||||
|
||||
public void testToString() {
|
||||
CharArrayMap<Integer> cm = new CharArrayMap<Integer>(Version.LUCENE_CURRENT, Collections.singletonMap("test",1), false);
|
||||
assertEquals("[test]",cm.keySet().toString());
|
||||
assertEquals("[1]",cm.values().toString());
|
||||
assertEquals("[test=1]",cm.entrySet().toString());
|
||||
assertEquals("{test=1}",cm.toString());
|
||||
cm.put("test2", 2);
|
||||
assertTrue(cm.keySet().toString().contains(", "));
|
||||
assertTrue(cm.values().toString().contains(", "));
|
||||
assertTrue(cm.entrySet().toString().contains(", "));
|
||||
assertTrue(cm.toString().contains(", "));
|
||||
}
|
||||
}
|
||||
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.analysis;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
@ -92,7 +93,7 @@ public class TestCharArraySet extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testModifyOnUnmodifiable(){
|
||||
CharArraySet set=new CharArraySet(Version.LUCENE_CURRENT, 10,true);
|
||||
CharArraySet set=new CharArraySet(Version.LUCENE_CURRENT, 10, true);
|
||||
set.addAll(Arrays.asList(TEST_STOP_WORDS));
|
||||
final int size = set.size();
|
||||
set = CharArraySet.unmodifiableSet(set);
|
||||
|
@ -143,8 +144,12 @@ public class TestCharArraySet extends LuceneTestCase {
|
|||
assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
|
||||
assertEquals("Size of unmodifiable set has changed", size, set.size());
|
||||
}
|
||||
|
||||
// This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
|
||||
// current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
|
||||
// remove() on the iterator
|
||||
try{
|
||||
set.removeAll(Arrays.asList(TEST_STOP_WORDS));
|
||||
set.removeAll(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(TEST_STOP_WORDS), true));
|
||||
fail("Modified unmodifiable set");
|
||||
}catch (UnsupportedOperationException e) {
|
||||
// expected
|
||||
|
@ -152,7 +157,7 @@ public class TestCharArraySet extends LuceneTestCase {
|
|||
}
|
||||
|
||||
try{
|
||||
set.retainAll(Arrays.asList(new String[]{NOT_IN_SET}));
|
||||
set.retainAll(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(NOT_IN_SET), true));
|
||||
fail("Modified unmodifiable set");
|
||||
}catch (UnsupportedOperationException e) {
|
||||
// expected
|
||||
|
@ -491,4 +496,11 @@ public class TestCharArraySet extends LuceneTestCase {
|
|||
fail("null value must raise NPE");
|
||||
} catch (NullPointerException e) {}
|
||||
}
|
||||
|
||||
public void testToString() {
|
||||
CharArraySet set = CharArraySet.copy(Version.LUCENE_CURRENT, Collections.singleton("test"));
|
||||
assertEquals("[test]", set.toString());
|
||||
set.add("test2");
|
||||
assertTrue(set.toString().contains(", "));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue