mirror of https://github.com/apache/lucene.git
Replace Map<Character> by CharObjectHashMap and Set<Character> by CharHashSet. (#13420)
Also optimize the character replacement in JapaneseKatakanaUppercaseFilter.
This commit is contained in:
parent
444d4e7c42
commit
6e2a8fc9b7
|
@ -18,13 +18,13 @@ package org.apache.lucene.analysis.charfilter;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.Map;
|
|
||||||
import org.apache.lucene.analysis.CharFilter; // javadocs
|
import org.apache.lucene.analysis.CharFilter; // javadocs
|
||||||
import org.apache.lucene.analysis.util.RollingCharBuffer;
|
import org.apache.lucene.analysis.util.RollingCharBuffer;
|
||||||
import org.apache.lucene.util.CharsRef;
|
import org.apache.lucene.util.CharsRef;
|
||||||
import org.apache.lucene.util.fst.CharSequenceOutputs;
|
import org.apache.lucene.util.fst.CharSequenceOutputs;
|
||||||
import org.apache.lucene.util.fst.FST;
|
import org.apache.lucene.util.fst.FST;
|
||||||
import org.apache.lucene.util.fst.Outputs;
|
import org.apache.lucene.util.fst.Outputs;
|
||||||
|
import org.apache.lucene.util.hppc.CharObjectHashMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simplistic {@link CharFilter} that applies the mappings contained in a {@link NormalizeCharMap}
|
* Simplistic {@link CharFilter} that applies the mappings contained in a {@link NormalizeCharMap}
|
||||||
|
@ -38,7 +38,7 @@ public class MappingCharFilter extends BaseCharFilter {
|
||||||
private final FST.BytesReader fstReader;
|
private final FST.BytesReader fstReader;
|
||||||
private final RollingCharBuffer buffer = new RollingCharBuffer();
|
private final RollingCharBuffer buffer = new RollingCharBuffer();
|
||||||
private final FST.Arc<CharsRef> scratchArc = new FST.Arc<>();
|
private final FST.Arc<CharsRef> scratchArc = new FST.Arc<>();
|
||||||
private final Map<Character, FST.Arc<CharsRef>> cachedRootArcs;
|
private final CharObjectHashMap<FST.Arc<CharsRef>> cachedRootArcs;
|
||||||
|
|
||||||
private CharsRef replacement;
|
private CharsRef replacement;
|
||||||
private int replacementPointer;
|
private int replacementPointer;
|
||||||
|
@ -96,7 +96,7 @@ public class MappingCharFilter extends BaseCharFilter {
|
||||||
|
|
||||||
final int firstCH = buffer.get(inputOff);
|
final int firstCH = buffer.get(inputOff);
|
||||||
if (firstCH != -1) {
|
if (firstCH != -1) {
|
||||||
FST.Arc<CharsRef> arc = cachedRootArcs.get(Character.valueOf((char) firstCH));
|
FST.Arc<CharsRef> arc = cachedRootArcs.get((char) firstCH);
|
||||||
if (arc != null) {
|
if (arc != null) {
|
||||||
if (!FST.targetHasArcs(arc)) {
|
if (!FST.targetHasArcs(arc)) {
|
||||||
// Fast pass for single character match:
|
// Fast pass for single character match:
|
||||||
|
|
|
@ -17,7 +17,6 @@
|
||||||
package org.apache.lucene.analysis.charfilter;
|
package org.apache.lucene.analysis.charfilter;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import org.apache.lucene.util.CharsRef;
|
import org.apache.lucene.util.CharsRef;
|
||||||
|
@ -27,6 +26,7 @@ import org.apache.lucene.util.fst.FST;
|
||||||
import org.apache.lucene.util.fst.FSTCompiler;
|
import org.apache.lucene.util.fst.FSTCompiler;
|
||||||
import org.apache.lucene.util.fst.Outputs;
|
import org.apache.lucene.util.fst.Outputs;
|
||||||
import org.apache.lucene.util.fst.Util;
|
import org.apache.lucene.util.fst.Util;
|
||||||
|
import org.apache.lucene.util.hppc.CharObjectHashMap;
|
||||||
|
|
||||||
// TODO: save/load?
|
// TODO: save/load?
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ import org.apache.lucene.util.fst.Util;
|
||||||
public class NormalizeCharMap {
|
public class NormalizeCharMap {
|
||||||
|
|
||||||
final FST<CharsRef> map;
|
final FST<CharsRef> map;
|
||||||
final Map<Character, FST.Arc<CharsRef>> cachedRootArcs = new HashMap<>();
|
final CharObjectHashMap<FST.Arc<CharsRef>> cachedRootArcs = new CharObjectHashMap<>();
|
||||||
|
|
||||||
// Use the builder to create:
|
// Use the builder to create:
|
||||||
private NormalizeCharMap(FST<CharsRef> map) {
|
private NormalizeCharMap(FST<CharsRef> map) {
|
||||||
|
@ -53,8 +53,7 @@ public class NormalizeCharMap {
|
||||||
while (true) {
|
while (true) {
|
||||||
assert scratchArc.label() != FST.END_LABEL;
|
assert scratchArc.label() != FST.END_LABEL;
|
||||||
cachedRootArcs.put(
|
cachedRootArcs.put(
|
||||||
Character.valueOf((char) scratchArc.label()),
|
(char) scratchArc.label(), new FST.Arc<CharsRef>().copyFrom(scratchArc));
|
||||||
new FST.Arc<CharsRef>().copyFrom(scratchArc));
|
|
||||||
if (scratchArc.isLast()) {
|
if (scratchArc.isLast()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,7 +41,6 @@ import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.LinkedHashSet;
|
import java.util.LinkedHashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -49,7 +48,6 @@ import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
|
||||||
import org.apache.lucene.analysis.hunspell.SortingStrategy.EntryAccumulator;
|
import org.apache.lucene.analysis.hunspell.SortingStrategy.EntryAccumulator;
|
||||||
import org.apache.lucene.analysis.hunspell.SortingStrategy.EntrySupplier;
|
import org.apache.lucene.analysis.hunspell.SortingStrategy.EntrySupplier;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
@ -60,6 +58,7 @@ import org.apache.lucene.util.fst.FST;
|
||||||
import org.apache.lucene.util.fst.FSTCompiler;
|
import org.apache.lucene.util.fst.FSTCompiler;
|
||||||
import org.apache.lucene.util.fst.IntSequenceOutputs;
|
import org.apache.lucene.util.fst.IntSequenceOutputs;
|
||||||
import org.apache.lucene.util.fst.Util;
|
import org.apache.lucene.util.fst.Util;
|
||||||
|
import org.apache.lucene.util.hppc.CharHashSet;
|
||||||
import org.apache.lucene.util.hppc.IntArrayList;
|
import org.apache.lucene.util.hppc.IntArrayList;
|
||||||
import org.apache.lucene.util.hppc.IntCursor;
|
import org.apache.lucene.util.hppc.IntCursor;
|
||||||
|
|
||||||
|
@ -334,8 +333,8 @@ public class Dictionary {
|
||||||
throws IOException, ParseException {
|
throws IOException, ParseException {
|
||||||
TreeMap<String, IntArrayList> prefixes = new TreeMap<>();
|
TreeMap<String, IntArrayList> prefixes = new TreeMap<>();
|
||||||
TreeMap<String, IntArrayList> suffixes = new TreeMap<>();
|
TreeMap<String, IntArrayList> suffixes = new TreeMap<>();
|
||||||
Set<Character> prefixContFlags = new HashSet<>();
|
CharHashSet prefixContFlags = new CharHashSet();
|
||||||
Set<Character> suffixContFlags = new HashSet<>();
|
CharHashSet suffixContFlags = new CharHashSet();
|
||||||
Map<String, Integer> seenPatterns = new HashMap<>();
|
Map<String, Integer> seenPatterns = new HashMap<>();
|
||||||
|
|
||||||
// zero condition -> 0 ord
|
// zero condition -> 0 ord
|
||||||
|
@ -673,7 +672,7 @@ public class Dictionary {
|
||||||
*/
|
*/
|
||||||
private void parseAffix(
|
private void parseAffix(
|
||||||
TreeMap<String, IntArrayList> affixes,
|
TreeMap<String, IntArrayList> affixes,
|
||||||
Set<Character> secondStageFlags,
|
CharHashSet secondStageFlags,
|
||||||
String header,
|
String header,
|
||||||
LineNumberReader reader,
|
LineNumberReader reader,
|
||||||
AffixKind kind,
|
AffixKind kind,
|
||||||
|
@ -1178,10 +1177,14 @@ public class Dictionary {
|
||||||
}
|
}
|
||||||
|
|
||||||
char[] allNonSuggestibleFlags() {
|
char[] allNonSuggestibleFlags() {
|
||||||
return Dictionary.toSortedCharArray(
|
CharHashSet set = new CharHashSet(5);
|
||||||
Stream.of(HIDDEN_FLAG, noSuggest, forbiddenword, onlyincompound, subStandard)
|
set.add(HIDDEN_FLAG);
|
||||||
.filter(c -> c != FLAG_UNSET)
|
for (char c : new char[] {noSuggest, forbiddenword, onlyincompound, subStandard}) {
|
||||||
.collect(Collectors.toSet()));
|
if (c != FLAG_UNSET) {
|
||||||
|
set.add(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Dictionary.toSortedCharArray(set);
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<String> readMorphFields(String word, String unparsed) {
|
private List<String> readMorphFields(String word, String unparsed) {
|
||||||
|
@ -1538,12 +1541,8 @@ public class Dictionary {
|
||||||
return reuse;
|
return reuse;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char[] toSortedCharArray(Set<Character> set) {
|
static char[] toSortedCharArray(CharHashSet set) {
|
||||||
char[] chars = new char[set.size()];
|
char[] chars = set.toArray();
|
||||||
int i = 0;
|
|
||||||
for (Character c : set) {
|
|
||||||
chars[i++] = c;
|
|
||||||
}
|
|
||||||
Arrays.sort(chars);
|
Arrays.sort(chars);
|
||||||
return chars;
|
return chars;
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,6 +42,8 @@ import org.apache.lucene.analysis.hunspell.AffixedWord.Affix;
|
||||||
import org.apache.lucene.util.IntsRef;
|
import org.apache.lucene.util.IntsRef;
|
||||||
import org.apache.lucene.util.fst.FST;
|
import org.apache.lucene.util.fst.FST;
|
||||||
import org.apache.lucene.util.fst.IntsRefFSTEnum;
|
import org.apache.lucene.util.fst.IntsRefFSTEnum;
|
||||||
|
import org.apache.lucene.util.hppc.CharHashSet;
|
||||||
|
import org.apache.lucene.util.hppc.CharObjectHashMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A utility class used for generating possible word forms by adding affixes to stems ({@link
|
* A utility class used for generating possible word forms by adding affixes to stems ({@link
|
||||||
|
@ -50,7 +52,7 @@ import org.apache.lucene.util.fst.IntsRefFSTEnum;
|
||||||
*/
|
*/
|
||||||
public class WordFormGenerator {
|
public class WordFormGenerator {
|
||||||
private final Dictionary dictionary;
|
private final Dictionary dictionary;
|
||||||
private final Map<Character, List<AffixEntry>> affixes = new HashMap<>();
|
private final CharObjectHashMap<List<AffixEntry>> affixes = new CharObjectHashMap<>();
|
||||||
private final Stemmer stemmer;
|
private final Stemmer stemmer;
|
||||||
|
|
||||||
public WordFormGenerator(Dictionary dictionary) {
|
public WordFormGenerator(Dictionary dictionary) {
|
||||||
|
@ -75,7 +77,15 @@ public class WordFormGenerator {
|
||||||
char flag = dictionary.affixData(id, AFFIX_FLAG);
|
char flag = dictionary.affixData(id, AFFIX_FLAG);
|
||||||
var entry =
|
var entry =
|
||||||
new AffixEntry(id, flag, kind, toString(kind, io.input), strip(id), condition(id));
|
new AffixEntry(id, flag, kind, toString(kind, io.input), strip(id), condition(id));
|
||||||
affixes.computeIfAbsent(flag, __ -> new ArrayList<>()).add(entry);
|
List<AffixEntry> entries;
|
||||||
|
int index = affixes.indexOf(flag);
|
||||||
|
if (index < 0) {
|
||||||
|
entries = new ArrayList<>();
|
||||||
|
affixes.indexInsert(index, flag, entries);
|
||||||
|
} else {
|
||||||
|
entries = affixes.indexGet(index);
|
||||||
|
}
|
||||||
|
entries.add(entry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -162,11 +172,7 @@ public class WordFormGenerator {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static char[] deduplicate(char[] flags) {
|
private static char[] deduplicate(char[] flags) {
|
||||||
Set<Character> set = new HashSet<>();
|
return toSortedCharArray(CharHashSet.from(flags));
|
||||||
for (char flag : flags) {
|
|
||||||
set.add(flag);
|
|
||||||
}
|
|
||||||
return toSortedCharArray(set);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -408,7 +414,7 @@ public class WordFormGenerator {
|
||||||
int innerSuffix) {
|
int innerSuffix) {
|
||||||
String candidate = new String(word, offset, length);
|
String candidate = new String(word, offset, length);
|
||||||
stemCounts.merge(candidate, 1, Integer::sum);
|
stemCounts.merge(candidate, 1, Integer::sum);
|
||||||
Set<Character> flags = new LinkedHashSet<>();
|
CharHashSet flags = new CharHashSet();
|
||||||
if (outerPrefix >= 0) flags.add(dictionary.affixData(outerPrefix, AFFIX_FLAG));
|
if (outerPrefix >= 0) flags.add(dictionary.affixData(outerPrefix, AFFIX_FLAG));
|
||||||
if (innerPrefix >= 0) flags.add(dictionary.affixData(innerPrefix, AFFIX_FLAG));
|
if (innerPrefix >= 0) flags.add(dictionary.affixData(innerPrefix, AFFIX_FLAG));
|
||||||
if (outerSuffix >= 0) flags.add(dictionary.affixData(outerSuffix, AFFIX_FLAG));
|
if (outerSuffix >= 0) flags.add(dictionary.affixData(outerSuffix, AFFIX_FLAG));
|
||||||
|
@ -479,7 +485,7 @@ public class WordFormGenerator {
|
||||||
if (wordSet.contains(extra)) continue;
|
if (wordSet.contains(extra)) continue;
|
||||||
|
|
||||||
if (forbidden.contains(extra) && dictionary.forbiddenword != FLAG_UNSET) {
|
if (forbidden.contains(extra) && dictionary.forbiddenword != FLAG_UNSET) {
|
||||||
addEntry(toEdit, toAdd, extra, Set.of(dictionary.forbiddenword));
|
addEntry(toEdit, toAdd, extra, CharHashSet.from(dictionary.forbiddenword));
|
||||||
} else {
|
} else {
|
||||||
extraGenerated.add(extra);
|
extraGenerated.add(extra);
|
||||||
}
|
}
|
||||||
|
@ -489,7 +495,7 @@ public class WordFormGenerator {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addEntry(
|
private void addEntry(
|
||||||
List<DictEntry> toEdit, List<DictEntry> toAdd, String stem, Set<Character> flags) {
|
List<DictEntry> toEdit, List<DictEntry> toAdd, String stem, CharHashSet flags) {
|
||||||
String flagString = toFlagString(flags);
|
String flagString = toFlagString(flags);
|
||||||
(existingStems.contains(stem) ? toEdit : toAdd).add(DictEntry.create(stem, flagString));
|
(existingStems.contains(stem) ? toEdit : toAdd).add(DictEntry.create(stem, flagString));
|
||||||
}
|
}
|
||||||
|
@ -529,18 +535,20 @@ public class WordFormGenerator {
|
||||||
.flatMap(swc -> expansionCache.computeIfAbsent(swc, expandToWords).stream());
|
.flatMap(swc -> expansionCache.computeIfAbsent(swc, expandToWords).stream());
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<AffixedWord> expand(String stem, Set<Character> flagSet) {
|
private List<AffixedWord> expand(String stem, CharHashSet flagSet) {
|
||||||
return getAllWordForms(stem, toFlagString(flagSet), checkCanceled);
|
return getAllWordForms(stem, toFlagString(flagSet), checkCanceled);
|
||||||
}
|
}
|
||||||
|
|
||||||
private String toFlagString(Set<Character> flagSet) {
|
private String toFlagString(CharHashSet flagSet) {
|
||||||
return dictionary.flagParsingStrategy.printFlags(Dictionary.toSortedCharArray(flagSet));
|
return dictionary.flagParsingStrategy.printFlags(Dictionary.toSortedCharArray(flagSet));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private record FlagSet(Set<Character> flags, Dictionary dictionary) {
|
private record FlagSet(CharHashSet flags, Dictionary dictionary) {
|
||||||
static Set<Character> flatten(Set<FlagSet> flagSets) {
|
static CharHashSet flatten(Set<FlagSet> flagSets) {
|
||||||
return flagSets.stream().flatMap(f -> f.flags.stream()).collect(Collectors.toSet());
|
CharHashSet set = new CharHashSet(flagSets.size() << 1);
|
||||||
|
flagSets.forEach(flagSet -> set.addAll(flagSet.flags));
|
||||||
|
return set;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.analysis.ja;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
import org.apache.lucene.util.hppc.CharObjectHashMap;
|
||||||
|
|
||||||
|
/** Utility methods for Japanese filters. */
|
||||||
|
class JapaneseFilterUtil {
|
||||||
|
|
||||||
|
/** Builds a map with primitive char keys and boxed {@link Character} values from the given {@link java.util.Map.Entry} mappings. */
|
||||||
|
@SafeVarargs
|
||||||
|
static CharObjectHashMap<Character> createCharMap(
|
||||||
|
Map.Entry<Character, Character>... charMappings) {
|
||||||
|
CharObjectHashMap<Character> map = new CharObjectHashMap<>(charMappings.length);
|
||||||
|
for (Map.Entry<Character, Character> charMapping : charMappings) {
|
||||||
|
map.put(charMapping.getKey(), charMapping.getValue());
|
||||||
|
}
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
}
|
|
@ -16,11 +16,14 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.ja;
|
package org.apache.lucene.analysis.ja;
|
||||||
|
|
||||||
|
import static org.apache.lucene.analysis.ja.JapaneseFilterUtil.createCharMap;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
import org.apache.lucene.util.hppc.CharObjectHashMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A {@link TokenFilter} that normalizes small letters (捨て仮名) in hiragana into normal letters. For
|
* A {@link TokenFilter} that normalizes small letters (捨て仮名) in hiragana into normal letters. For
|
||||||
|
@ -30,13 +33,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
* legal, contract policies, etc.
|
* legal, contract policies, etc.
|
||||||
*/
|
*/
|
||||||
public final class JapaneseHiraganaUppercaseFilter extends TokenFilter {
|
public final class JapaneseHiraganaUppercaseFilter extends TokenFilter {
|
||||||
private static final Map<Character, Character> LETTER_MAPPINGS;
|
private static final CharObjectHashMap<Character> LETTER_MAPPINGS;
|
||||||
|
|
||||||
static {
|
static {
|
||||||
// supported characters are:
|
// supported characters are:
|
||||||
// ぁ ぃ ぅ ぇ ぉ っ ゃ ゅ ょ ゎ ゕ ゖ
|
// ぁ ぃ ぅ ぇ ぉ っ ゃ ゅ ょ ゎ ゕ ゖ
|
||||||
LETTER_MAPPINGS =
|
LETTER_MAPPINGS =
|
||||||
Map.ofEntries(
|
createCharMap(
|
||||||
Map.entry('ぁ', 'あ'),
|
Map.entry('ぁ', 'あ'),
|
||||||
Map.entry('ぃ', 'い'),
|
Map.entry('ぃ', 'い'),
|
||||||
Map.entry('ぅ', 'う'),
|
Map.entry('ぅ', 'う'),
|
||||||
|
@ -59,17 +62,16 @@ public final class JapaneseHiraganaUppercaseFilter extends TokenFilter {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean incrementToken() throws IOException {
|
public boolean incrementToken() throws IOException {
|
||||||
if (input.incrementToken()) {
|
if (!input.incrementToken()) {
|
||||||
char[] termBuffer = termAttr.buffer();
|
|
||||||
for (int i = 0; i < termBuffer.length; i++) {
|
|
||||||
Character c = LETTER_MAPPINGS.get(termBuffer[i]);
|
|
||||||
if (c != null) {
|
|
||||||
termBuffer[i] = c;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
final char[] termBuffer = termAttr.buffer();
|
||||||
|
for (int i = 0, length = termAttr.length(); i < length; i++) {
|
||||||
|
Character c = LETTER_MAPPINGS.get(termBuffer[i]);
|
||||||
|
if (c != null) {
|
||||||
|
termBuffer[i] = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,11 +16,14 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.ja;
|
package org.apache.lucene.analysis.ja;
|
||||||
|
|
||||||
|
import static org.apache.lucene.analysis.ja.JapaneseFilterUtil.createCharMap;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
import org.apache.lucene.util.hppc.CharObjectHashMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A {@link TokenFilter} that normalizes small letters (捨て仮名) in katakana into normal letters. For
|
* A {@link TokenFilter} that normalizes small letters (捨て仮名) in katakana into normal letters. For
|
||||||
|
@ -30,13 +33,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
* legal, contract policies, etc.
|
* legal, contract policies, etc.
|
||||||
*/
|
*/
|
||||||
public final class JapaneseKatakanaUppercaseFilter extends TokenFilter {
|
public final class JapaneseKatakanaUppercaseFilter extends TokenFilter {
|
||||||
private static final Map<Character, Character> LETTER_MAPPINGS;
|
private static final CharObjectHashMap<Character> LETTER_MAPPINGS;
|
||||||
|
|
||||||
static {
|
static {
|
||||||
// supported characters are:
|
// supported characters are:
|
||||||
// ァ ィ ゥ ェ ォ ヵ ㇰ ヶ ㇱ ㇲ ッ ㇳ ㇴ ㇵ ㇶ ㇷ ㇷ゚ ㇸ ㇹ ㇺ ャ ュ ョ ㇻ ㇼ ㇽ ㇾ ㇿ ヮ
|
// ァ ィ ゥ ェ ォ ヵ ㇰ ヶ ㇱ ㇲ ッ ㇳ ㇴ ㇵ ㇶ ㇷ ㇷ゚ ㇸ ㇹ ㇺ ャ ュ ョ ㇻ ㇼ ㇽ ㇾ ㇿ ヮ
|
||||||
LETTER_MAPPINGS =
|
LETTER_MAPPINGS =
|
||||||
Map.ofEntries(
|
createCharMap(
|
||||||
Map.entry('ァ', 'ア'),
|
Map.entry('ァ', 'ア'),
|
||||||
Map.entry('ィ', 'イ'),
|
Map.entry('ィ', 'イ'),
|
||||||
Map.entry('ゥ', 'ウ'),
|
Map.entry('ゥ', 'ウ'),
|
||||||
|
@ -75,22 +78,24 @@ public final class JapaneseKatakanaUppercaseFilter extends TokenFilter {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean incrementToken() throws IOException {
|
public boolean incrementToken() throws IOException {
|
||||||
if (input.incrementToken()) {
|
if (!input.incrementToken()) {
|
||||||
String term = termAttr.toString();
|
|
||||||
if (term.contains("ㇷ゚")) {
|
|
||||||
term = term.replace("ㇷ゚", "プ");
|
|
||||||
termAttr.setEmpty().append(term);
|
|
||||||
}
|
|
||||||
char[] termBuffer = termAttr.buffer();
|
|
||||||
for (int i = 0; i < termBuffer.length; i++) {
|
|
||||||
Character c = LETTER_MAPPINGS.get(termBuffer[i]);
|
|
||||||
if (c != null) {
|
|
||||||
termBuffer[i] = c;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
final char[] termBuffer = termAttr.buffer();
|
||||||
|
int newLength = termAttr.length();
|
||||||
|
for (int from = 0, to = 0, length = newLength; from < length; from++, to++) {
|
||||||
|
char c = termBuffer[from];
|
||||||
|
if (c == 'ㇷ' && from + 1 < length && termBuffer[from + 1] == '゚') {
|
||||||
|
// ㇷ゚ detected (two chars: ㇷ followed by ゚); replace the pair with the single char プ.
|
||||||
|
termBuffer[to] = 'プ';
|
||||||
|
from++;
|
||||||
|
newLength--;
|
||||||
|
} else {
|
||||||
|
Character mappedChar = LETTER_MAPPINGS.get(c);
|
||||||
|
termBuffer[to] = mappedChar == null ? c : mappedChar;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
termAttr.setLength(newLength);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,6 +65,7 @@ public class TestJapaneseKatakanaUppercaseFilter extends BaseTokenStreamTestCase
|
||||||
new String[] {"アイウエオカクケシスツトヌハヒフプヘホムヤユヨラリルレロワ"});
|
new String[] {"アイウエオカクケシスツトヌハヒフプヘホムヤユヨラリルレロワ"});
|
||||||
assertAnalyzesTo(keywordAnalyzer, "ストップウォッチ", new String[] {"ストツプウオツチ"});
|
assertAnalyzesTo(keywordAnalyzer, "ストップウォッチ", new String[] {"ストツプウオツチ"});
|
||||||
assertAnalyzesTo(keywordAnalyzer, "サラニㇷ゚ カムイチェㇷ゚ ㇷ゚ㇷ゚", new String[] {"サラニプ", "カムイチエプ", "ププ"});
|
assertAnalyzesTo(keywordAnalyzer, "サラニㇷ゚ カムイチェㇷ゚ ㇷ゚ㇷ゚", new String[] {"サラニプ", "カムイチエプ", "ププ"});
|
||||||
|
assertAnalyzesTo(keywordAnalyzer, "カムイチェㇷ゚カムイチェ", new String[] {"カムイチエプカムイチエ"});
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testKanaUppercaseWithSurrogatePair() throws IOException {
|
public void testKanaUppercaseWithSurrogatePair() throws IOException {
|
||||||
|
|
|
@ -58,6 +58,7 @@ import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import org.apache.lucene.util.hppc.ObjectCursor;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Gener object helps in the discarding of nodes which break the reduction effort and defend the
|
* The Gener object helps in the discarding of nodes which break the reduction effort and defend the
|
||||||
|
@ -103,8 +104,8 @@ public class Gener extends Reduce {
|
||||||
*/
|
*/
|
||||||
public boolean eat(Row in, int[] remap) {
|
public boolean eat(Row in, int[] remap) {
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
for (Iterator<Cell> i = in.cells.values().iterator(); i.hasNext(); ) {
|
for (Iterator<ObjectCursor<Cell>> i = in.cells.values().iterator(); i.hasNext(); ) {
|
||||||
Cell c = i.next();
|
Cell c = i.next().value;
|
||||||
sum += c.cnt;
|
sum += c.cnt;
|
||||||
if (c.ref >= 0) {
|
if (c.ref >= 0) {
|
||||||
if (remap[c.ref] == 0) {
|
if (remap[c.ref] == 0) {
|
||||||
|
@ -114,8 +115,8 @@ public class Gener extends Reduce {
|
||||||
}
|
}
|
||||||
int frame = sum / 10;
|
int frame = sum / 10;
|
||||||
boolean live = false;
|
boolean live = false;
|
||||||
for (Iterator<Cell> i = in.cells.values().iterator(); i.hasNext(); ) {
|
for (Iterator<ObjectCursor<Cell>> i = in.cells.values().iterator(); i.hasNext(); ) {
|
||||||
Cell c = i.next();
|
Cell c = i.next().value;
|
||||||
if (c.cnt < frame && c.cmd >= 0) {
|
if (c.cnt < frame && c.cmd >= 0) {
|
||||||
c.cnt = 0;
|
c.cnt = 0;
|
||||||
c.cmd = -1;
|
c.cmd = -1;
|
||||||
|
|
|
@ -58,6 +58,7 @@ import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import org.apache.lucene.util.hppc.ObjectCursor;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Lift class is a data structure that is a variation of a Patricia trie.
|
* The Lift class is a data structure that is a variation of a Patricia trie.
|
||||||
|
@ -111,9 +112,9 @@ public class Lift extends Reduce {
|
||||||
* @param nodes contains the patch commands
|
* @param nodes contains the patch commands
|
||||||
*/
|
*/
|
||||||
public void liftUp(Row in, List<Row> nodes) {
|
public void liftUp(Row in, List<Row> nodes) {
|
||||||
Iterator<Cell> i = in.cells.values().iterator();
|
Iterator<ObjectCursor<Cell>> i = in.cells.values().iterator();
|
||||||
for (; i.hasNext(); ) {
|
for (; i.hasNext(); ) {
|
||||||
Cell c = i.next();
|
Cell c = i.next().value;
|
||||||
if (c.ref >= 0) {
|
if (c.ref >= 0) {
|
||||||
Row to = nodes.get(c.ref);
|
Row to = nodes.get(c.ref);
|
||||||
int sum = to.uniformCmd(changeSkip);
|
int sum = to.uniformCmd(changeSkip);
|
||||||
|
|
|
@ -58,6 +58,7 @@ import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import org.apache.lucene.util.hppc.CharCursor;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Optimizer class is a Trie that will be reduced (have empty rows removed).
|
* The Optimizer class is a Trie that will be reduced (have empty rows removed).
|
||||||
|
@ -116,10 +117,10 @@ public class Optimizer extends Reduce {
|
||||||
* @return the resulting Row, or <code>null</code> if the operation cannot be realized
|
* @return the resulting Row, or <code>null</code> if the operation cannot be realized
|
||||||
*/
|
*/
|
||||||
public Row merge(Row master, Row existing) {
|
public Row merge(Row master, Row existing) {
|
||||||
Iterator<Character> i = master.cells.keySet().iterator();
|
Iterator<CharCursor> i = master.cells.keys().iterator();
|
||||||
Row n = new Row();
|
Row n = new Row();
|
||||||
for (; i.hasNext(); ) {
|
for (; i.hasNext(); ) {
|
||||||
Character ch = i.next();
|
char ch = i.next().value;
|
||||||
// XXX also must handle Cnt and Skip !!
|
// XXX also must handle Cnt and Skip !!
|
||||||
Cell a = master.cells.get(ch);
|
Cell a = master.cells.get(ch);
|
||||||
Cell b = existing.cells.get(ch);
|
Cell b = existing.cells.get(ch);
|
||||||
|
@ -130,9 +131,9 @@ public class Optimizer extends Reduce {
|
||||||
}
|
}
|
||||||
n.cells.put(ch, s);
|
n.cells.put(ch, s);
|
||||||
}
|
}
|
||||||
i = existing.cells.keySet().iterator();
|
i = existing.cells.keys().iterator();
|
||||||
for (; i.hasNext(); ) {
|
for (; i.hasNext(); ) {
|
||||||
Character ch = i.next();
|
char ch = i.next().value;
|
||||||
if (master.at(ch) != null) {
|
if (master.at(ch) != null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,6 +58,8 @@ import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import org.apache.lucene.util.hppc.CharCursor;
|
||||||
|
import org.apache.lucene.util.hppc.ObjectCursor;
|
||||||
|
|
||||||
/** The Reduce object is used to remove gaps in a Trie which stores a dictionary. */
|
/** The Reduce object is used to remove gaps in a Trie which stores a dictionary. */
|
||||||
public class Reduce {
|
public class Reduce {
|
||||||
|
@ -88,9 +90,9 @@ public class Reduce {
|
||||||
|
|
||||||
Row now = old.get(ind);
|
Row now = old.get(ind);
|
||||||
to.add(now);
|
to.add(now);
|
||||||
Iterator<Cell> i = now.cells.values().iterator();
|
Iterator<ObjectCursor<Cell>> i = now.cells.values().iterator();
|
||||||
for (; i.hasNext(); ) {
|
for (; i.hasNext(); ) {
|
||||||
Cell c = i.next();
|
Cell c = i.next().value;
|
||||||
if (c.ref >= 0 && remap[c.ref] < 0) {
|
if (c.ref >= 0 && remap[c.ref] < 0) {
|
||||||
removeGaps(c.ref, old, to, remap);
|
removeGaps(c.ref, old, to, remap);
|
||||||
}
|
}
|
||||||
|
@ -109,9 +111,9 @@ public class Reduce {
|
||||||
*/
|
*/
|
||||||
public Remap(Row old, int[] remap) {
|
public Remap(Row old, int[] remap) {
|
||||||
super();
|
super();
|
||||||
Iterator<Character> i = old.cells.keySet().iterator();
|
Iterator<CharCursor> i = old.cells.keys().iterator();
|
||||||
for (; i.hasNext(); ) {
|
for (; i.hasNext(); ) {
|
||||||
Character ch = i.next();
|
char ch = i.next().value;
|
||||||
Cell c = old.at(ch);
|
Cell c = old.at(ch);
|
||||||
Cell nc;
|
Cell nc;
|
||||||
if (c.ref >= 0) {
|
if (c.ref >= 0) {
|
||||||
|
|
|
@ -59,11 +59,13 @@ import java.io.DataOutput;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.TreeMap;
|
import org.apache.lucene.util.hppc.CharCursor;
|
||||||
|
import org.apache.lucene.util.hppc.CharObjectHashMap;
|
||||||
|
import org.apache.lucene.util.hppc.ObjectCursor;
|
||||||
|
|
||||||
/** The Row class represents a row in a matrix representation of a trie. */
|
/** The Row class represents a row in a matrix representation of a trie. */
|
||||||
public class Row {
|
public class Row {
|
||||||
TreeMap<Character, Cell> cells = new TreeMap<>();
|
CharObjectHashMap<Cell> cells = new CharObjectHashMap<>();
|
||||||
int uniformCnt = 0;
|
int uniformCnt = 0;
|
||||||
int uniformSkip = 0;
|
int uniformSkip = 0;
|
||||||
|
|
||||||
|
@ -98,12 +100,12 @@ public class Row {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the command in the Cell of the given Character to the given integer.
|
* Set the command in the Cell of the given character to the given integer.
|
||||||
*
|
*
|
||||||
* @param way the Character defining the Cell
|
* @param way the character defining the Cell
|
||||||
* @param cmd the new command
|
* @param cmd the new command
|
||||||
*/
|
*/
|
||||||
public void setCmd(Character way, int cmd) {
|
public void setCmd(char way, int cmd) {
|
||||||
Cell c = at(way);
|
Cell c = at(way);
|
||||||
if (c == null) {
|
if (c == null) {
|
||||||
c = new Cell();
|
c = new Cell();
|
||||||
|
@ -116,12 +118,12 @@ public class Row {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the reference to the next row in the Cell of the given Character to the given integer.
|
* Set the reference to the next row in the Cell of the given character to the given integer.
|
||||||
*
|
*
|
||||||
* @param way the Character defining the Cell
|
* @param way the character defining the Cell
|
||||||
* @param ref The new ref value
|
* @param ref The new ref value
|
||||||
*/
|
*/
|
||||||
public void setRef(Character way, int ref) {
|
public void setRef(char way, int ref) {
|
||||||
Cell c = at(way);
|
Cell c = at(way);
|
||||||
if (c == null) {
|
if (c == null) {
|
||||||
c = new Cell();
|
c = new Cell();
|
||||||
|
@ -138,10 +140,10 @@ public class Row {
|
||||||
* @return the number of cells in use
|
* @return the number of cells in use
|
||||||
*/
|
*/
|
||||||
public int getCells() {
|
public int getCells() {
|
||||||
Iterator<Character> i = cells.keySet().iterator();
|
Iterator<CharCursor> i = cells.keys().iterator();
|
||||||
int size = 0;
|
int size = 0;
|
||||||
for (; i.hasNext(); ) {
|
for (; i.hasNext(); ) {
|
||||||
Character c = i.next();
|
char c = i.next().value;
|
||||||
Cell e = at(c);
|
Cell e = at(c);
|
||||||
if (e.cmd >= 0 || e.ref >= 0) {
|
if (e.cmd >= 0 || e.ref >= 0) {
|
||||||
size++;
|
size++;
|
||||||
|
@ -156,10 +158,10 @@ public class Row {
|
||||||
* @return the number of references
|
* @return the number of references
|
||||||
*/
|
*/
|
||||||
public int getCellsPnt() {
|
public int getCellsPnt() {
|
||||||
Iterator<Character> i = cells.keySet().iterator();
|
Iterator<CharCursor> i = cells.keys().iterator();
|
||||||
int size = 0;
|
int size = 0;
|
||||||
for (; i.hasNext(); ) {
|
for (; i.hasNext(); ) {
|
||||||
Character c = i.next();
|
char c = i.next().value;
|
||||||
Cell e = at(c);
|
Cell e = at(c);
|
||||||
if (e.ref >= 0) {
|
if (e.ref >= 0) {
|
||||||
size++;
|
size++;
|
||||||
|
@ -174,10 +176,10 @@ public class Row {
|
||||||
* @return the number of patch commands
|
* @return the number of patch commands
|
||||||
*/
|
*/
|
||||||
public int getCellsVal() {
|
public int getCellsVal() {
|
||||||
Iterator<Character> i = cells.keySet().iterator();
|
Iterator<CharCursor> i = cells.keys().iterator();
|
||||||
int size = 0;
|
int size = 0;
|
||||||
for (; i.hasNext(); ) {
|
for (; i.hasNext(); ) {
|
||||||
Character c = i.next();
|
char c = i.next().value;
|
||||||
Cell e = at(c);
|
Cell e = at(c);
|
||||||
if (e.cmd >= 0) {
|
if (e.cmd >= 0) {
|
||||||
size++;
|
size++;
|
||||||
|
@ -187,35 +189,35 @@ public class Row {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the command in the Cell associated with the given Character.
|
* Return the command in the Cell associated with the given character.
|
||||||
*
|
*
|
||||||
* @param way the Character associated with the Cell holding the desired command
|
* @param way the character associated with the Cell holding the desired command
|
||||||
* @return the command
|
* @return the command
|
||||||
*/
|
*/
|
||||||
public int getCmd(Character way) {
|
public int getCmd(char way) {
|
||||||
Cell c = at(way);
|
Cell c = at(way);
|
||||||
return (c == null) ? -1 : c.cmd;
|
return (c == null) ? -1 : c.cmd;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the number of patch commands were in the Cell associated with the given Character before
|
* Return the number of patch commands that were in the Cell associated with the given character before
|
||||||
* the Trie containing this Row was reduced.
|
* the Trie containing this Row was reduced.
|
||||||
*
|
*
|
||||||
* @param way the Character associated with the desired Cell
|
* @param way the character associated with the desired Cell
|
||||||
* @return the number of patch commands before reduction
|
* @return the number of patch commands before reduction
|
||||||
*/
|
*/
|
||||||
public int getCnt(Character way) {
|
public int getCnt(char way) {
|
||||||
Cell c = at(way);
|
Cell c = at(way);
|
||||||
return (c == null) ? -1 : c.cnt;
|
return (c == null) ? -1 : c.cnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the reference to the next Row in the Cell associated with the given Character.
|
* Return the reference to the next Row in the Cell associated with the given character.
|
||||||
*
|
*
|
||||||
* @param way the Character associated with the desired Cell
|
* @param way the character associated with the desired Cell
|
||||||
* @return the reference, or -1 if the Cell is <code>null</code>
|
* @return the reference, or -1 if the Cell is <code>null</code>
|
||||||
*/
|
*/
|
||||||
public int getRef(Character way) {
|
public int getRef(char way) {
|
||||||
Cell c = at(way);
|
Cell c = at(way);
|
||||||
return (c == null) ? -1 : c.ref;
|
return (c == null) ? -1 : c.ref;
|
||||||
}
|
}
|
||||||
|
@ -228,15 +230,15 @@ public class Row {
|
||||||
*/
|
*/
|
||||||
public void store(DataOutput os) throws IOException {
|
public void store(DataOutput os) throws IOException {
|
||||||
os.writeInt(cells.size());
|
os.writeInt(cells.size());
|
||||||
Iterator<Character> i = cells.keySet().iterator();
|
Iterator<CharCursor> i = cells.keys().iterator();
|
||||||
for (; i.hasNext(); ) {
|
for (; i.hasNext(); ) {
|
||||||
Character c = i.next();
|
char c = i.next().value;
|
||||||
Cell e = at(c);
|
Cell e = at(c);
|
||||||
if (e.cmd < 0 && e.ref < 0) {
|
if (e.cmd < 0 && e.ref < 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
os.writeChar(c.charValue());
|
os.writeChar(c);
|
||||||
os.writeInt(e.cmd);
|
os.writeInt(e.cmd);
|
||||||
os.writeInt(e.cnt);
|
os.writeInt(e.cnt);
|
||||||
os.writeInt(e.ref);
|
os.writeInt(e.ref);
|
||||||
|
@ -251,12 +253,12 @@ public class Row {
|
||||||
* @return the number of identical Cells, or -1 if there are (at least) two different cells
|
* @return the number of identical Cells, or -1 if there are (at least) two different cells
|
||||||
*/
|
*/
|
||||||
public int uniformCmd(boolean eqSkip) {
|
public int uniformCmd(boolean eqSkip) {
|
||||||
Iterator<Cell> i = cells.values().iterator();
|
Iterator<ObjectCursor<Cell>> i = cells.values().iterator();
|
||||||
int ret = -1;
|
int ret = -1;
|
||||||
uniformCnt = 1;
|
uniformCnt = 1;
|
||||||
uniformSkip = 0;
|
uniformSkip = 0;
|
||||||
for (; i.hasNext(); ) {
|
for (; i.hasNext(); ) {
|
||||||
Cell c = i.next();
|
Cell c = i.next().value;
|
||||||
if (c.ref >= 0) {
|
if (c.ref >= 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -284,15 +286,15 @@ public class Row {
|
||||||
|
|
||||||
/** Write the contents of this Row to the printstream. */
|
/** Write the contents of this Row to the printstream. */
|
||||||
public void print(PrintStream out) {
|
public void print(PrintStream out) {
|
||||||
for (Iterator<Character> i = cells.keySet().iterator(); i.hasNext(); ) {
|
for (Iterator<CharCursor> i = cells.keys().iterator(); i.hasNext(); ) {
|
||||||
Character ch = i.next();
|
char ch = i.next().value;
|
||||||
Cell c = at(ch);
|
Cell c = at(ch);
|
||||||
out.print("[" + ch + ":" + c + "]");
|
out.print("[" + ch + ":" + c + "]");
|
||||||
}
|
}
|
||||||
out.println();
|
out.println();
|
||||||
}
|
}
|
||||||
|
|
||||||
Cell at(Character index) {
|
Cell at(char index) {
|
||||||
return cells.get(index);
|
return cells.get(index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -134,7 +134,7 @@ public class Trie {
|
||||||
boolean br = false;
|
boolean br = false;
|
||||||
|
|
||||||
for (int i = 0; i < key.length() - 1; i++) {
|
for (int i = 0; i < key.length() - 1; i++) {
|
||||||
Character ch = e.next();
|
char ch = e.next();
|
||||||
w = now.getCmd(ch);
|
w = now.getCmd(ch);
|
||||||
if (w >= 0) {
|
if (w >= 0) {
|
||||||
int n = w;
|
int n = w;
|
||||||
|
@ -227,7 +227,7 @@ public class Trie {
|
||||||
Cell c;
|
Cell c;
|
||||||
int cmd = -1;
|
int cmd = -1;
|
||||||
StrEnum e = new StrEnum(key, forward);
|
StrEnum e = new StrEnum(key, forward);
|
||||||
Character ch = null;
|
char ch;
|
||||||
|
|
||||||
for (int i = 0; i < key.length(); ) {
|
for (int i = 0; i < key.length(); ) {
|
||||||
ch = e.next();
|
ch = e.next();
|
||||||
|
@ -272,7 +272,7 @@ public class Trie {
|
||||||
StrEnum e = new StrEnum(key, forward);
|
StrEnum e = new StrEnum(key, forward);
|
||||||
|
|
||||||
for (int i = 0; i < key.length() - 1; i++) {
|
for (int i = 0; i < key.length() - 1; i++) {
|
||||||
Character ch = e.next();
|
char ch = e.next();
|
||||||
w = now.getCmd(ch);
|
w = now.getCmd(ch);
|
||||||
if (w >= 0) {
|
if (w >= 0) {
|
||||||
last = cmds.get(w);
|
last = cmds.get(w);
|
||||||
|
@ -343,7 +343,7 @@ public class Trie {
|
||||||
StrEnum e = new StrEnum(key, forward);
|
StrEnum e = new StrEnum(key, forward);
|
||||||
|
|
||||||
for (int i = 0; i < e.length() - 1; i++) {
|
for (int i = 0; i < e.length() - 1; i++) {
|
||||||
Character ch = e.next();
|
char ch = e.next();
|
||||||
node = r.getRef(ch);
|
node = r.getRef(ch);
|
||||||
if (node >= 0) {
|
if (node >= 0) {
|
||||||
r = getRow(node);
|
r = getRow(node);
|
||||||
|
|
|
@ -281,8 +281,8 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
|
||||||
private static List<String> sortFieldNames(
|
private static List<String> sortFieldNames(
|
||||||
IntObjectHashMap<FieldReader> fieldMap, FieldInfos fieldInfos) {
|
IntObjectHashMap<FieldReader> fieldMap, FieldInfos fieldInfos) {
|
||||||
List<String> fieldNames = new ArrayList<>(fieldMap.size());
|
List<String> fieldNames = new ArrayList<>(fieldMap.size());
|
||||||
for (IntCursor fieldNumberCursor : fieldMap.keys()) {
|
for (IntCursor fieldNumber : fieldMap.keys()) {
|
||||||
fieldNames.add(fieldInfos.fieldInfo(fieldNumberCursor.value).name);
|
fieldNames.add(fieldInfos.fieldInfo(fieldNumber.value).name);
|
||||||
}
|
}
|
||||||
fieldNames.sort(null);
|
fieldNames.sort(null);
|
||||||
return Collections.unmodifiableList(fieldNames);
|
return Collections.unmodifiableList(fieldNames);
|
||||||
|
|
|
@ -95,8 +95,8 @@ final class StateSet extends IntSet {
|
||||||
}
|
}
|
||||||
arrayCache = new int[inner.size()];
|
arrayCache = new int[inner.size()];
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (IntCursor cursor : inner.keys()) {
|
for (IntCursor key : inner.keys()) {
|
||||||
arrayCache[i++] = cursor.value;
|
arrayCache[i++] = key.value;
|
||||||
}
|
}
|
||||||
// we need to sort this array since "equals" method depend on this
|
// we need to sort this array since "equals" method depend on this
|
||||||
Arrays.sort(arrayCache);
|
Arrays.sort(arrayCache);
|
||||||
|
@ -115,8 +115,8 @@ final class StateSet extends IntSet {
|
||||||
return hashCode;
|
return hashCode;
|
||||||
}
|
}
|
||||||
hashCode = inner.size();
|
hashCode = inner.size();
|
||||||
for (IntCursor cursor : inner.keys()) {
|
for (IntCursor key : inner.keys()) {
|
||||||
hashCode += BitMixer.mix(cursor.value);
|
hashCode += BitMixer.mix(key.value);
|
||||||
}
|
}
|
||||||
hashUpdated = true;
|
hashUpdated = true;
|
||||||
return hashCode;
|
return hashCode;
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.util.hppc;
|
||||||
|
|
||||||
|
/** Forked from HPPC. A mutable cursor pairing an {@code int} index with a {@code char} value. */
public final class CharCursor {
  /**
   * Position of the current value inside the container that owns this cursor. How the position is
   * to be interpreted is up to that container (typically a slot of its backing buffer).
   */
  public int index;

  /** The value the cursor currently points at. */
  public char value;

  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder("[cursor, index: ");
    text.append(index);
    text.append(", value: ");
    text.append(value);
    text.append("]");
    return text.toString();
  }
}
|
|
@ -0,0 +1,693 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.util.hppc;
|
||||||
|
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_EXPECTED_ELEMENTS;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_LOAD_FACTOR;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.ITERATION_SEED;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.MAX_LOAD_FACTOR;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.MIN_LOAD_FACTOR;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.checkLoadFactor;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.expandAtCount;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.iterationIncrement;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.minBufferSize;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.nextBufferSize;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import org.apache.lucene.util.Accountable;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A hash set of <code>char</code>s, implemented using open addressing with linear probing for
|
||||||
|
* collision resolution.
|
||||||
|
*
|
||||||
|
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.CharHashSet
|
||||||
|
*
|
||||||
|
* <p>github: https://github.com/carrotsearch/hppc release 0.9.0
|
||||||
|
*/
|
||||||
|
public class CharHashSet implements Iterable<CharCursor>, Accountable, Cloneable {
|
||||||
|
|
||||||
|
private static final long BASE_RAM_BYTES_USED =
|
||||||
|
RamUsageEstimator.shallowSizeOfInstance(CharHashSet.class);
|
||||||
|
|
||||||
|
private static final char EMPTY_KEY = (char) 0;
|
||||||
|
|
||||||
|
/** The hash array holding keys. */
|
||||||
|
public char[] keys;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The number of stored keys (assigned key slots), excluding the special "empty" key, if any.
|
||||||
|
*
|
||||||
|
* @see #size()
|
||||||
|
* @see #hasEmptyKey
|
||||||
|
*/
|
||||||
|
protected int assigned;
|
||||||
|
|
||||||
|
/** Mask for slot scans in {@link #keys}. */
|
||||||
|
protected int mask;
|
||||||
|
|
||||||
|
/** Expand (rehash) {@link #keys} when {@link #assigned} hits this value. */
|
||||||
|
protected int resizeAt;
|
||||||
|
|
||||||
|
/** Special treatment for the "empty slot" key marker. */
|
||||||
|
protected boolean hasEmptyKey;
|
||||||
|
|
||||||
|
/** The load factor for {@link #keys}. */
|
||||||
|
protected double loadFactor;
|
||||||
|
|
||||||
|
/** Seed used to ensure the hash iteration order is different from an iteration to another. */
|
||||||
|
protected int iterationSeed;
|
||||||
|
|
||||||
|
/** Creates an empty set using the default expected element count and the default load factor. */
public CharHashSet() {
  this(DEFAULT_EXPECTED_ELEMENTS, DEFAULT_LOAD_FACTOR);
}

/**
 * Creates an empty set using the default load factor.
 *
 * @param expectedElements The number of elements (inclusive) the set must accept without a
 *     rehash.
 */
public CharHashSet(int expectedElements) {
  this(expectedElements, DEFAULT_LOAD_FACTOR);
}

/**
 * Creates an empty set with an explicit capacity hint and load factor.
 *
 * @param expectedElements The number of elements (inclusive) the set must accept without a
 *     rehash.
 * @param loadFactor The load factor for the internal buffer. It is validated by {@link
 *     #verifyLoadFactor(double)} before anything else happens.
 */
public CharHashSet(int expectedElements, double loadFactor) {
  // Validate first: nothing else is touched when the load factor is rejected.
  this.loadFactor = verifyLoadFactor(loadFactor);
  this.iterationSeed = ITERATION_SEED.incrementAndGet();
  ensureCapacity(expectedElements);
}

/** Creates a set pre-sized for, and then filled with, the elements of another set. */
public CharHashSet(CharHashSet set) {
  this(set.size());
  addAll(set);
}
|
||||||
|
|
||||||
|
public boolean add(char key) {
|
||||||
|
if (((key) == 0)) {
|
||||||
|
assert ((keys[mask + 1]) == 0);
|
||||||
|
boolean added = !hasEmptyKey;
|
||||||
|
hasEmptyKey = true;
|
||||||
|
return added;
|
||||||
|
} else {
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
final int mask = this.mask;
|
||||||
|
int slot = hashKey(key) & mask;
|
||||||
|
|
||||||
|
char existing;
|
||||||
|
while (!((existing = keys[slot]) == 0)) {
|
||||||
|
if (((key) == (existing))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
slot = (slot + 1) & mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (assigned == resizeAt) {
|
||||||
|
allocateThenInsertThenRehash(slot, key);
|
||||||
|
} else {
|
||||||
|
keys[slot] = key;
|
||||||
|
}
|
||||||
|
|
||||||
|
assigned++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds all elements from the given list (vararg) to this set.
|
||||||
|
*
|
||||||
|
* @return Returns the number of elements actually added as a result of this call (not previously
|
||||||
|
* present in the set).
|
||||||
|
*/
|
||||||
|
public final int addAll(char... elements) {
|
||||||
|
ensureCapacity(elements.length);
|
||||||
|
int count = 0;
|
||||||
|
for (char e : elements) {
|
||||||
|
if (add(e)) {
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds all elements from the given set to this set.
|
||||||
|
*
|
||||||
|
* @return Returns the number of elements actually added as a result of this call (not previously
|
||||||
|
* present in the set).
|
||||||
|
*/
|
||||||
|
public int addAll(CharHashSet set) {
|
||||||
|
ensureCapacity(set.size());
|
||||||
|
return addAll((Iterable<? extends CharCursor>) set);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds all elements from the given iterable to this set.
|
||||||
|
*
|
||||||
|
* @return Returns the number of elements actually added as a result of this call (not previously
|
||||||
|
* present in the set).
|
||||||
|
*/
|
||||||
|
public int addAll(Iterable<? extends CharCursor> iterable) {
|
||||||
|
int count = 0;
|
||||||
|
for (CharCursor cursor : iterable) {
|
||||||
|
if (add(cursor.value)) {
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
public char[] toArray() {
|
||||||
|
|
||||||
|
final char[] cloned = (new char[size()]);
|
||||||
|
int j = 0;
|
||||||
|
if (hasEmptyKey) {
|
||||||
|
cloned[j++] = EMPTY_KEY;
|
||||||
|
}
|
||||||
|
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
int seed = nextIterationSeed();
|
||||||
|
int inc = iterationIncrement(seed);
|
||||||
|
for (int i = 0, mask = this.mask, slot = seed & mask;
|
||||||
|
i <= mask;
|
||||||
|
i++, slot = (slot + inc) & mask) {
|
||||||
|
char existing;
|
||||||
|
if (!((existing = keys[slot]) == 0)) {
|
||||||
|
cloned[j++] = existing;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return cloned;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** An alias for the (preferred) {@link #removeAll}. */
|
||||||
|
public boolean remove(char key) {
|
||||||
|
if (((key) == 0)) {
|
||||||
|
boolean hadEmptyKey = hasEmptyKey;
|
||||||
|
hasEmptyKey = false;
|
||||||
|
return hadEmptyKey;
|
||||||
|
} else {
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
final int mask = this.mask;
|
||||||
|
int slot = hashKey(key) & mask;
|
||||||
|
|
||||||
|
char existing;
|
||||||
|
while (!((existing = keys[slot]) == 0)) {
|
||||||
|
if (((key) == (existing))) {
|
||||||
|
shiftConflictingKeys(slot);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
slot = (slot + 1) & mask;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removes all keys present in a given container.
|
||||||
|
*
|
||||||
|
* @return Returns the number of elements actually removed as a result of this call.
|
||||||
|
*/
|
||||||
|
public int removeAll(CharHashSet other) {
|
||||||
|
final int before = size();
|
||||||
|
|
||||||
|
// Try to iterate over the smaller set or over the container that isn't implementing
|
||||||
|
// efficient contains() lookup.
|
||||||
|
|
||||||
|
if (other.size() >= size()) {
|
||||||
|
if (hasEmptyKey && other.contains(EMPTY_KEY)) {
|
||||||
|
hasEmptyKey = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
for (int slot = 0, max = this.mask; slot <= max; ) {
|
||||||
|
char existing;
|
||||||
|
if (!((existing = keys[slot]) == 0) && other.contains(existing)) {
|
||||||
|
// Shift, do not increment slot.
|
||||||
|
shiftConflictingKeys(slot);
|
||||||
|
} else {
|
||||||
|
slot++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (CharCursor c : other) {
|
||||||
|
remove(c.value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return before - size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean contains(char key) {
|
||||||
|
if (((key) == 0)) {
|
||||||
|
return hasEmptyKey;
|
||||||
|
} else {
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
final int mask = this.mask;
|
||||||
|
int slot = hashKey(key) & mask;
|
||||||
|
char existing;
|
||||||
|
while (!((existing = keys[slot]) == 0)) {
|
||||||
|
if (((key) == (existing))) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
slot = (slot + 1) & mask;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void clear() {
|
||||||
|
assigned = 0;
|
||||||
|
hasEmptyKey = false;
|
||||||
|
Arrays.fill(keys, EMPTY_KEY);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void release() {
|
||||||
|
assigned = 0;
|
||||||
|
hasEmptyKey = false;
|
||||||
|
keys = null;
|
||||||
|
ensureCapacity(DEFAULT_EXPECTED_ELEMENTS);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return size() == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensure this container can hold at least the given number of elements without resizing its
|
||||||
|
* buffers.
|
||||||
|
*
|
||||||
|
* @param expectedElements The total number of elements, inclusive.
|
||||||
|
*/
|
||||||
|
public void ensureCapacity(int expectedElements) {
|
||||||
|
if (expectedElements > resizeAt || keys == null) {
|
||||||
|
final char[] prevKeys = this.keys;
|
||||||
|
allocateBuffers(minBufferSize(expectedElements, loadFactor));
|
||||||
|
if (prevKeys != null && !isEmpty()) {
|
||||||
|
rehash(prevKeys);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return assigned + (hasEmptyKey ? 1 : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
int h = hasEmptyKey ? 0xDEADBEEF : 0;
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
for (int slot = mask; slot >= 0; slot--) {
|
||||||
|
char existing;
|
||||||
|
if (!((existing = keys[slot]) == 0)) {
|
||||||
|
h += BitMixer.mix(existing);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
return (this == obj)
|
||||||
|
|| (obj != null && getClass() == obj.getClass() && sameKeys(getClass().cast(obj)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return true if all keys of some other container exist in this container. */
|
||||||
|
private boolean sameKeys(CharHashSet other) {
|
||||||
|
if (other.size() != size()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (CharCursor c : other) {
|
||||||
|
if (!contains(c.value)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public CharHashSet clone() {
|
||||||
|
try {
|
||||||
|
/* */
|
||||||
|
CharHashSet cloned = (CharHashSet) super.clone();
|
||||||
|
cloned.keys = keys.clone();
|
||||||
|
cloned.hasEmptyKey = hasEmptyKey;
|
||||||
|
cloned.iterationSeed = ITERATION_SEED.incrementAndGet();
|
||||||
|
return cloned;
|
||||||
|
} catch (CloneNotSupportedException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<CharCursor> iterator() {
|
||||||
|
return new EntryIterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides the next iteration seed used to build the iteration starting slot and offset
|
||||||
|
* increment. This method does not need to be synchronized, what matters is that each thread gets
|
||||||
|
* a sequence of varying seeds.
|
||||||
|
*/
|
||||||
|
protected int nextIterationSeed() {
|
||||||
|
return iterationSeed = BitMixer.mixPhi(iterationSeed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** An iterator implementation for {@link #iterator}. */
|
||||||
|
protected final class EntryIterator extends AbstractIterator<CharCursor> {
|
||||||
|
private final CharCursor cursor;
|
||||||
|
private final int increment;
|
||||||
|
private int index;
|
||||||
|
private int slot;
|
||||||
|
|
||||||
|
public EntryIterator() {
|
||||||
|
cursor = new CharCursor();
|
||||||
|
int seed = nextIterationSeed();
|
||||||
|
increment = iterationIncrement(seed);
|
||||||
|
slot = seed & mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected CharCursor fetch() {
|
||||||
|
final int mask = CharHashSet.this.mask;
|
||||||
|
while (index <= mask) {
|
||||||
|
char existing;
|
||||||
|
index++;
|
||||||
|
slot = (slot + increment) & mask;
|
||||||
|
if (!((existing = keys[slot]) == 0)) {
|
||||||
|
cursor.index = slot;
|
||||||
|
cursor.value = existing;
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (index == mask + 1 && hasEmptyKey) {
|
||||||
|
cursor.index = index++;
|
||||||
|
cursor.value = EMPTY_KEY;
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
|
||||||
|
return done();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a set from a variable number of arguments or an array of <code>char</code>. The elements
|
||||||
|
* are copied from the argument to the internal buffer.
|
||||||
|
*/
|
||||||
|
/* */
|
||||||
|
public static CharHashSet from(char... elements) {
|
||||||
|
final CharHashSet set = new CharHashSet(elements.length);
|
||||||
|
set.addAll(elements);
|
||||||
|
return set;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a hash code for the given key.
|
||||||
|
*
|
||||||
|
* <p>The output from this function should evenly distribute keys across the entire integer range.
|
||||||
|
*/
|
||||||
|
protected int hashKey(char key) {
|
||||||
|
assert !((key) == 0); // Handled as a special case (empty slot marker).
|
||||||
|
return BitMixer.mixPhi(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a logical "index" of a given key that can be used to speed up follow-up logic in
|
||||||
|
* certain scenarios (conditional logic).
|
||||||
|
*
|
||||||
|
* <p>The semantics of "indexes" are not strictly defined. Indexes may (and typically won't be)
|
||||||
|
* contiguous.
|
||||||
|
*
|
||||||
|
* <p>The index is valid only between modifications (it will not be affected by read-only
|
||||||
|
* operations).
|
||||||
|
*
|
||||||
|
* @see #indexExists
|
||||||
|
* @see #indexGet
|
||||||
|
* @see #indexInsert
|
||||||
|
* @see #indexReplace
|
||||||
|
* @param key The key to locate in the set.
|
||||||
|
* @return A non-negative value of the logical "index" of the key in the set or a negative value
|
||||||
|
* if the key did not exist.
|
||||||
|
*/
|
||||||
|
public int indexOf(char key) {
|
||||||
|
final int mask = this.mask;
|
||||||
|
if (((key) == 0)) {
|
||||||
|
return hasEmptyKey ? mask + 1 : ~(mask + 1);
|
||||||
|
} else {
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
int slot = hashKey(key) & mask;
|
||||||
|
|
||||||
|
char existing;
|
||||||
|
while (!((existing = keys[slot]) == 0)) {
|
||||||
|
if (((key) == (existing))) {
|
||||||
|
return slot;
|
||||||
|
}
|
||||||
|
slot = (slot + 1) & mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ~slot;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see #indexOf
|
||||||
|
* @param index The index of a given key, as returned from {@link #indexOf}.
|
||||||
|
* @return Returns <code>true</code> if the index corresponds to an existing key or false
|
||||||
|
* otherwise. This is equivalent to checking whether the index is a positive value (existing
|
||||||
|
* keys) or a negative value (non-existing keys).
|
||||||
|
*/
|
||||||
|
public boolean indexExists(int index) {
|
||||||
|
assert index < 0 || index <= mask || (index == mask + 1 && hasEmptyKey);
|
||||||
|
|
||||||
|
return index >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the exact value of the existing key. This method makes sense for sets of objects which
|
||||||
|
* define custom key-equality relationship.
|
||||||
|
*
|
||||||
|
* @see #indexOf
|
||||||
|
* @param index The index of an existing key.
|
||||||
|
* @return Returns the equivalent key currently stored in the set.
|
||||||
|
* @throws AssertionError If assertions are enabled and the index does not correspond to an
|
||||||
|
* existing key.
|
||||||
|
*/
|
||||||
|
public char indexGet(int index) {
|
||||||
|
assert index >= 0 : "The index must point at an existing key.";
|
||||||
|
assert index <= mask || (index == mask + 1 && hasEmptyKey);
|
||||||
|
|
||||||
|
return keys[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Replaces the existing equivalent key with the given one and returns any previous value stored
|
||||||
|
* for that key.
|
||||||
|
*
|
||||||
|
* @see #indexOf
|
||||||
|
* @param index The index of an existing key.
|
||||||
|
* @param equivalentKey The key to put in the set as a replacement. Must be equivalent to the key
|
||||||
|
* currently stored at the provided index.
|
||||||
|
* @return Returns the previous key stored in the set.
|
||||||
|
* @throws AssertionError If assertions are enabled and the index does not correspond to an
|
||||||
|
* existing key.
|
||||||
|
*/
|
||||||
|
public char indexReplace(int index, char equivalentKey) {
|
||||||
|
assert index >= 0 : "The index must point at an existing key.";
|
||||||
|
assert index <= mask || (index == mask + 1 && hasEmptyKey);
|
||||||
|
assert ((keys[index]) == (equivalentKey));
|
||||||
|
|
||||||
|
char previousValue = keys[index];
|
||||||
|
keys[index] = equivalentKey;
|
||||||
|
return previousValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Inserts a key for an index that is not present in the set. This method may help in avoiding
|
||||||
|
* double recalculation of the key's hash.
|
||||||
|
*
|
||||||
|
* @see #indexOf
|
||||||
|
* @param index The index of a previously non-existing key, as returned from {@link #indexOf}.
|
||||||
|
* @throws AssertionError If assertions are enabled and the index does not correspond to an
|
||||||
|
* existing key.
|
||||||
|
*/
|
||||||
|
public void indexInsert(int index, char key) {
|
||||||
|
assert index < 0 : "The index must not point at an existing key.";
|
||||||
|
|
||||||
|
index = ~index;
|
||||||
|
if (((key) == 0)) {
|
||||||
|
assert index == mask + 1;
|
||||||
|
assert ((keys[index]) == 0);
|
||||||
|
hasEmptyKey = true;
|
||||||
|
} else {
|
||||||
|
assert ((keys[index]) == 0);
|
||||||
|
|
||||||
|
if (assigned == resizeAt) {
|
||||||
|
allocateThenInsertThenRehash(index, key);
|
||||||
|
} else {
|
||||||
|
keys[index] = key;
|
||||||
|
}
|
||||||
|
|
||||||
|
assigned++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removes a key at an index previously acquired from {@link #indexOf}.
|
||||||
|
*
|
||||||
|
* @see #indexOf
|
||||||
|
* @param index The index of the key to remove, as returned from {@link #indexOf}.
|
||||||
|
* @throws AssertionError If assertions are enabled and the index does not correspond to an
|
||||||
|
* existing key.
|
||||||
|
*/
|
||||||
|
public void indexRemove(int index) {
|
||||||
|
assert index >= 0 : "The index must point at an existing key.";
|
||||||
|
assert index <= mask || (index == mask + 1 && hasEmptyKey);
|
||||||
|
|
||||||
|
if (index > mask) {
|
||||||
|
hasEmptyKey = false;
|
||||||
|
} else {
|
||||||
|
shiftConflictingKeys(index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validate load factor range and return it. Override and suppress if you need insane load
|
||||||
|
* factors.
|
||||||
|
*/
|
||||||
|
protected double verifyLoadFactor(double loadFactor) {
|
||||||
|
checkLoadFactor(loadFactor, MIN_LOAD_FACTOR, MAX_LOAD_FACTOR);
|
||||||
|
return loadFactor;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Rehash from old buffers to new buffers. */
|
||||||
|
protected void rehash(char[] fromKeys) {
|
||||||
|
assert HashContainers.checkPowerOfTwo(fromKeys.length - 1);
|
||||||
|
|
||||||
|
// Rehash all stored keys into the new buffers.
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
final int mask = this.mask;
|
||||||
|
char existing;
|
||||||
|
for (int i = fromKeys.length - 1; --i >= 0; ) {
|
||||||
|
if (!((existing = fromKeys[i]) == 0)) {
|
||||||
|
int slot = hashKey(existing) & mask;
|
||||||
|
while (!((keys[slot]) == 0)) {
|
||||||
|
slot = (slot + 1) & mask;
|
||||||
|
}
|
||||||
|
keys[slot] = existing;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate new internal buffers. This method attempts to allocate and assign internal buffers
|
||||||
|
* atomically (either allocations succeed or not).
|
||||||
|
*/
|
||||||
|
protected void allocateBuffers(int arraySize) {
|
||||||
|
assert Integer.bitCount(arraySize) == 1;
|
||||||
|
|
||||||
|
// Ensure no change is done if we hit an OOM.
|
||||||
|
char[] prevKeys = this.keys;
|
||||||
|
try {
|
||||||
|
int emptyElementSlot = 1;
|
||||||
|
this.keys = (new char[arraySize + emptyElementSlot]);
|
||||||
|
} catch (OutOfMemoryError e) {
|
||||||
|
this.keys = prevKeys;
|
||||||
|
throw new BufferAllocationException(
|
||||||
|
"Not enough memory to allocate buffers for rehashing: %,d -> %,d",
|
||||||
|
e, this.keys == null ? 0 : size(), arraySize);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.resizeAt = expandAtCount(arraySize, loadFactor);
|
||||||
|
this.mask = arraySize - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method is invoked when there is a new key to be inserted into the buffer but there is not
|
||||||
|
* enough empty slots to do so.
|
||||||
|
*
|
||||||
|
* <p>New buffers are allocated. If this succeeds, we know we can proceed with rehashing so we
|
||||||
|
* assign the pending element to the previous buffer (possibly violating the invariant of having
|
||||||
|
* at least one empty slot) and rehash all keys, substituting new buffers at the end.
|
||||||
|
*/
|
||||||
|
protected void allocateThenInsertThenRehash(int slot, char pendingKey) {
|
||||||
|
assert assigned == resizeAt && ((keys[slot]) == 0) && !((pendingKey) == 0);
|
||||||
|
|
||||||
|
// Try to allocate new buffers first. If we OOM, we leave in a consistent state.
|
||||||
|
final char[] prevKeys = this.keys;
|
||||||
|
allocateBuffers(nextBufferSize(mask + 1, size(), loadFactor));
|
||||||
|
assert this.keys.length > prevKeys.length;
|
||||||
|
|
||||||
|
// We have succeeded at allocating new data so insert the pending key/value at
|
||||||
|
// the free slot in the old arrays before rehashing.
|
||||||
|
prevKeys[slot] = pendingKey;
|
||||||
|
|
||||||
|
// Rehash old keys, including the pending key.
|
||||||
|
rehash(prevKeys);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Shift all the slot-conflicting keys allocated to (and including) <code>slot</code>. */
|
||||||
|
protected void shiftConflictingKeys(int gapSlot) {
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
final int mask = this.mask;
|
||||||
|
|
||||||
|
// Perform shifts of conflicting keys to fill in the gap.
|
||||||
|
int distance = 0;
|
||||||
|
while (true) {
|
||||||
|
final int slot = (gapSlot + (++distance)) & mask;
|
||||||
|
final char existing = keys[slot];
|
||||||
|
if (((existing) == 0)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
final int idealSlot = hashKey(existing);
|
||||||
|
final int shift = (slot - idealSlot) & mask;
|
||||||
|
if (shift >= distance) {
|
||||||
|
// Entry at this position was originally at or before the gap slot.
|
||||||
|
// Move the conflict-shifted entry to the gap's position and repeat the procedure
|
||||||
|
// for any entries to the right of the current position, treating it
|
||||||
|
// as the new gap.
|
||||||
|
keys[gapSlot] = existing;
|
||||||
|
gapSlot = slot;
|
||||||
|
distance = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark the last found gap slot without a conflict as empty.
|
||||||
|
keys[gapSlot] = EMPTY_KEY;
|
||||||
|
assigned--;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,827 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.util.hppc;
|
||||||
|
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_EXPECTED_ELEMENTS;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_LOAD_FACTOR;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.ITERATION_SEED;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.MAX_LOAD_FACTOR;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.MIN_LOAD_FACTOR;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.checkLoadFactor;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.checkPowerOfTwo;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.expandAtCount;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.iterationIncrement;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.minBufferSize;
|
||||||
|
import static org.apache.lucene.util.hppc.HashContainers.nextBufferSize;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import org.apache.lucene.util.Accountable;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A hash map of <code>char</code> to <code>Object</code>, implemented using open addressing with
|
||||||
|
* linear probing for collision resolution. Supports null values.
|
||||||
|
*
|
||||||
|
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.CharObjectHashMap
|
||||||
|
*
|
||||||
|
* <p>github: https://github.com/carrotsearch/hppc release 0.9.0
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
public class CharObjectHashMap<VType>
|
||||||
|
implements Iterable<CharObjectHashMap.CharObjectCursor<VType>>, Accountable, Cloneable {
|
||||||
|
|
||||||
|
private static final long BASE_RAM_BYTES_USED =
|
||||||
|
RamUsageEstimator.shallowSizeOfInstance(CharObjectHashMap.class);
|
||||||
|
|
||||||
|
private static final char EMPTY_KEY = (char) 0;
|
||||||
|
|
||||||
|
/** The array holding keys. */
|
||||||
|
public char[] keys;
|
||||||
|
|
||||||
|
/** The array holding values. */
|
||||||
|
public Object[] values;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The number of stored keys (assigned key slots), excluding the special "empty" key, if any (use
|
||||||
|
* {@link #size()} instead).
|
||||||
|
*
|
||||||
|
* @see #size()
|
||||||
|
*/
|
||||||
|
protected int assigned;
|
||||||
|
|
||||||
|
/** Mask for slot scans in {@link #keys}. */
|
||||||
|
protected int mask;
|
||||||
|
|
||||||
|
/** Expand (rehash) {@link #keys} when {@link #assigned} hits this value. */
|
||||||
|
protected int resizeAt;
|
||||||
|
|
||||||
|
/** Special treatment for the "empty slot" key marker. */
|
||||||
|
protected boolean hasEmptyKey;
|
||||||
|
|
||||||
|
/** The load factor for {@link #keys}. */
|
||||||
|
protected double loadFactor;
|
||||||
|
|
||||||
|
/** Seed used to ensure the hash iteration order is different from an iteration to another. */
|
||||||
|
protected int iterationSeed;
|
||||||
|
|
||||||
|
/** Creates an empty map sized for {@code DEFAULT_EXPECTED_ELEMENTS} entries. */
public CharObjectHashMap() {
  this(DEFAULT_EXPECTED_ELEMENTS);
}
|
||||||
|
|
||||||
|
/**
 * Creates an empty map with the default load factor ({@code DEFAULT_LOAD_FACTOR}).
 *
 * @param expectedElements The expected number of elements guaranteed not to cause buffer
 *     expansion (inclusive).
 */
public CharObjectHashMap(int expectedElements) {
  this(expectedElements, DEFAULT_LOAD_FACTOR);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* New instance with the provided defaults.
|
||||||
|
*
|
||||||
|
* @param expectedElements The expected number of elements guaranteed not to cause a rehash
|
||||||
|
* (inclusive).
|
||||||
|
* @param loadFactor The load factor for internal buffers. Insane load factors (zero, full
|
||||||
|
* capacity) are rejected by {@link #verifyLoadFactor(double)}.
|
||||||
|
*/
|
||||||
|
public CharObjectHashMap(int expectedElements, double loadFactor) {
|
||||||
|
this.loadFactor = verifyLoadFactor(loadFactor);
|
||||||
|
iterationSeed = ITERATION_SEED.incrementAndGet();
|
||||||
|
ensureCapacity(expectedElements);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Copy constructor: creates a map sized for the source map and puts all of its key-value pairs
 * into the new instance.
 *
 * @param map The map whose entries are copied.
 */
public CharObjectHashMap(CharObjectHashMap<VType> map) {
  this(map.size());
  this.putAll(map);
}
|
||||||
|
|
||||||
|
public VType put(char key, VType value) {
|
||||||
|
assert assigned < mask + 1;
|
||||||
|
|
||||||
|
final int mask = this.mask;
|
||||||
|
if (((key) == 0)) {
|
||||||
|
VType previousValue = hasEmptyKey ? (VType) values[mask + 1] : null;
|
||||||
|
hasEmptyKey = true;
|
||||||
|
values[mask + 1] = value;
|
||||||
|
return previousValue;
|
||||||
|
} else {
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
int slot = hashKey(key) & mask;
|
||||||
|
|
||||||
|
char existing;
|
||||||
|
while (!((existing = keys[slot]) == 0)) {
|
||||||
|
if (((existing) == (key))) {
|
||||||
|
final VType previousValue = (VType) values[slot];
|
||||||
|
values[slot] = value;
|
||||||
|
return previousValue;
|
||||||
|
}
|
||||||
|
slot = (slot + 1) & mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (assigned == resizeAt) {
|
||||||
|
allocateThenInsertThenRehash(slot, key, value);
|
||||||
|
} else {
|
||||||
|
keys[slot] = key;
|
||||||
|
values[slot] = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
assigned++;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int putAll(Iterable<? extends CharObjectCursor<? extends VType>> iterable) {
|
||||||
|
final int count = size();
|
||||||
|
for (CharObjectCursor<? extends VType> c : iterable) {
|
||||||
|
put(c.key, c.value);
|
||||||
|
}
|
||||||
|
return size() - count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <a href="http://trove4j.sourceforge.net">Trove</a>-inspired API method. An equivalent of the
|
||||||
|
* following code:
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* if (!map.containsKey(key)) map.put(value);
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @param key The key of the value to check.
|
||||||
|
* @param value The value to put if <code>key</code> does not exist.
|
||||||
|
* @return <code>true</code> if <code>key</code> did not exist and <code>value</code> was placed
|
||||||
|
* in the map.
|
||||||
|
*/
|
||||||
|
public boolean putIfAbsent(char key, VType value) {
|
||||||
|
int keyIndex = indexOf(key);
|
||||||
|
if (!indexExists(keyIndex)) {
|
||||||
|
indexInsert(keyIndex, key, value);
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public VType remove(char key) {
|
||||||
|
final int mask = this.mask;
|
||||||
|
if (((key) == 0)) {
|
||||||
|
if (!hasEmptyKey) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
hasEmptyKey = false;
|
||||||
|
VType previousValue = (VType) values[mask + 1];
|
||||||
|
values[mask + 1] = 0;
|
||||||
|
return previousValue;
|
||||||
|
} else {
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
int slot = hashKey(key) & mask;
|
||||||
|
|
||||||
|
char existing;
|
||||||
|
while (!((existing = keys[slot]) == 0)) {
|
||||||
|
if (((existing) == (key))) {
|
||||||
|
final VType previousValue = (VType) values[slot];
|
||||||
|
shiftConflictingKeys(slot);
|
||||||
|
return previousValue;
|
||||||
|
}
|
||||||
|
slot = (slot + 1) & mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public VType get(char key) {
|
||||||
|
if (((key) == 0)) {
|
||||||
|
return hasEmptyKey ? (VType) values[mask + 1] : null;
|
||||||
|
} else {
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
final int mask = this.mask;
|
||||||
|
int slot = hashKey(key) & mask;
|
||||||
|
|
||||||
|
char existing;
|
||||||
|
while (!((existing = keys[slot]) == 0)) {
|
||||||
|
if (((existing) == (key))) {
|
||||||
|
return (VType) values[slot];
|
||||||
|
}
|
||||||
|
slot = (slot + 1) & mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public VType getOrDefault(char key, VType defaultValue) {
|
||||||
|
if (((key) == 0)) {
|
||||||
|
return hasEmptyKey ? (VType) values[mask + 1] : defaultValue;
|
||||||
|
} else {
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
final int mask = this.mask;
|
||||||
|
int slot = hashKey(key) & mask;
|
||||||
|
|
||||||
|
char existing;
|
||||||
|
while (!((existing = keys[slot]) == 0)) {
|
||||||
|
if (((existing) == (key))) {
|
||||||
|
return (VType) values[slot];
|
||||||
|
}
|
||||||
|
slot = (slot + 1) & mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean containsKey(char key) {
|
||||||
|
if (((key) == 0)) {
|
||||||
|
return hasEmptyKey;
|
||||||
|
} else {
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
final int mask = this.mask;
|
||||||
|
int slot = hashKey(key) & mask;
|
||||||
|
|
||||||
|
char existing;
|
||||||
|
while (!((existing = keys[slot]) == 0)) {
|
||||||
|
if (((existing) == (key))) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
slot = (slot + 1) & mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int indexOf(char key) {
|
||||||
|
final int mask = this.mask;
|
||||||
|
if (((key) == 0)) {
|
||||||
|
return hasEmptyKey ? mask + 1 : ~(mask + 1);
|
||||||
|
} else {
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
int slot = hashKey(key) & mask;
|
||||||
|
|
||||||
|
char existing;
|
||||||
|
while (!((existing = keys[slot]) == 0)) {
|
||||||
|
if (((existing) == (key))) {
|
||||||
|
return slot;
|
||||||
|
}
|
||||||
|
slot = (slot + 1) & mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ~slot;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean indexExists(int index) {
|
||||||
|
assert index < 0 || (index >= 0 && index <= mask) || (index == mask + 1 && hasEmptyKey);
|
||||||
|
|
||||||
|
return index >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public VType indexGet(int index) {
|
||||||
|
assert index >= 0 : "The index must point at an existing key.";
|
||||||
|
assert index <= mask || (index == mask + 1 && hasEmptyKey);
|
||||||
|
|
||||||
|
return (VType) values[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
public VType indexReplace(int index, VType newValue) {
|
||||||
|
assert index >= 0 : "The index must point at an existing key.";
|
||||||
|
assert index <= mask || (index == mask + 1 && hasEmptyKey);
|
||||||
|
|
||||||
|
VType previousValue = (VType) values[index];
|
||||||
|
values[index] = newValue;
|
||||||
|
return previousValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void indexInsert(int index, char key, VType value) {
|
||||||
|
assert index < 0 : "The index must not point at an existing key.";
|
||||||
|
|
||||||
|
index = ~index;
|
||||||
|
if (((key) == 0)) {
|
||||||
|
assert index == mask + 1;
|
||||||
|
values[index] = value;
|
||||||
|
hasEmptyKey = true;
|
||||||
|
} else {
|
||||||
|
assert ((keys[index]) == 0);
|
||||||
|
|
||||||
|
if (assigned == resizeAt) {
|
||||||
|
allocateThenInsertThenRehash(index, key, value);
|
||||||
|
} else {
|
||||||
|
keys[index] = key;
|
||||||
|
values[index] = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
assigned++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public VType indexRemove(int index) {
|
||||||
|
assert index >= 0 : "The index must point at an existing key.";
|
||||||
|
assert index <= mask || (index == mask + 1 && hasEmptyKey);
|
||||||
|
|
||||||
|
VType previousValue = (VType) values[index];
|
||||||
|
if (index > mask) {
|
||||||
|
assert index == mask + 1;
|
||||||
|
hasEmptyKey = false;
|
||||||
|
values[index] = 0;
|
||||||
|
} else {
|
||||||
|
shiftConflictingKeys(index);
|
||||||
|
}
|
||||||
|
return previousValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void clear() {
|
||||||
|
assigned = 0;
|
||||||
|
hasEmptyKey = false;
|
||||||
|
|
||||||
|
Arrays.fill(keys, EMPTY_KEY);
|
||||||
|
|
||||||
|
/* */
|
||||||
|
}
|
||||||
|
|
||||||
|
public void release() {
|
||||||
|
assigned = 0;
|
||||||
|
hasEmptyKey = false;
|
||||||
|
|
||||||
|
keys = null;
|
||||||
|
values = null;
|
||||||
|
ensureCapacity(DEFAULT_EXPECTED_ELEMENTS);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return assigned + (hasEmptyKey ? 1 : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return size() == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
int h = hasEmptyKey ? 0xDEADBEEF : 0;
|
||||||
|
for (CharObjectCursor<VType> c : this) {
|
||||||
|
h += BitMixer.mix(c.key) + BitMixer.mix(c.value);
|
||||||
|
}
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
return (this == obj)
|
||||||
|
|| (obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return true if all keys of some other container exist in this container. */
|
||||||
|
protected boolean equalElements(CharObjectHashMap<?> other) {
|
||||||
|
if (other.size() != size()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (CharObjectCursor<?> c : other) {
|
||||||
|
char key = c.key;
|
||||||
|
if (!containsKey(key) || !java.util.Objects.equals(c.value, get(key))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensure this container can hold at least the given number of keys (entries) without resizing its
|
||||||
|
* buffers.
|
||||||
|
*
|
||||||
|
* @param expectedElements The total number of keys, inclusive.
|
||||||
|
*/
|
||||||
|
public void ensureCapacity(int expectedElements) {
|
||||||
|
if (expectedElements > resizeAt || keys == null) {
|
||||||
|
final char[] prevKeys = this.keys;
|
||||||
|
final VType[] prevValues = (VType[]) this.values;
|
||||||
|
allocateBuffers(minBufferSize(expectedElements, loadFactor));
|
||||||
|
if (prevKeys != null && !isEmpty()) {
|
||||||
|
rehash(prevKeys, prevValues);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides the next iteration seed used to build the iteration starting slot and offset
|
||||||
|
* increment. This method does not need to be synchronized, what matters is that each thread gets
|
||||||
|
* a sequence of varying seeds.
|
||||||
|
*/
|
||||||
|
protected int nextIterationSeed() {
|
||||||
|
return iterationSeed = BitMixer.mixPhi(iterationSeed);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<CharObjectCursor<VType>> iterator() {
|
||||||
|
return new EntryIterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys) + sizeOfValues();
|
||||||
|
}
|
||||||
|
|
||||||
|
private long sizeOfValues() {
|
||||||
|
long size = RamUsageEstimator.shallowSizeOf(values);
|
||||||
|
for (ObjectCursor<VType> value : values()) {
|
||||||
|
size += RamUsageEstimator.sizeOfObject(value);
|
||||||
|
}
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** An iterator implementation for {@link #iterator}. */
|
||||||
|
private final class EntryIterator extends AbstractIterator<CharObjectCursor<VType>> {
|
||||||
|
private final CharObjectCursor<VType> cursor;
|
||||||
|
private final int increment;
|
||||||
|
private int index;
|
||||||
|
private int slot;
|
||||||
|
|
||||||
|
public EntryIterator() {
|
||||||
|
cursor = new CharObjectCursor<VType>();
|
||||||
|
int seed = nextIterationSeed();
|
||||||
|
increment = iterationIncrement(seed);
|
||||||
|
slot = seed & mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected CharObjectCursor<VType> fetch() {
|
||||||
|
final int mask = CharObjectHashMap.this.mask;
|
||||||
|
while (index <= mask) {
|
||||||
|
char existing;
|
||||||
|
index++;
|
||||||
|
slot = (slot + increment) & mask;
|
||||||
|
if (!((existing = keys[slot]) == 0)) {
|
||||||
|
cursor.index = slot;
|
||||||
|
cursor.key = existing;
|
||||||
|
cursor.value = (VType) values[slot];
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (index == mask + 1 && hasEmptyKey) {
|
||||||
|
cursor.index = index;
|
||||||
|
cursor.key = 0;
|
||||||
|
cursor.value = (VType) values[index++];
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
|
||||||
|
return done();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns a specialized view of the keys of this associated container. */
|
||||||
|
public KeysContainer keys() {
|
||||||
|
return new KeysContainer();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** A view of the keys inside this hash map. */
|
||||||
|
public final class KeysContainer implements Iterable<CharCursor> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<CharCursor> iterator() {
|
||||||
|
return new KeysIterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return CharObjectHashMap.this.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public char[] toArray() {
|
||||||
|
char[] array = new char[size()];
|
||||||
|
int i = 0;
|
||||||
|
for (CharCursor cursor : this) {
|
||||||
|
array[i++] = cursor.value;
|
||||||
|
}
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** An iterator over the set of assigned keys. */
|
||||||
|
private final class KeysIterator extends AbstractIterator<CharCursor> {
|
||||||
|
private final CharCursor cursor;
|
||||||
|
private final int increment;
|
||||||
|
private int index;
|
||||||
|
private int slot;
|
||||||
|
|
||||||
|
public KeysIterator() {
|
||||||
|
cursor = new CharCursor();
|
||||||
|
int seed = nextIterationSeed();
|
||||||
|
increment = iterationIncrement(seed);
|
||||||
|
slot = seed & mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected CharCursor fetch() {
|
||||||
|
final int mask = CharObjectHashMap.this.mask;
|
||||||
|
while (index <= mask) {
|
||||||
|
char existing;
|
||||||
|
index++;
|
||||||
|
slot = (slot + increment) & mask;
|
||||||
|
if (!((existing = keys[slot]) == 0)) {
|
||||||
|
cursor.index = slot;
|
||||||
|
cursor.value = existing;
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (index == mask + 1 && hasEmptyKey) {
|
||||||
|
cursor.index = index++;
|
||||||
|
cursor.value = 0;
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
|
||||||
|
return done();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return Returns a container with all values stored in this map.
|
||||||
|
*/
|
||||||
|
public ValuesContainer values() {
|
||||||
|
return new ValuesContainer();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** A view over the set of values of this map. */
|
||||||
|
public final class ValuesContainer implements Iterable<ObjectCursor<VType>> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<ObjectCursor<VType>> iterator() {
|
||||||
|
return new ValuesIterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return CharObjectHashMap.this.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public VType[] toArray() {
|
||||||
|
VType[] array = (VType[]) new Object[size()];
|
||||||
|
int i = 0;
|
||||||
|
for (ObjectCursor<VType> cursor : this) {
|
||||||
|
array[i++] = cursor.value;
|
||||||
|
}
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** An iterator over the set of assigned values. */
|
||||||
|
private final class ValuesIterator extends AbstractIterator<ObjectCursor<VType>> {
|
||||||
|
private final ObjectCursor<VType> cursor;
|
||||||
|
private final int increment;
|
||||||
|
private int index;
|
||||||
|
private int slot;
|
||||||
|
|
||||||
|
public ValuesIterator() {
|
||||||
|
cursor = new ObjectCursor<>();
|
||||||
|
int seed = nextIterationSeed();
|
||||||
|
increment = iterationIncrement(seed);
|
||||||
|
slot = seed & mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ObjectCursor<VType> fetch() {
|
||||||
|
final int mask = CharObjectHashMap.this.mask;
|
||||||
|
while (index <= mask) {
|
||||||
|
index++;
|
||||||
|
slot = (slot + increment) & mask;
|
||||||
|
if (!((keys[slot]) == 0)) {
|
||||||
|
cursor.index = slot;
|
||||||
|
cursor.value = (VType) values[slot];
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (index == mask + 1 && hasEmptyKey) {
|
||||||
|
cursor.index = index;
|
||||||
|
cursor.value = (VType) values[index++];
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
|
||||||
|
return done();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public CharObjectHashMap<VType> clone() {
|
||||||
|
try {
|
||||||
|
/* */
|
||||||
|
CharObjectHashMap<VType> cloned = (CharObjectHashMap<VType>) super.clone();
|
||||||
|
cloned.keys = keys.clone();
|
||||||
|
cloned.values = values.clone();
|
||||||
|
cloned.hasEmptyKey = hasEmptyKey;
|
||||||
|
cloned.iterationSeed = ITERATION_SEED.incrementAndGet();
|
||||||
|
return cloned;
|
||||||
|
} catch (CloneNotSupportedException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Convert the contents of this map to a human-friendly string. */
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
final StringBuilder buffer = new StringBuilder();
|
||||||
|
buffer.append("[");
|
||||||
|
|
||||||
|
boolean first = true;
|
||||||
|
for (CharObjectCursor<VType> cursor : this) {
|
||||||
|
if (!first) {
|
||||||
|
buffer.append(", ");
|
||||||
|
}
|
||||||
|
buffer.append(cursor.key);
|
||||||
|
buffer.append("=>");
|
||||||
|
buffer.append(cursor.value);
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
buffer.append("]");
|
||||||
|
return buffer.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Creates a hash map from two index-aligned arrays of key-value pairs. */
|
||||||
|
public static <VType> CharObjectHashMap<VType> from(char[] keys, VType[] values) {
|
||||||
|
if (keys.length != values.length) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"Arrays of keys and values must have an identical length.");
|
||||||
|
}
|
||||||
|
|
||||||
|
CharObjectHashMap<VType> map = new CharObjectHashMap<>(keys.length);
|
||||||
|
for (int i = 0; i < keys.length; i++) {
|
||||||
|
map.put(keys[i], values[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a hash code for the given key.
|
||||||
|
*
|
||||||
|
* <p>The output from this function should evenly distribute keys across the entire integer range.
|
||||||
|
*/
|
||||||
|
protected int hashKey(char key) {
|
||||||
|
assert !((key) == 0); // Handled as a special case (empty slot marker).
|
||||||
|
return BitMixer.mixPhi(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validate load factor range and return it. Override and suppress if you need insane load
|
||||||
|
* factors.
|
||||||
|
*/
|
||||||
|
protected double verifyLoadFactor(double loadFactor) {
|
||||||
|
checkLoadFactor(loadFactor, MIN_LOAD_FACTOR, MAX_LOAD_FACTOR);
|
||||||
|
return loadFactor;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Rehash from old buffers to new buffers. */
|
||||||
|
protected void rehash(char[] fromKeys, VType[] fromValues) {
|
||||||
|
assert fromKeys.length == fromValues.length && checkPowerOfTwo(fromKeys.length - 1);
|
||||||
|
|
||||||
|
// Rehash all stored key/value pairs into the new buffers.
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
final VType[] values = (VType[]) this.values;
|
||||||
|
final int mask = this.mask;
|
||||||
|
char existing;
|
||||||
|
|
||||||
|
// Copy the zero element's slot, then rehash everything else.
|
||||||
|
int from = fromKeys.length - 1;
|
||||||
|
keys[keys.length - 1] = fromKeys[from];
|
||||||
|
values[values.length - 1] = fromValues[from];
|
||||||
|
while (--from >= 0) {
|
||||||
|
if (!((existing = fromKeys[from]) == 0)) {
|
||||||
|
int slot = hashKey(existing) & mask;
|
||||||
|
while (!((keys[slot]) == 0)) {
|
||||||
|
slot = (slot + 1) & mask;
|
||||||
|
}
|
||||||
|
keys[slot] = existing;
|
||||||
|
values[slot] = fromValues[from];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate new internal buffers. This method attempts to allocate and assign internal buffers
|
||||||
|
* atomically (either allocations succeed or not).
|
||||||
|
*/
|
||||||
|
protected void allocateBuffers(int arraySize) {
|
||||||
|
assert Integer.bitCount(arraySize) == 1;
|
||||||
|
|
||||||
|
// Ensure no change is done if we hit an OOM.
|
||||||
|
char[] prevKeys = this.keys;
|
||||||
|
VType[] prevValues = (VType[]) this.values;
|
||||||
|
try {
|
||||||
|
int emptyElementSlot = 1;
|
||||||
|
this.keys = (new char[arraySize + emptyElementSlot]);
|
||||||
|
this.values = new Object[arraySize + emptyElementSlot];
|
||||||
|
} catch (OutOfMemoryError e) {
|
||||||
|
this.keys = prevKeys;
|
||||||
|
this.values = prevValues;
|
||||||
|
throw new BufferAllocationException(
|
||||||
|
"Not enough memory to allocate buffers for rehashing: %,d -> %,d",
|
||||||
|
e, this.mask + 1, arraySize);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.resizeAt = expandAtCount(arraySize, loadFactor);
|
||||||
|
this.mask = arraySize - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method is invoked when there is a new key/ value pair to be inserted into the buffers but
|
||||||
|
* there is not enough empty slots to do so.
|
||||||
|
*
|
||||||
|
* <p>New buffers are allocated. If this succeeds, we know we can proceed with rehashing so we
|
||||||
|
* assign the pending element to the previous buffer (possibly violating the invariant of having
|
||||||
|
* at least one empty slot) and rehash all keys, substituting new buffers at the end.
|
||||||
|
*/
|
||||||
|
protected void allocateThenInsertThenRehash(int slot, char pendingKey, VType pendingValue) {
|
||||||
|
assert assigned == resizeAt && ((keys[slot]) == 0) && !((pendingKey) == 0);
|
||||||
|
|
||||||
|
// Try to allocate new buffers first. If we OOM, we leave in a consistent state.
|
||||||
|
final char[] prevKeys = this.keys;
|
||||||
|
final VType[] prevValues = (VType[]) this.values;
|
||||||
|
allocateBuffers(nextBufferSize(mask + 1, size(), loadFactor));
|
||||||
|
assert this.keys.length > prevKeys.length;
|
||||||
|
|
||||||
|
// We have succeeded at allocating new data so insert the pending key/value at
|
||||||
|
// the free slot in the old arrays before rehashing.
|
||||||
|
prevKeys[slot] = pendingKey;
|
||||||
|
prevValues[slot] = pendingValue;
|
||||||
|
|
||||||
|
// Rehash old keys, including the pending key.
|
||||||
|
rehash(prevKeys, prevValues);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shift all the slot-conflicting keys and values allocated to (and including) <code>slot</code>.
|
||||||
|
*/
|
||||||
|
protected void shiftConflictingKeys(int gapSlot) {
|
||||||
|
final char[] keys = this.keys;
|
||||||
|
final VType[] values = (VType[]) this.values;
|
||||||
|
final int mask = this.mask;
|
||||||
|
|
||||||
|
// Perform shifts of conflicting keys to fill in the gap.
|
||||||
|
int distance = 0;
|
||||||
|
while (true) {
|
||||||
|
final int slot = (gapSlot + (++distance)) & mask;
|
||||||
|
final char existing = keys[slot];
|
||||||
|
if (((existing) == 0)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
final int idealSlot = hashKey(existing);
|
||||||
|
final int shift = (slot - idealSlot) & mask;
|
||||||
|
if (shift >= distance) {
|
||||||
|
// Entry at this position was originally at or before the gap slot.
|
||||||
|
// Move the conflict-shifted entry to the gap's position and repeat the procedure
|
||||||
|
// for any entries to the right of the current position, treating it
|
||||||
|
// as the new gap.
|
||||||
|
keys[gapSlot] = existing;
|
||||||
|
values[gapSlot] = values[slot];
|
||||||
|
gapSlot = slot;
|
||||||
|
distance = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark the last found gap slot without a conflict as empty.
|
||||||
|
keys[gapSlot] = 0;
|
||||||
|
values[gapSlot] = null;
|
||||||
|
assigned--;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Forked from HPPC, holding int index,key and value */
|
||||||
|
public static final class CharObjectCursor<VType> {
|
||||||
|
/**
|
||||||
|
* The current key and value's index in the container this cursor belongs to. The meaning of
|
||||||
|
* this index is defined by the container (usually it will be an index in the underlying storage
|
||||||
|
* buffer).
|
||||||
|
*/
|
||||||
|
public int index;
|
||||||
|
|
||||||
|
/** The current key. */
|
||||||
|
public char key;
|
||||||
|
|
||||||
|
/** The current value. */
|
||||||
|
public VType value;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "[cursor, index: " + index + ", key: " + key + ", value: " + value + "]";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -22,26 +22,26 @@ import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
/** Constants for primitive maps. */
|
/** Constants for primitive maps. */
|
||||||
public class HashContainers {
|
class HashContainers {
|
||||||
|
|
||||||
public static final int DEFAULT_EXPECTED_ELEMENTS = 4;
|
static final int DEFAULT_EXPECTED_ELEMENTS = 4;
|
||||||
|
|
||||||
public static final float DEFAULT_LOAD_FACTOR = 0.75f;
|
static final float DEFAULT_LOAD_FACTOR = 0.75f;
|
||||||
|
|
||||||
/** Minimal sane load factor (99 empty slots per 100). */
|
/** Minimal sane load factor (99 empty slots per 100). */
|
||||||
public static final float MIN_LOAD_FACTOR = 1 / 100.0f;
|
static final float MIN_LOAD_FACTOR = 1 / 100.0f;
|
||||||
|
|
||||||
/** Maximum sane load factor (1 empty slot per 100). */
|
/** Maximum sane load factor (1 empty slot per 100). */
|
||||||
public static final float MAX_LOAD_FACTOR = 99 / 100.0f;
|
static final float MAX_LOAD_FACTOR = 99 / 100.0f;
|
||||||
|
|
||||||
/** Minimum hash buffer size. */
|
/** Minimum hash buffer size. */
|
||||||
public static final int MIN_HASH_ARRAY_LENGTH = 4;
|
static final int MIN_HASH_ARRAY_LENGTH = 4;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maximum array size for hash containers (power-of-two and still allocable in Java, not a
|
* Maximum array size for hash containers (power-of-two and still allocable in Java, not a
|
||||||
* negative int).
|
* negative int).
|
||||||
*/
|
*/
|
||||||
public static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1;
|
static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1;
|
||||||
|
|
||||||
static final AtomicInteger ITERATION_SEED = new AtomicInteger();
|
static final AtomicInteger ITERATION_SEED = new AtomicInteger();
|
||||||
|
|
||||||
|
|
|
@ -152,6 +152,17 @@ public class IntHashSet implements Iterable<IntCursor>, Accountable, Cloneable {
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds all elements from the given set to this set.
|
||||||
|
*
|
||||||
|
* @return Returns the number of elements actually added as a result of this call (not previously
|
||||||
|
* present in the set).
|
||||||
|
*/
|
||||||
|
public int addAll(IntHashSet set) {
|
||||||
|
ensureCapacity(set.size());
|
||||||
|
return addAll((Iterable<? extends IntCursor>) set);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds all elements from the given iterable to this set.
|
* Adds all elements from the given iterable to this set.
|
||||||
*
|
*
|
||||||
|
|
|
@ -145,6 +145,17 @@ public class LongHashSet implements Iterable<LongCursor>, Accountable, Cloneable
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds all elements from the given set to this set.
|
||||||
|
*
|
||||||
|
* @return Returns the number of elements actually added as a result of this call (not previously
|
||||||
|
* present in the set).
|
||||||
|
*/
|
||||||
|
public int addAll(LongHashSet set) {
|
||||||
|
ensureCapacity(set.size());
|
||||||
|
return addAll((Iterable<? extends LongCursor>) set);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds all elements from the given iterable to this set.
|
* Adds all elements from the given iterable to this set.
|
||||||
*
|
*
|
||||||
|
|
|
@ -0,0 +1,473 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.util.hppc;
|
||||||
|
|
||||||
|
import static org.hamcrest.Matchers.empty;
|
||||||
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
|
||||||
|
import static org.hamcrest.Matchers.is;
|
||||||
|
import static org.hamcrest.Matchers.lessThan;
|
||||||
|
import static org.hamcrest.Matchers.not;
|
||||||
|
|
||||||
|
import com.carrotsearch.randomizedtesting.RandomizedTest;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
|
import org.hamcrest.MatcherAssert;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for {@link CharHashSet}.
|
||||||
|
*
|
||||||
|
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.CharHashSetTest
|
||||||
|
*
|
||||||
|
* <p>github: https://github.com/carrotsearch/hppc release: 0.9.0
|
||||||
|
*/
|
||||||
|
public class TestCharHashSet extends LuceneTestCase {
|
||||||
|
private static final char EMPTY_KEY = (char) 0;
|
||||||
|
|
||||||
|
private final char keyE = 0;
|
||||||
|
private final char key1 = cast(1);
|
||||||
|
private final char key2 = cast(2);
|
||||||
|
private final char key3 = cast(3);
|
||||||
|
private final char key4 = cast(4);
|
||||||
|
|
||||||
|
/** Per-test fresh initialized instance. */
|
||||||
|
private CharHashSet set;
|
||||||
|
|
||||||
|
/** Convert to target type from an integer used to test stuff. */
|
||||||
|
private static char cast(int v) {
|
||||||
|
return (char) ('a' + v);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void initialize() {
|
||||||
|
set = new CharHashSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAddAllViaInterface() {
|
||||||
|
set.addAll(key1, key2);
|
||||||
|
|
||||||
|
CharHashSet iface = new CharHashSet();
|
||||||
|
iface.clear();
|
||||||
|
iface.addAll(set);
|
||||||
|
MatcherAssert.assertThat(set(iface.toArray()), is(equalTo(set(key1, key2))));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIndexMethods() {
|
||||||
|
set.add(keyE);
|
||||||
|
set.add(key1);
|
||||||
|
|
||||||
|
MatcherAssert.assertThat(set.indexOf(keyE), is(greaterThanOrEqualTo(0)));
|
||||||
|
MatcherAssert.assertThat(set.indexOf(key1), is(greaterThanOrEqualTo(0)));
|
||||||
|
MatcherAssert.assertThat(set.indexOf(key2), is(lessThan(0)));
|
||||||
|
|
||||||
|
MatcherAssert.assertThat(set.indexExists(set.indexOf(keyE)), is(true));
|
||||||
|
MatcherAssert.assertThat(set.indexExists(set.indexOf(key1)), is(true));
|
||||||
|
MatcherAssert.assertThat(set.indexExists(set.indexOf(key2)), is(false));
|
||||||
|
|
||||||
|
MatcherAssert.assertThat(set.indexGet(set.indexOf(keyE)), is(equalTo(keyE)));
|
||||||
|
MatcherAssert.assertThat(set.indexGet(set.indexOf(key1)), is(equalTo(key1)));
|
||||||
|
|
||||||
|
expectThrows(
|
||||||
|
AssertionError.class,
|
||||||
|
() -> {
|
||||||
|
set.indexGet(set.indexOf(key2));
|
||||||
|
});
|
||||||
|
|
||||||
|
MatcherAssert.assertThat(set.indexReplace(set.indexOf(keyE), keyE), is(equalTo(keyE)));
|
||||||
|
MatcherAssert.assertThat(set.indexReplace(set.indexOf(key1), key1), is(equalTo(key1)));
|
||||||
|
|
||||||
|
set.indexInsert(set.indexOf(key2), key2);
|
||||||
|
MatcherAssert.assertThat(set.indexGet(set.indexOf(key2)), is(equalTo(key2)));
|
||||||
|
MatcherAssert.assertThat(set.size(), is(equalTo(3)));
|
||||||
|
|
||||||
|
set.indexRemove(set.indexOf(keyE));
|
||||||
|
MatcherAssert.assertThat(set.size(), is(equalTo(2)));
|
||||||
|
set.indexRemove(set.indexOf(key2));
|
||||||
|
MatcherAssert.assertThat(set.size(), is(equalTo(1)));
|
||||||
|
MatcherAssert.assertThat(set.indexOf(keyE), is(lessThan(0)));
|
||||||
|
MatcherAssert.assertThat(set.indexOf(key1), is(greaterThanOrEqualTo(0)));
|
||||||
|
MatcherAssert.assertThat(set.indexOf(key2), is(lessThan(0)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCursorIndexIsValid() {
|
||||||
|
set.add(keyE);
|
||||||
|
set.add(key1);
|
||||||
|
set.add(key2);
|
||||||
|
|
||||||
|
for (CharCursor c : set) {
|
||||||
|
MatcherAssert.assertThat(set.indexExists(c.index), is(true));
|
||||||
|
MatcherAssert.assertThat(set.indexGet(c.index), is(equalTo(c.value)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEmptyKey() {
|
||||||
|
CharHashSet set = new CharHashSet();
|
||||||
|
|
||||||
|
boolean b = set.add(EMPTY_KEY);
|
||||||
|
|
||||||
|
MatcherAssert.assertThat(b, is(true));
|
||||||
|
MatcherAssert.assertThat(set.add(EMPTY_KEY), is(false));
|
||||||
|
MatcherAssert.assertThat(set.size(), is(equalTo(1)));
|
||||||
|
MatcherAssert.assertThat(set.isEmpty(), is(false));
|
||||||
|
MatcherAssert.assertThat(set(set.toArray()), is(equalTo(set(EMPTY_KEY))));
|
||||||
|
MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(true));
|
||||||
|
int index = set.indexOf(EMPTY_KEY);
|
||||||
|
MatcherAssert.assertThat(set.indexExists(index), is(true));
|
||||||
|
MatcherAssert.assertThat(set.indexGet(index), is(equalTo(EMPTY_KEY)));
|
||||||
|
MatcherAssert.assertThat(set.indexReplace(index, EMPTY_KEY), is(equalTo(EMPTY_KEY)));
|
||||||
|
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
b = set.remove(EMPTY_KEY);
|
||||||
|
MatcherAssert.assertThat(b, is(true));
|
||||||
|
} else {
|
||||||
|
set.indexRemove(index);
|
||||||
|
}
|
||||||
|
|
||||||
|
MatcherAssert.assertThat(set.size(), is(equalTo(0)));
|
||||||
|
MatcherAssert.assertThat(set.isEmpty(), is(true));
|
||||||
|
MatcherAssert.assertThat(set(set.toArray()), is(empty()));
|
||||||
|
MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(false));
|
||||||
|
index = set.indexOf(EMPTY_KEY);
|
||||||
|
MatcherAssert.assertThat(set.indexExists(index), is(false));
|
||||||
|
|
||||||
|
set.indexInsert(index, EMPTY_KEY);
|
||||||
|
set.add(key1);
|
||||||
|
MatcherAssert.assertThat(set.size(), is(equalTo(2)));
|
||||||
|
MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(true));
|
||||||
|
index = set.indexOf(EMPTY_KEY);
|
||||||
|
MatcherAssert.assertThat(set.indexExists(index), is(true));
|
||||||
|
MatcherAssert.assertThat(set.indexGet(index), is(equalTo(EMPTY_KEY)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEnsureCapacity() {
|
||||||
|
final AtomicInteger expands = new AtomicInteger();
|
||||||
|
CharHashSet set =
|
||||||
|
new CharHashSet(0) {
|
||||||
|
@Override
|
||||||
|
protected void allocateBuffers(int arraySize) {
|
||||||
|
super.allocateBuffers(arraySize);
|
||||||
|
expands.incrementAndGet();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add some elements.
|
||||||
|
final int max = rarely() ? 0 : randomIntBetween(0, 250);
|
||||||
|
for (int i = 0; i < max; i++) {
|
||||||
|
set.add(cast(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
final int additions = randomIntBetween(max, max + 5000);
|
||||||
|
set.ensureCapacity(additions + set.size());
|
||||||
|
final int before = expands.get();
|
||||||
|
for (int i = 0; i < additions; i++) {
|
||||||
|
set.add(cast(i));
|
||||||
|
}
|
||||||
|
assertEquals(before, expands.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInitiallyEmpty() {
|
||||||
|
assertEquals(0, set.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAdd() {
|
||||||
|
assertTrue(set.add(key1));
|
||||||
|
assertFalse(set.add(key1));
|
||||||
|
assertEquals(1, set.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAdd2() {
|
||||||
|
set.addAll(key1, key1);
|
||||||
|
assertEquals(1, set.size());
|
||||||
|
assertEquals(1, set.addAll(key1, key2));
|
||||||
|
assertEquals(2, set.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAddVarArgs() {
|
||||||
|
set.addAll(asArray(0, 1, 2, 1, 0));
|
||||||
|
assertEquals(3, set.size());
|
||||||
|
assertSortedListEquals(set.toArray(), asArray(0, 1, 2));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAddAll() {
|
||||||
|
CharHashSet set2 = new CharHashSet();
|
||||||
|
set2.addAll(asArray(1, 2));
|
||||||
|
set.addAll(asArray(0, 1));
|
||||||
|
|
||||||
|
assertEquals(1, set.addAll(set2));
|
||||||
|
assertEquals(0, set.addAll(set2));
|
||||||
|
|
||||||
|
assertEquals(3, set.size());
|
||||||
|
assertSortedListEquals(set.toArray(), asArray(0, 1, 2));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRemove() {
|
||||||
|
set.addAll(asArray(0, 1, 2, 3, 4));
|
||||||
|
|
||||||
|
assertTrue(set.remove(key2));
|
||||||
|
assertFalse(set.remove(key2));
|
||||||
|
assertEquals(4, set.size());
|
||||||
|
assertSortedListEquals(set.toArray(), asArray(0, 1, 3, 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInitialCapacityAndGrowth() {
|
||||||
|
for (int i = 0; i < 256; i++) {
|
||||||
|
CharHashSet set = new CharHashSet(i);
|
||||||
|
|
||||||
|
for (int j = 0; j < i; j++) {
|
||||||
|
set.add(cast(j));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(i, set.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBug_HPPC73_FullCapacityGet() {
|
||||||
|
final AtomicInteger reallocations = new AtomicInteger();
|
||||||
|
final int elements = 0x7F;
|
||||||
|
set =
|
||||||
|
new CharHashSet(elements, 1f) {
|
||||||
|
@Override
|
||||||
|
protected double verifyLoadFactor(double loadFactor) {
|
||||||
|
// Skip load factor sanity range checking.
|
||||||
|
return loadFactor;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void allocateBuffers(int arraySize) {
|
||||||
|
super.allocateBuffers(arraySize);
|
||||||
|
reallocations.incrementAndGet();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int reallocationsBefore = reallocations.get();
|
||||||
|
assertEquals(reallocationsBefore, 1);
|
||||||
|
for (int i = 1; i <= elements; i++) {
|
||||||
|
set.add(cast(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non-existent key.
|
||||||
|
char outOfSet = cast(elements + 1);
|
||||||
|
set.remove(outOfSet);
|
||||||
|
assertFalse(set.contains(outOfSet));
|
||||||
|
assertEquals(reallocationsBefore, reallocations.get());
|
||||||
|
|
||||||
|
// Should not expand because we're replacing an existing element.
|
||||||
|
assertFalse(set.add(key1));
|
||||||
|
assertEquals(reallocationsBefore, reallocations.get());
|
||||||
|
|
||||||
|
// Remove from a full set.
|
||||||
|
set.remove(key1);
|
||||||
|
assertEquals(reallocationsBefore, reallocations.get());
|
||||||
|
set.add(key1);
|
||||||
|
|
||||||
|
// Check expand on "last slot of a full map" condition.
|
||||||
|
set.add(outOfSet);
|
||||||
|
assertEquals(reallocationsBefore + 1, reallocations.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRemoveAllFromLookupContainer() {
|
||||||
|
set.addAll(asArray(0, 1, 2, 3, 4));
|
||||||
|
|
||||||
|
CharHashSet list2 = new CharHashSet();
|
||||||
|
list2.addAll(asArray(1, 3, 5));
|
||||||
|
|
||||||
|
assertEquals(2, set.removeAll(list2));
|
||||||
|
assertEquals(3, set.size());
|
||||||
|
assertSortedListEquals(set.toArray(), asArray(0, 2, 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testClear() {
|
||||||
|
set.addAll(asArray(1, 2, 3));
|
||||||
|
set.clear();
|
||||||
|
assertEquals(0, set.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRelease() {
|
||||||
|
set.addAll(asArray(1, 2, 3));
|
||||||
|
set.release();
|
||||||
|
assertEquals(0, set.size());
|
||||||
|
set.addAll(asArray(1, 2, 3));
|
||||||
|
assertEquals(3, set.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIterable() {
|
||||||
|
set.addAll(asArray(1, 2, 2, 3, 4));
|
||||||
|
set.remove(key2);
|
||||||
|
assertEquals(3, set.size());
|
||||||
|
|
||||||
|
int count = 0;
|
||||||
|
for (CharCursor cursor : set) {
|
||||||
|
count++;
|
||||||
|
assertTrue(set.contains(cursor.value));
|
||||||
|
}
|
||||||
|
assertEquals(count, set.size());
|
||||||
|
|
||||||
|
set.clear();
|
||||||
|
assertFalse(set.iterator().hasNext());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Runs random insertions/deletions/clearing and compares the results against {@link HashSet}. */
|
||||||
|
@Test
|
||||||
|
@SuppressWarnings({"rawtypes", "unchecked"})
|
||||||
|
public void testAgainstHashSet() {
|
||||||
|
final Random rnd = RandomizedTest.getRandom();
|
||||||
|
final HashSet other = new HashSet();
|
||||||
|
|
||||||
|
for (int size = 1000; size < 20000; size += 4000) {
|
||||||
|
other.clear();
|
||||||
|
set.clear();
|
||||||
|
|
||||||
|
for (int round = 0; round < size * 20; round++) {
|
||||||
|
char key = cast(rnd.nextInt(size));
|
||||||
|
if (rnd.nextInt(50) == 0) {
|
||||||
|
key = EMPTY_KEY;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rnd.nextBoolean()) {
|
||||||
|
if (rnd.nextBoolean()) {
|
||||||
|
int index = set.indexOf(key);
|
||||||
|
if (set.indexExists(index)) {
|
||||||
|
set.indexReplace(index, key);
|
||||||
|
} else {
|
||||||
|
set.indexInsert(index, key);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
set.add(key);
|
||||||
|
}
|
||||||
|
other.add(key);
|
||||||
|
|
||||||
|
assertTrue(set.contains(key));
|
||||||
|
assertTrue(set.indexExists(set.indexOf(key)));
|
||||||
|
} else {
|
||||||
|
assertEquals(other.contains(key), set.contains(key));
|
||||||
|
boolean removed;
|
||||||
|
if (set.contains(key) && rnd.nextBoolean()) {
|
||||||
|
set.indexRemove(set.indexOf(key));
|
||||||
|
removed = true;
|
||||||
|
} else {
|
||||||
|
removed = set.remove(key);
|
||||||
|
}
|
||||||
|
assertEquals(other.remove(key), removed);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(other.size(), set.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHashCodeEquals() {
|
||||||
|
CharHashSet l0 = new CharHashSet();
|
||||||
|
assertEquals(0, l0.hashCode());
|
||||||
|
assertEquals(l0, new CharHashSet());
|
||||||
|
|
||||||
|
CharHashSet l1 = CharHashSet.from(key1, key2, key3);
|
||||||
|
CharHashSet l2 = CharHashSet.from(key1, key2);
|
||||||
|
l2.add(key3);
|
||||||
|
|
||||||
|
assertEquals(l1.hashCode(), l2.hashCode());
|
||||||
|
assertEquals(l1, l2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testClone() {
|
||||||
|
this.set.addAll(asArray(1, 2, 3));
|
||||||
|
|
||||||
|
CharHashSet cloned = set.clone();
|
||||||
|
cloned.remove(key1);
|
||||||
|
|
||||||
|
assertSortedListEquals(set.toArray(), asArray(1, 2, 3));
|
||||||
|
assertSortedListEquals(cloned.toArray(), asArray(2, 3));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEqualsSameClass() {
|
||||||
|
CharHashSet l1 = CharHashSet.from(key1, key2, key3);
|
||||||
|
CharHashSet l2 = CharHashSet.from(key1, key2, key3);
|
||||||
|
CharHashSet l3 = CharHashSet.from(key1, key2, key4);
|
||||||
|
|
||||||
|
MatcherAssert.assertThat(l1, is(equalTo(l2)));
|
||||||
|
MatcherAssert.assertThat(l1.hashCode(), is(equalTo(l2.hashCode())));
|
||||||
|
MatcherAssert.assertThat(l1, is(not(equalTo(l3))));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEqualsSubClass() {
|
||||||
|
class Sub extends CharHashSet {}
|
||||||
|
;
|
||||||
|
|
||||||
|
CharHashSet l1 = CharHashSet.from(key1, key2, key3);
|
||||||
|
CharHashSet l2 = new Sub();
|
||||||
|
CharHashSet l3 = new Sub();
|
||||||
|
l2.addAll(l1);
|
||||||
|
l3.addAll(l1);
|
||||||
|
|
||||||
|
MatcherAssert.assertThat(l2, is(equalTo(l3)));
|
||||||
|
MatcherAssert.assertThat(l1, is(not(equalTo(l2))));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int randomIntBetween(int min, int max) {
|
||||||
|
return min + random().nextInt(max + 1 - min);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Set<Character> set(char... elements) {
|
||||||
|
Set<Character> set = new HashSet<>();
|
||||||
|
for (char element : elements) {
|
||||||
|
set.add(element);
|
||||||
|
}
|
||||||
|
return set;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static char[] asArray(int... elements) {
|
||||||
|
char[] result = new char[elements.length];
|
||||||
|
for (int i = 0; i < elements.length; i++) {
|
||||||
|
result[i] = cast(elements[i]);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Check if the array's content is identical to a given sequence of elements. */
|
||||||
|
private static void assertSortedListEquals(char[] array, char[] elements) {
|
||||||
|
assertEquals(elements.length, array.length);
|
||||||
|
Arrays.sort(array);
|
||||||
|
assertArrayEquals(elements, array);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,671 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.util.hppc;
|
||||||
|
|
||||||
|
import com.carrotsearch.randomizedtesting.RandomizedTest;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for {@link CharObjectHashMap}.
|
||||||
|
*
|
||||||
|
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.CharObjectHashMapTest
|
||||||
|
*
|
||||||
|
* <p>github: https://github.com/carrotsearch/hppc release: 0.9.0
|
||||||
|
*/
|
||||||
|
@SuppressWarnings({"rawtypes", "unchecked"})
|
||||||
|
public class TestCharObjectHashMap extends LuceneTestCase {
|
||||||
|
/* Ready to use key values. */
|
||||||
|
|
||||||
|
private final char keyE = 0;
|
||||||
|
private final char key1 = cast(1);
|
||||||
|
private final char key2 = cast(2);
|
||||||
|
private final char key3 = cast(3);
|
||||||
|
private final char key4 = cast(4);
|
||||||
|
|
||||||
|
/** Convert to target type from an integer used to test stuff. */
|
||||||
|
private char cast(int v) {
|
||||||
|
return (char) ('a' + v);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Create a new array of a given type and copy the arguments to this array. */
|
||||||
|
private char[] newArray(char... elements) {
|
||||||
|
return elements;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int randomIntBetween(int min, int max) {
|
||||||
|
return min + random().nextInt(max + 1 - min);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Check if the array's content is identical to a given sequence of elements. */
|
||||||
|
private static void assertSortedListEquals(char[] array, char... elements) {
|
||||||
|
assertEquals(elements.length, array.length);
|
||||||
|
Arrays.sort(array);
|
||||||
|
Arrays.sort(elements);
|
||||||
|
assertArrayEquals(elements, array);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Check if the array's content is identical to a given sequence of elements. */
|
||||||
|
private static void assertSortedListEquals(Object[] array, Object... elements) {
|
||||||
|
assertEquals(elements.length, array.length);
|
||||||
|
Arrays.sort(array);
|
||||||
|
assertArrayEquals(elements, array);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final int value0 = vcast(0);
|
||||||
|
private final int value1 = vcast(1);
|
||||||
|
private final int value2 = vcast(2);
|
||||||
|
private final int value3 = vcast(3);
|
||||||
|
private final int value4 = vcast(4);
|
||||||
|
|
||||||
|
/** Per-test fresh initialized instance. */
|
||||||
|
private CharObjectHashMap<Object> map = newInstance();
|
||||||
|
|
||||||
|
private CharObjectHashMap newInstance() {
|
||||||
|
return new CharObjectHashMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void checkEmptySlotsUninitialized() {
|
||||||
|
if (map != null) {
|
||||||
|
int occupied = 0;
|
||||||
|
for (int i = 0; i <= map.mask; i++) {
|
||||||
|
if (((map.keys[i]) == 0)) {
|
||||||
|
|
||||||
|
} else {
|
||||||
|
occupied++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertEquals(occupied, map.assigned);
|
||||||
|
|
||||||
|
if (!map.hasEmptyKey) {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Convert to target type from an integer used to test stuff. */
|
||||||
|
private int vcast(int value) {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Create a new array of a given type and copy the arguments to this array. */
|
||||||
|
/* */
|
||||||
|
private Object[] newvArray(Object... elements) {
|
||||||
|
return elements;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertSameMap(
|
||||||
|
final CharObjectHashMap<Object> c1, final CharObjectHashMap<Object> c2) {
|
||||||
|
assertEquals(c1.size(), c2.size());
|
||||||
|
|
||||||
|
for (CharObjectHashMap.CharObjectCursor entry : c1) {
|
||||||
|
assertTrue(c2.containsKey(entry.key));
|
||||||
|
assertEquals(entry.value, c2.get(entry.key));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testEnsureCapacity() {
|
||||||
|
final AtomicInteger expands = new AtomicInteger();
|
||||||
|
CharObjectHashMap map =
|
||||||
|
new CharObjectHashMap(0) {
|
||||||
|
@Override
|
||||||
|
protected void allocateBuffers(int arraySize) {
|
||||||
|
super.allocateBuffers(arraySize);
|
||||||
|
expands.incrementAndGet();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add some elements.
|
||||||
|
final int max = rarely() ? 0 : randomIntBetween(0, 250);
|
||||||
|
for (int i = 0; i < max; i++) {
|
||||||
|
map.put(cast(i), value0);
|
||||||
|
}
|
||||||
|
|
||||||
|
final int additions = randomIntBetween(max, max + 5000);
|
||||||
|
map.ensureCapacity(additions + map.size());
|
||||||
|
final int before = expands.get();
|
||||||
|
for (int i = 0; i < additions; i++) {
|
||||||
|
map.put(cast(i), value0);
|
||||||
|
}
|
||||||
|
assertEquals(before, expands.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCursorIndexIsValid() {
|
||||||
|
map.put(keyE, value1);
|
||||||
|
map.put(key1, value2);
|
||||||
|
map.put(key2, value3);
|
||||||
|
|
||||||
|
for (CharObjectHashMap.CharObjectCursor c : map) {
|
||||||
|
assertTrue(map.indexExists(c.index));
|
||||||
|
assertEquals(c.value, map.indexGet(c.index));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIndexMethods() {
|
||||||
|
map.put(keyE, value1);
|
||||||
|
map.put(key1, value2);
|
||||||
|
|
||||||
|
assertTrue(map.indexOf(keyE) >= 0);
|
||||||
|
assertTrue(map.indexOf(key1) >= 0);
|
||||||
|
assertTrue(map.indexOf(key2) < 0);
|
||||||
|
|
||||||
|
assertTrue(map.indexExists(map.indexOf(keyE)));
|
||||||
|
assertTrue(map.indexExists(map.indexOf(key1)));
|
||||||
|
assertFalse(map.indexExists(map.indexOf(key2)));
|
||||||
|
|
||||||
|
assertEquals(value1, map.indexGet(map.indexOf(keyE)));
|
||||||
|
assertEquals(value2, map.indexGet(map.indexOf(key1)));
|
||||||
|
|
||||||
|
expectThrows(
|
||||||
|
AssertionError.class,
|
||||||
|
() -> {
|
||||||
|
map.indexGet(map.indexOf(key2));
|
||||||
|
});
|
||||||
|
|
||||||
|
assertEquals(value1, map.indexReplace(map.indexOf(keyE), value3));
|
||||||
|
assertEquals(value2, map.indexReplace(map.indexOf(key1), value4));
|
||||||
|
assertEquals(value3, map.indexGet(map.indexOf(keyE)));
|
||||||
|
assertEquals(value4, map.indexGet(map.indexOf(key1)));
|
||||||
|
|
||||||
|
map.indexInsert(map.indexOf(key2), key2, value1);
|
||||||
|
assertEquals(value1, map.indexGet(map.indexOf(key2)));
|
||||||
|
assertEquals(3, map.size());
|
||||||
|
|
||||||
|
assertEquals(value3, map.indexRemove(map.indexOf(keyE)));
|
||||||
|
assertEquals(2, map.size());
|
||||||
|
assertEquals(value1, map.indexRemove(map.indexOf(key2)));
|
||||||
|
assertEquals(1, map.size());
|
||||||
|
assertTrue(map.indexOf(keyE) < 0);
|
||||||
|
assertTrue(map.indexOf(key1) >= 0);
|
||||||
|
assertTrue(map.indexOf(key2) < 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testCloningConstructor() {
|
||||||
|
map.put(key1, value1);
|
||||||
|
map.put(key2, value2);
|
||||||
|
map.put(key3, value3);
|
||||||
|
|
||||||
|
assertSameMap(map, new CharObjectHashMap(map));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testFromArrays() {
|
||||||
|
map.put(key1, value1);
|
||||||
|
map.put(key2, value2);
|
||||||
|
map.put(key3, value3);
|
||||||
|
|
||||||
|
CharObjectHashMap map2 =
|
||||||
|
CharObjectHashMap.from(newArray(key1, key2, key3), newvArray(value1, value2, value3));
|
||||||
|
|
||||||
|
assertSameMap(map, map2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetOrDefault() {
|
||||||
|
map.put(key2, value2);
|
||||||
|
assertTrue(map.containsKey(key2));
|
||||||
|
|
||||||
|
map.put(key1, value1);
|
||||||
|
assertEquals(value1, map.getOrDefault(key1, value3));
|
||||||
|
assertEquals(value3, map.getOrDefault(key3, value3));
|
||||||
|
map.remove(key1);
|
||||||
|
assertEquals(value3, map.getOrDefault(key1, value3));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testPut() {
|
||||||
|
map.put(key1, value1);
|
||||||
|
|
||||||
|
assertTrue(map.containsKey(key1));
|
||||||
|
assertEquals(value1, map.get(key1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testNullValue() {
|
||||||
|
map.put(key1, null);
|
||||||
|
|
||||||
|
assertTrue(map.containsKey(key1));
|
||||||
|
assertNull(map.get(key1));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPutOverExistingKey() {
|
||||||
|
map.put(key1, value1);
|
||||||
|
assertEquals(value1, map.put(key1, value3));
|
||||||
|
assertEquals(value3, map.get(key1));
|
||||||
|
|
||||||
|
assertEquals(value3, map.put(key1, null));
|
||||||
|
assertTrue(map.containsKey(key1));
|
||||||
|
assertNull(map.get(key1));
|
||||||
|
|
||||||
|
assertNull(map.put(key1, value1));
|
||||||
|
assertEquals(value1, map.get(key1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testPutWithExpansions() {
|
||||||
|
final int COUNT = 10000;
|
||||||
|
final Random rnd = new Random(random().nextLong());
|
||||||
|
final HashSet<Object> values = new HashSet<Object>();
|
||||||
|
|
||||||
|
for (int i = 0; i < COUNT; i++) {
|
||||||
|
final int v = rnd.nextInt();
|
||||||
|
final boolean hadKey = values.contains(cast(v));
|
||||||
|
values.add(cast(v));
|
||||||
|
|
||||||
|
assertEquals(hadKey, map.containsKey(cast(v)));
|
||||||
|
map.put(cast(v), vcast(v));
|
||||||
|
assertEquals(values.size(), map.size());
|
||||||
|
}
|
||||||
|
assertEquals(values.size(), map.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testPutAll() {
|
||||||
|
map.put(key1, value1);
|
||||||
|
map.put(key2, value1);
|
||||||
|
|
||||||
|
CharObjectHashMap map2 = newInstance();
|
||||||
|
|
||||||
|
map2.put(key2, value2);
|
||||||
|
map2.put(keyE, value1);
|
||||||
|
|
||||||
|
// One new key (keyE).
|
||||||
|
assertEquals(1, map.putAll(map2));
|
||||||
|
|
||||||
|
// Assert the value under key2 has been replaced.
|
||||||
|
assertEquals(value2, map.get(key2));
|
||||||
|
|
||||||
|
// And key3 has been added.
|
||||||
|
assertEquals(value1, map.get(keyE));
|
||||||
|
assertEquals(3, map.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testPutIfAbsent() {
|
||||||
|
assertTrue(map.putIfAbsent(key1, value1));
|
||||||
|
assertFalse(map.putIfAbsent(key1, value2));
|
||||||
|
assertEquals(value1, map.get(key1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testRemove() {
|
||||||
|
map.put(key1, value1);
|
||||||
|
assertEquals(value1, map.remove(key1));
|
||||||
|
assertEquals(null, map.remove(key1));
|
||||||
|
assertEquals(0, map.size());
|
||||||
|
|
||||||
|
// These are internals, but perhaps worth asserting too.
|
||||||
|
assertEquals(0, map.assigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testEmptyKey() {
|
||||||
|
final char empty = 0;
|
||||||
|
|
||||||
|
map.put(empty, value1);
|
||||||
|
assertEquals(1, map.size());
|
||||||
|
assertEquals(false, map.isEmpty());
|
||||||
|
assertEquals(value1, map.get(empty));
|
||||||
|
assertEquals(value1, map.getOrDefault(empty, value2));
|
||||||
|
assertEquals(true, map.iterator().hasNext());
|
||||||
|
assertEquals(empty, map.iterator().next().key);
|
||||||
|
assertEquals(value1, map.iterator().next().value);
|
||||||
|
|
||||||
|
map.remove(empty);
|
||||||
|
assertEquals(null, map.get(empty));
|
||||||
|
assertEquals(0, map.size());
|
||||||
|
|
||||||
|
map.put(empty, null);
|
||||||
|
assertEquals(1, map.size());
|
||||||
|
assertTrue(map.containsKey(empty));
|
||||||
|
assertNull(map.get(empty));
|
||||||
|
|
||||||
|
map.remove(empty);
|
||||||
|
assertEquals(0, map.size());
|
||||||
|
assertFalse(map.containsKey(empty));
|
||||||
|
assertNull(map.get(empty));
|
||||||
|
|
||||||
|
assertEquals(null, map.put(empty, value1));
|
||||||
|
assertEquals(value1, map.put(empty, value2));
|
||||||
|
map.clear();
|
||||||
|
assertFalse(map.indexExists(map.indexOf(empty)));
|
||||||
|
assertEquals(null, map.put(empty, value1));
|
||||||
|
map.clear();
|
||||||
|
assertEquals(null, map.remove(empty));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testMapKeySet() {
|
||||||
|
map.put(key1, value3);
|
||||||
|
map.put(key2, value2);
|
||||||
|
map.put(key3, value1);
|
||||||
|
|
||||||
|
assertSortedListEquals(map.keys().toArray(), key1, key2, key3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testMapKeySetIterator() {
|
||||||
|
map.put(key1, value3);
|
||||||
|
map.put(key2, value2);
|
||||||
|
map.put(key3, value1);
|
||||||
|
|
||||||
|
int counted = 0;
|
||||||
|
for (CharCursor c : map.keys()) {
|
||||||
|
assertEquals(map.keys[c.index], c.value);
|
||||||
|
counted++;
|
||||||
|
}
|
||||||
|
assertEquals(counted, map.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testClear() {
|
||||||
|
map.put(key1, value1);
|
||||||
|
map.put(key2, value1);
|
||||||
|
map.clear();
|
||||||
|
assertEquals(0, map.size());
|
||||||
|
|
||||||
|
// These are internals, but perhaps worth asserting too.
|
||||||
|
assertEquals(0, map.assigned);
|
||||||
|
|
||||||
|
// Check values are cleared.
|
||||||
|
assertEquals(null, map.put(key1, value1));
|
||||||
|
assertEquals(null, map.remove(key2));
|
||||||
|
map.clear();
|
||||||
|
|
||||||
|
// Check if the map behaves properly upon subsequent use.
|
||||||
|
testPutWithExpansions();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testRelease() {
|
||||||
|
map.put(key1, value1);
|
||||||
|
map.put(key2, value1);
|
||||||
|
map.release();
|
||||||
|
assertEquals(0, map.size());
|
||||||
|
|
||||||
|
// These are internals, but perhaps worth asserting too.
|
||||||
|
assertEquals(0, map.assigned);
|
||||||
|
|
||||||
|
// Check if the map behaves properly upon subsequent use.
|
||||||
|
testPutWithExpansions();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testIterable() {
|
||||||
|
map.put(key1, value1);
|
||||||
|
map.put(key2, value2);
|
||||||
|
map.put(key3, value3);
|
||||||
|
map.remove(key2);
|
||||||
|
|
||||||
|
int count = 0;
|
||||||
|
for (CharObjectHashMap.CharObjectCursor cursor : map) {
|
||||||
|
count++;
|
||||||
|
assertTrue(map.containsKey(cursor.key));
|
||||||
|
assertEquals(cursor.value, map.get(cursor.key));
|
||||||
|
|
||||||
|
assertEquals(cursor.value, map.values[cursor.index]);
|
||||||
|
assertEquals(cursor.key, map.keys[cursor.index]);
|
||||||
|
}
|
||||||
|
assertEquals(count, map.size());
|
||||||
|
|
||||||
|
map.clear();
|
||||||
|
assertFalse(map.iterator().hasNext());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testBug_HPPC73_FullCapacityGet() {
|
||||||
|
final AtomicInteger reallocations = new AtomicInteger();
|
||||||
|
final int elements = 0x7F;
|
||||||
|
map =
|
||||||
|
new CharObjectHashMap(elements, 1f) {
|
||||||
|
@Override
|
||||||
|
protected double verifyLoadFactor(double loadFactor) {
|
||||||
|
// Skip load factor sanity range checking.
|
||||||
|
return loadFactor;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void allocateBuffers(int arraySize) {
|
||||||
|
super.allocateBuffers(arraySize);
|
||||||
|
reallocations.incrementAndGet();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int reallocationsBefore = reallocations.get();
|
||||||
|
assertEquals(reallocationsBefore, 1);
|
||||||
|
for (int i = 1; i <= elements; i++) {
|
||||||
|
map.put(cast(i), value1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non-existent key.
|
||||||
|
char outOfSet = cast(elements + 1);
|
||||||
|
map.remove(outOfSet);
|
||||||
|
assertFalse(map.containsKey(outOfSet));
|
||||||
|
assertEquals(reallocationsBefore, reallocations.get());
|
||||||
|
|
||||||
|
// Should not expand because we're replacing an existing element.
|
||||||
|
map.put(key1, value2);
|
||||||
|
assertEquals(reallocationsBefore, reallocations.get());
|
||||||
|
|
||||||
|
// Remove from a full map.
|
||||||
|
map.remove(key1);
|
||||||
|
assertEquals(reallocationsBefore, reallocations.get());
|
||||||
|
map.put(key1, value2);
|
||||||
|
|
||||||
|
// Check expand on "last slot of a full map" condition.
|
||||||
|
map.put(outOfSet, value1);
|
||||||
|
assertEquals(reallocationsBefore + 1, reallocations.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHashCodeEquals() {
|
||||||
|
CharObjectHashMap l0 = newInstance();
|
||||||
|
assertEquals(0, l0.hashCode());
|
||||||
|
assertEquals(l0, newInstance());
|
||||||
|
|
||||||
|
CharObjectHashMap l1 =
|
||||||
|
CharObjectHashMap.from(newArray(key1, key2, key3), newvArray(value1, value2, value3));
|
||||||
|
|
||||||
|
CharObjectHashMap l2 =
|
||||||
|
CharObjectHashMap.from(newArray(key2, key1, key3), newvArray(value2, value1, value3));
|
||||||
|
|
||||||
|
CharObjectHashMap l3 = CharObjectHashMap.from(newArray(key1, key2), newvArray(value2, value1));
|
||||||
|
|
||||||
|
assertEquals(l1.hashCode(), l2.hashCode());
|
||||||
|
assertEquals(l1, l2);
|
||||||
|
|
||||||
|
assertFalse(l1.equals(l3));
|
||||||
|
assertFalse(l2.equals(l3));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBug_HPPC37() {
|
||||||
|
CharObjectHashMap l1 = CharObjectHashMap.from(newArray(key1), newvArray(value1));
|
||||||
|
|
||||||
|
CharObjectHashMap l2 = CharObjectHashMap.from(newArray(key2), newvArray(value1));
|
||||||
|
|
||||||
|
assertFalse(l1.equals(l2));
|
||||||
|
assertFalse(l2.equals(l1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Runs random insertions/deletions/clearing and compares the results against {@link HashMap}. */
|
||||||
|
@Test
|
||||||
|
@SuppressWarnings({"rawtypes", "unchecked"})
|
||||||
|
public void testAgainstHashMap() {
|
||||||
|
final Random rnd = RandomizedTest.getRandom();
|
||||||
|
final HashMap other = new HashMap();
|
||||||
|
|
||||||
|
for (int size = 1000; size < 20000; size += 4000) {
|
||||||
|
other.clear();
|
||||||
|
map.clear();
|
||||||
|
|
||||||
|
for (int round = 0; round < size * 20; round++) {
|
||||||
|
char key = cast(rnd.nextInt(size));
|
||||||
|
if (rnd.nextInt(50) == 0) {
|
||||||
|
key = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int value = vcast(rnd.nextInt());
|
||||||
|
|
||||||
|
if (rnd.nextBoolean()) {
|
||||||
|
Object previousValue;
|
||||||
|
if (rnd.nextBoolean()) {
|
||||||
|
int index = map.indexOf(key);
|
||||||
|
if (map.indexExists(index)) {
|
||||||
|
previousValue = map.indexReplace(index, value);
|
||||||
|
} else {
|
||||||
|
map.indexInsert(index, key, value);
|
||||||
|
previousValue = null;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
previousValue = map.put(key, value);
|
||||||
|
}
|
||||||
|
assertEquals(other.put(key, value), previousValue);
|
||||||
|
|
||||||
|
assertEquals(value, map.get(key));
|
||||||
|
assertEquals(value, map.indexGet(map.indexOf(key)));
|
||||||
|
assertTrue(map.containsKey(key));
|
||||||
|
assertTrue(map.indexExists(map.indexOf(key)));
|
||||||
|
} else {
|
||||||
|
assertEquals(other.containsKey(key), map.containsKey(key));
|
||||||
|
Object previousValue =
|
||||||
|
map.containsKey(key) && rnd.nextBoolean()
|
||||||
|
? map.indexRemove(map.indexOf(key))
|
||||||
|
: map.remove(key);
|
||||||
|
assertEquals(other.remove(key), previousValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(other.size(), map.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testClone() {
|
||||||
|
this.map.put(key1, value1);
|
||||||
|
this.map.put(key2, value2);
|
||||||
|
this.map.put(key3, value3);
|
||||||
|
|
||||||
|
CharObjectHashMap cloned = map.clone();
|
||||||
|
cloned.remove(key1);
|
||||||
|
|
||||||
|
assertSortedListEquals(map.keys().toArray(), key1, key2, key3);
|
||||||
|
assertSortedListEquals(cloned.keys().toArray(), key2, key3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testMapValues() {
|
||||||
|
map.put(key1, value3);
|
||||||
|
map.put(key2, value2);
|
||||||
|
map.put(key3, value1);
|
||||||
|
assertSortedListEquals(map.values().toArray(), value1, value2, value3);
|
||||||
|
|
||||||
|
map.clear();
|
||||||
|
map.put(key1, value1);
|
||||||
|
map.put(key2, value2);
|
||||||
|
map.put(key3, value2);
|
||||||
|
assertSortedListEquals(map.values().toArray(), value1, value2, value2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testMapValuesIterator() {
|
||||||
|
map.put(key1, value3);
|
||||||
|
map.put(key2, value2);
|
||||||
|
map.put(key3, value1);
|
||||||
|
|
||||||
|
int counted = 0;
|
||||||
|
for (ObjectCursor c : map.values()) {
|
||||||
|
assertEquals(map.values[c.index], c.value);
|
||||||
|
counted++;
|
||||||
|
}
|
||||||
|
assertEquals(counted, map.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testEqualsSameClass() {
|
||||||
|
CharObjectHashMap l1 = newInstance();
|
||||||
|
l1.put(key1, value0);
|
||||||
|
l1.put(key2, value1);
|
||||||
|
l1.put(key3, value2);
|
||||||
|
|
||||||
|
CharObjectHashMap l2 = new CharObjectHashMap(l1);
|
||||||
|
l2.putAll(l1);
|
||||||
|
|
||||||
|
CharObjectHashMap l3 = new CharObjectHashMap(l2);
|
||||||
|
l3.putAll(l2);
|
||||||
|
l3.put(key4, value0);
|
||||||
|
|
||||||
|
assertEquals(l2, l1);
|
||||||
|
assertEquals(l2.hashCode(), l1.hashCode());
|
||||||
|
assertNotEquals(l1, l3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
@Test
|
||||||
|
public void testEqualsSubClass() {
|
||||||
|
class Sub extends CharObjectHashMap {}
|
||||||
|
|
||||||
|
CharObjectHashMap l1 = newInstance();
|
||||||
|
l1.put(key1, value0);
|
||||||
|
l1.put(key2, value1);
|
||||||
|
l1.put(key3, value2);
|
||||||
|
|
||||||
|
CharObjectHashMap l2 = new Sub();
|
||||||
|
l2.putAll(l1);
|
||||||
|
l2.put(key4, value3);
|
||||||
|
|
||||||
|
CharObjectHashMap l3 = new Sub();
|
||||||
|
l3.putAll(l2);
|
||||||
|
|
||||||
|
assertNotEquals(l1, l2);
|
||||||
|
assertEquals(l3.hashCode(), l2.hashCode());
|
||||||
|
assertEquals(l3, l2);
|
||||||
|
}
|
||||||
|
}
|
|
@ -215,7 +215,7 @@ public class TestIntHashSet extends LuceneTestCase {
|
||||||
public void testAddVarArgs() {
|
public void testAddVarArgs() {
|
||||||
set.addAll(asArray(0, 1, 2, 1, 0));
|
set.addAll(asArray(0, 1, 2, 1, 0));
|
||||||
assertEquals(3, set.size());
|
assertEquals(3, set.size());
|
||||||
assertSortedListEquals(set.toArray(), 0, 1, 2);
|
assertSortedListEquals(set.toArray(), asArray(0, 1, 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -228,7 +228,7 @@ public class TestIntHashSet extends LuceneTestCase {
|
||||||
assertEquals(0, set.addAll(set2));
|
assertEquals(0, set.addAll(set2));
|
||||||
|
|
||||||
assertEquals(3, set.size());
|
assertEquals(3, set.size());
|
||||||
assertSortedListEquals(set.toArray(), 0, 1, 2);
|
assertSortedListEquals(set.toArray(), asArray(0, 1, 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -238,7 +238,7 @@ public class TestIntHashSet extends LuceneTestCase {
|
||||||
assertTrue(set.remove(key2));
|
assertTrue(set.remove(key2));
|
||||||
assertFalse(set.remove(key2));
|
assertFalse(set.remove(key2));
|
||||||
assertEquals(4, set.size());
|
assertEquals(4, set.size());
|
||||||
assertSortedListEquals(set.toArray(), 0, 1, 3, 4);
|
assertSortedListEquals(set.toArray(), asArray(0, 1, 3, 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -308,7 +308,7 @@ public class TestIntHashSet extends LuceneTestCase {
|
||||||
|
|
||||||
assertEquals(2, set.removeAll(list2));
|
assertEquals(2, set.removeAll(list2));
|
||||||
assertEquals(3, set.size());
|
assertEquals(3, set.size());
|
||||||
assertSortedListEquals(set.toArray(), 0, 2, 4);
|
assertSortedListEquals(set.toArray(), asArray(0, 2, 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -409,13 +409,13 @@ public class TestIntHashSet extends LuceneTestCase {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testClone() {
|
public void testClone() {
|
||||||
this.set.addAll(key1, key2, key3);
|
this.set.addAll(asArray(1, 2, 3));
|
||||||
|
|
||||||
IntHashSet cloned = set.clone();
|
IntHashSet cloned = set.clone();
|
||||||
cloned.remove(key1);
|
cloned.remove(key1);
|
||||||
|
|
||||||
assertSortedListEquals(set.toArray(), key1, key2, key3);
|
assertSortedListEquals(set.toArray(), asArray(1, 2, 3));
|
||||||
assertSortedListEquals(cloned.toArray(), key2, key3);
|
assertSortedListEquals(cloned.toArray(), asArray(2, 3));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -461,7 +461,7 @@ public class TestIntHashSet extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Check if the array's content is identical to a given sequence of elements. */
|
/** Check if the array's content is identical to a given sequence of elements. */
|
||||||
private static void assertSortedListEquals(int[] array, int... elements) {
|
private static void assertSortedListEquals(int[] array, int[] elements) {
|
||||||
assertEquals(elements.length, array.length);
|
assertEquals(elements.length, array.length);
|
||||||
Arrays.sort(array);
|
Arrays.sort(array);
|
||||||
assertArrayEquals(elements, array);
|
assertArrayEquals(elements, array);
|
||||||
|
|
|
@ -210,7 +210,7 @@ public class TestLongHashSet extends LuceneTestCase {
|
||||||
public void testAddVarArgs() {
|
public void testAddVarArgs() {
|
||||||
set.addAll(asArray(0, 1, 2, 1, 0));
|
set.addAll(asArray(0, 1, 2, 1, 0));
|
||||||
assertEquals(3, set.size());
|
assertEquals(3, set.size());
|
||||||
assertSortedListEquals(set.toArray(), 0, 1, 2);
|
assertSortedListEquals(set.toArray(), asArray(0, 1, 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -223,7 +223,7 @@ public class TestLongHashSet extends LuceneTestCase {
|
||||||
assertEquals(0, set.addAll(set2));
|
assertEquals(0, set.addAll(set2));
|
||||||
|
|
||||||
assertEquals(3, set.size());
|
assertEquals(3, set.size());
|
||||||
assertSortedListEquals(set.toArray(), 0, 1, 2);
|
assertSortedListEquals(set.toArray(), asArray(0, 1, 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -233,7 +233,7 @@ public class TestLongHashSet extends LuceneTestCase {
|
||||||
assertTrue(set.remove(key2));
|
assertTrue(set.remove(key2));
|
||||||
assertFalse(set.remove(key2));
|
assertFalse(set.remove(key2));
|
||||||
assertEquals(4, set.size());
|
assertEquals(4, set.size());
|
||||||
assertSortedListEquals(set.toArray(), 0, 1, 3, 4);
|
assertSortedListEquals(set.toArray(), asArray(0, 1, 3, 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -303,7 +303,7 @@ public class TestLongHashSet extends LuceneTestCase {
|
||||||
|
|
||||||
assertEquals(2, set.removeAll(list2));
|
assertEquals(2, set.removeAll(list2));
|
||||||
assertEquals(3, set.size());
|
assertEquals(3, set.size());
|
||||||
assertSortedListEquals(set.toArray(), 0, 2, 4);
|
assertSortedListEquals(set.toArray(), asArray(0, 2, 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -404,13 +404,13 @@ public class TestLongHashSet extends LuceneTestCase {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testClone() {
|
public void testClone() {
|
||||||
this.set.addAll(key1, key2, key3);
|
this.set.addAll(asArray(1, 2, 3));
|
||||||
|
|
||||||
LongHashSet cloned = set.clone();
|
LongHashSet cloned = set.clone();
|
||||||
cloned.remove(key1);
|
cloned.remove(key1);
|
||||||
|
|
||||||
assertSortedListEquals(set.toArray(), key1, key2, key3);
|
assertSortedListEquals(set.toArray(), asArray(1, 2, 3));
|
||||||
assertSortedListEquals(cloned.toArray(), key2, key3);
|
assertSortedListEquals(cloned.toArray(), asArray(2, 3));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -456,7 +456,7 @@ public class TestLongHashSet extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Check if the array's content is identical to a given sequence of elements. */
|
/** Check if the array's content is identical to a given sequence of elements. */
|
||||||
private static void assertSortedListEquals(long[] array, long... elements) {
|
private static void assertSortedListEquals(long[] array, long[] elements) {
|
||||||
assertEquals(elements.length, array.length);
|
assertEquals(elements.length, array.length);
|
||||||
Arrays.sort(array);
|
Arrays.sort(array);
|
||||||
assertArrayEquals(elements, array);
|
assertArrayEquals(elements, array);
|
||||||
|
|
|
@ -331,7 +331,7 @@ public class TestLongIntHashMap extends LuceneTestCase {
|
||||||
/* */
|
/* */
|
||||||
@Test
|
@Test
|
||||||
public void testEmptyKey() {
|
public void testEmptyKey() {
|
||||||
final int empty = 0;
|
final long empty = 0;
|
||||||
|
|
||||||
map.put(empty, value1);
|
map.put(empty, value1);
|
||||||
assertEquals(1, map.size());
|
assertEquals(1, map.size());
|
||||||
|
|
|
@ -335,7 +335,7 @@ public class TestLongObjectHashMap extends LuceneTestCase {
|
||||||
/* */
|
/* */
|
||||||
@Test
|
@Test
|
||||||
public void testEmptyKey() {
|
public void testEmptyKey() {
|
||||||
final int empty = 0;
|
final long empty = 0;
|
||||||
|
|
||||||
map.put(empty, value1);
|
map.put(empty, value1);
|
||||||
assertEquals(1, map.size());
|
assertEquals(1, map.size());
|
||||||
|
|
|
@ -150,9 +150,9 @@ public class StringValueFacetCounts extends Facets {
|
||||||
List<LabelAndValue> labelValues = new ArrayList<>();
|
List<LabelAndValue> labelValues = new ArrayList<>();
|
||||||
|
|
||||||
if (sparseCounts != null) {
|
if (sparseCounts != null) {
|
||||||
for (IntIntCursor cursor : sparseCounts) {
|
for (IntIntCursor sparseCount : sparseCounts) {
|
||||||
int count = cursor.value;
|
int count = sparseCount.value;
|
||||||
final BytesRef term = docValues.lookupOrd(cursor.key);
|
final BytesRef term = docValues.lookupOrd(sparseCount.key);
|
||||||
labelValues.add(new LabelAndValue(term.utf8ToString(), count));
|
labelValues.add(new LabelAndValue(term.utf8ToString(), count));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -186,10 +186,10 @@ public class StringValueFacetCounts extends Facets {
|
||||||
int childCount = 0; // total number of labels with non-zero count
|
int childCount = 0; // total number of labels with non-zero count
|
||||||
|
|
||||||
if (sparseCounts != null) {
|
if (sparseCounts != null) {
|
||||||
for (IntIntCursor cursor : sparseCounts) {
|
for (IntIntCursor sparseCount : sparseCounts) {
|
||||||
childCount++; // every count in sparseValues should be non-zero
|
childCount++; // every count in sparseValues should be non-zero
|
||||||
int ord = cursor.key;
|
int ord = sparseCount.key;
|
||||||
int count = cursor.value;
|
int count = sparseCount.value;
|
||||||
if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
|
if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
|
||||||
if (q == null) {
|
if (q == null) {
|
||||||
// Lazy init for sparse case:
|
// Lazy init for sparse case:
|
||||||
|
|
|
@ -16,9 +16,9 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.search.vectorhighlight;
|
package org.apache.lucene.search.vectorhighlight;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Iterator;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import org.apache.lucene.util.hppc.CharHashSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simple boundary scanner implementation that divides fragments based on a set of separator
|
* Simple boundary scanner implementation that divides fragments based on a set of separator
|
||||||
|
@ -27,10 +27,10 @@ import java.util.Set;
|
||||||
public class SimpleBoundaryScanner implements BoundaryScanner {
|
public class SimpleBoundaryScanner implements BoundaryScanner {
|
||||||
|
|
||||||
public static final int DEFAULT_MAX_SCAN = 20;
|
public static final int DEFAULT_MAX_SCAN = 20;
|
||||||
public static final Character[] DEFAULT_BOUNDARY_CHARS = {'.', ',', '!', '?', ' ', '\t', '\n'};
|
public static final char[] DEFAULT_BOUNDARY_CHARS = {'.', ',', '!', '?', ' ', '\t', '\n'};
|
||||||
|
|
||||||
protected int maxScan;
|
protected int maxScan;
|
||||||
protected Set<Character> boundaryChars;
|
protected CharHashSet boundaryChars;
|
||||||
|
|
||||||
public SimpleBoundaryScanner() {
|
public SimpleBoundaryScanner() {
|
||||||
this(DEFAULT_MAX_SCAN, DEFAULT_BOUNDARY_CHARS);
|
this(DEFAULT_MAX_SCAN, DEFAULT_BOUNDARY_CHARS);
|
||||||
|
@ -44,15 +44,34 @@ public class SimpleBoundaryScanner implements BoundaryScanner {
|
||||||
this(DEFAULT_MAX_SCAN, boundaryChars);
|
this(DEFAULT_MAX_SCAN, boundaryChars);
|
||||||
}
|
}
|
||||||
|
|
||||||
public SimpleBoundaryScanner(int maxScan, Character[] boundaryChars) {
|
public SimpleBoundaryScanner(int maxScan, char[] boundaryChars) {
|
||||||
this.maxScan = maxScan;
|
this.maxScan = maxScan;
|
||||||
this.boundaryChars = new HashSet<>();
|
this.boundaryChars = CharHashSet.from(boundaryChars);
|
||||||
this.boundaryChars.addAll(Arrays.asList(boundaryChars));
|
}
|
||||||
|
|
||||||
|
public SimpleBoundaryScanner(int maxScan, Character[] boundaryChars) {
|
||||||
|
this(maxScan, toCharArray(boundaryChars));
|
||||||
}
|
}
|
||||||
|
|
||||||
public SimpleBoundaryScanner(int maxScan, Set<Character> boundaryChars) {
|
public SimpleBoundaryScanner(int maxScan, Set<Character> boundaryChars) {
|
||||||
this.maxScan = maxScan;
|
this(maxScan, toCharArray(boundaryChars));
|
||||||
this.boundaryChars = boundaryChars;
|
}
|
||||||
|
|
||||||
|
private static char[] toCharArray(Character[] characters) {
|
||||||
|
char[] chars = new char[characters.length];
|
||||||
|
for (int i = 0; i < characters.length; i++) {
|
||||||
|
chars[i] = characters[i];
|
||||||
|
}
|
||||||
|
return chars;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static char[] toCharArray(Set<Character> characters) {
|
||||||
|
Iterator<Character> iterator = characters.iterator();
|
||||||
|
char[] chars = new char[characters.size()];
|
||||||
|
for (int i = 0; i < chars.length; i++) {
|
||||||
|
chars[i] = iterator.next();
|
||||||
|
}
|
||||||
|
return chars;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue