Replace Map<Character> by CharObjectHashMap and Set<Character> by CharHashSet. (#13420)

Also optimize the character replacement in JapaneseKatakanaUppercaseFilter.
This commit is contained in:
Bruno Roustant 2024-05-27 08:44:23 +02:00 committed by GitHub
parent 444d4e7c42
commit 6e2a8fc9b7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
30 changed files with 2957 additions and 161 deletions

View File

@ -18,13 +18,13 @@ package org.apache.lucene.analysis.charfilter;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.util.Map;
import org.apache.lucene.analysis.CharFilter; // javadocs import org.apache.lucene.analysis.CharFilter; // javadocs
import org.apache.lucene.analysis.util.RollingCharBuffer; import org.apache.lucene.analysis.util.RollingCharBuffer;
import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.fst.CharSequenceOutputs; import org.apache.lucene.util.fst.CharSequenceOutputs;
import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs; import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.hppc.CharObjectHashMap;
/** /**
* Simplistic {@link CharFilter} that applies the mappings contained in a {@link NormalizeCharMap} * Simplistic {@link CharFilter} that applies the mappings contained in a {@link NormalizeCharMap}
@ -38,7 +38,7 @@ public class MappingCharFilter extends BaseCharFilter {
private final FST.BytesReader fstReader; private final FST.BytesReader fstReader;
private final RollingCharBuffer buffer = new RollingCharBuffer(); private final RollingCharBuffer buffer = new RollingCharBuffer();
private final FST.Arc<CharsRef> scratchArc = new FST.Arc<>(); private final FST.Arc<CharsRef> scratchArc = new FST.Arc<>();
private final Map<Character, FST.Arc<CharsRef>> cachedRootArcs; private final CharObjectHashMap<FST.Arc<CharsRef>> cachedRootArcs;
private CharsRef replacement; private CharsRef replacement;
private int replacementPointer; private int replacementPointer;
@ -96,7 +96,7 @@ public class MappingCharFilter extends BaseCharFilter {
final int firstCH = buffer.get(inputOff); final int firstCH = buffer.get(inputOff);
if (firstCH != -1) { if (firstCH != -1) {
FST.Arc<CharsRef> arc = cachedRootArcs.get(Character.valueOf((char) firstCH)); FST.Arc<CharsRef> arc = cachedRootArcs.get((char) firstCH);
if (arc != null) { if (arc != null) {
if (!FST.targetHasArcs(arc)) { if (!FST.targetHasArcs(arc)) {
// Fast pass for single character match: // Fast pass for single character match:

View File

@ -17,7 +17,6 @@
package org.apache.lucene.analysis.charfilter; package org.apache.lucene.analysis.charfilter;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRef;
@ -27,6 +26,7 @@ import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler; import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.Outputs; import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util; import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.hppc.CharObjectHashMap;
// TODO: save/load? // TODO: save/load?
@ -37,7 +37,7 @@ import org.apache.lucene.util.fst.Util;
public class NormalizeCharMap { public class NormalizeCharMap {
final FST<CharsRef> map; final FST<CharsRef> map;
final Map<Character, FST.Arc<CharsRef>> cachedRootArcs = new HashMap<>(); final CharObjectHashMap<FST.Arc<CharsRef>> cachedRootArcs = new CharObjectHashMap<>();
// Use the builder to create: // Use the builder to create:
private NormalizeCharMap(FST<CharsRef> map) { private NormalizeCharMap(FST<CharsRef> map) {
@ -53,8 +53,7 @@ public class NormalizeCharMap {
while (true) { while (true) {
assert scratchArc.label() != FST.END_LABEL; assert scratchArc.label() != FST.END_LABEL;
cachedRootArcs.put( cachedRootArcs.put(
Character.valueOf((char) scratchArc.label()), (char) scratchArc.label(), new FST.Arc<CharsRef>().copyFrom(scratchArc));
new FST.Arc<CharsRef>().copyFrom(scratchArc));
if (scratchArc.isLast()) { if (scratchArc.isLast()) {
break; break;
} }

View File

@ -41,7 +41,6 @@ import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.LinkedHashSet; import java.util.LinkedHashSet;
import java.util.List; import java.util.List;
@ -49,7 +48,6 @@ import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.lucene.analysis.hunspell.SortingStrategy.EntryAccumulator; import org.apache.lucene.analysis.hunspell.SortingStrategy.EntryAccumulator;
import org.apache.lucene.analysis.hunspell.SortingStrategy.EntrySupplier; import org.apache.lucene.analysis.hunspell.SortingStrategy.EntrySupplier;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
@ -60,6 +58,7 @@ import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler; import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.IntSequenceOutputs; import org.apache.lucene.util.fst.IntSequenceOutputs;
import org.apache.lucene.util.fst.Util; import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.hppc.CharHashSet;
import org.apache.lucene.util.hppc.IntArrayList; import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor; import org.apache.lucene.util.hppc.IntCursor;
@ -334,8 +333,8 @@ public class Dictionary {
throws IOException, ParseException { throws IOException, ParseException {
TreeMap<String, IntArrayList> prefixes = new TreeMap<>(); TreeMap<String, IntArrayList> prefixes = new TreeMap<>();
TreeMap<String, IntArrayList> suffixes = new TreeMap<>(); TreeMap<String, IntArrayList> suffixes = new TreeMap<>();
Set<Character> prefixContFlags = new HashSet<>(); CharHashSet prefixContFlags = new CharHashSet();
Set<Character> suffixContFlags = new HashSet<>(); CharHashSet suffixContFlags = new CharHashSet();
Map<String, Integer> seenPatterns = new HashMap<>(); Map<String, Integer> seenPatterns = new HashMap<>();
// zero condition -> 0 ord // zero condition -> 0 ord
@ -673,7 +672,7 @@ public class Dictionary {
*/ */
private void parseAffix( private void parseAffix(
TreeMap<String, IntArrayList> affixes, TreeMap<String, IntArrayList> affixes,
Set<Character> secondStageFlags, CharHashSet secondStageFlags,
String header, String header,
LineNumberReader reader, LineNumberReader reader,
AffixKind kind, AffixKind kind,
@ -1178,10 +1177,14 @@ public class Dictionary {
} }
char[] allNonSuggestibleFlags() { char[] allNonSuggestibleFlags() {
return Dictionary.toSortedCharArray( CharHashSet set = new CharHashSet(5);
Stream.of(HIDDEN_FLAG, noSuggest, forbiddenword, onlyincompound, subStandard) set.add(HIDDEN_FLAG);
.filter(c -> c != FLAG_UNSET) for (char c : new char[] {noSuggest, forbiddenword, onlyincompound, subStandard}) {
.collect(Collectors.toSet())); if (c != FLAG_UNSET) {
set.add(c);
}
}
return Dictionary.toSortedCharArray(set);
} }
private List<String> readMorphFields(String word, String unparsed) { private List<String> readMorphFields(String word, String unparsed) {
@ -1538,12 +1541,8 @@ public class Dictionary {
return reuse; return reuse;
} }
static char[] toSortedCharArray(Set<Character> set) { static char[] toSortedCharArray(CharHashSet set) {
char[] chars = new char[set.size()]; char[] chars = set.toArray();
int i = 0;
for (Character c : set) {
chars[i++] = c;
}
Arrays.sort(chars); Arrays.sort(chars);
return chars; return chars;
} }

View File

@ -42,6 +42,8 @@ import org.apache.lucene.analysis.hunspell.AffixedWord.Affix;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.IntsRefFSTEnum; import org.apache.lucene.util.fst.IntsRefFSTEnum;
import org.apache.lucene.util.hppc.CharHashSet;
import org.apache.lucene.util.hppc.CharObjectHashMap;
/** /**
* A utility class used for generating possible word forms by adding affixes to stems ({@link * A utility class used for generating possible word forms by adding affixes to stems ({@link
@ -50,7 +52,7 @@ import org.apache.lucene.util.fst.IntsRefFSTEnum;
*/ */
public class WordFormGenerator { public class WordFormGenerator {
private final Dictionary dictionary; private final Dictionary dictionary;
private final Map<Character, List<AffixEntry>> affixes = new HashMap<>(); private final CharObjectHashMap<List<AffixEntry>> affixes = new CharObjectHashMap<>();
private final Stemmer stemmer; private final Stemmer stemmer;
public WordFormGenerator(Dictionary dictionary) { public WordFormGenerator(Dictionary dictionary) {
@ -75,7 +77,15 @@ public class WordFormGenerator {
char flag = dictionary.affixData(id, AFFIX_FLAG); char flag = dictionary.affixData(id, AFFIX_FLAG);
var entry = var entry =
new AffixEntry(id, flag, kind, toString(kind, io.input), strip(id), condition(id)); new AffixEntry(id, flag, kind, toString(kind, io.input), strip(id), condition(id));
affixes.computeIfAbsent(flag, __ -> new ArrayList<>()).add(entry); List<AffixEntry> entries;
int index = affixes.indexOf(flag);
if (index < 0) {
entries = new ArrayList<>();
affixes.indexInsert(index, flag, entries);
} else {
entries = affixes.indexGet(index);
}
entries.add(entry);
} }
} }
} catch (IOException e) { } catch (IOException e) {
@ -162,11 +172,7 @@ public class WordFormGenerator {
} }
private static char[] deduplicate(char[] flags) { private static char[] deduplicate(char[] flags) {
Set<Character> set = new HashSet<>(); return toSortedCharArray(CharHashSet.from(flags));
for (char flag : flags) {
set.add(flag);
}
return toSortedCharArray(set);
} }
/** /**
@ -408,7 +414,7 @@ public class WordFormGenerator {
int innerSuffix) { int innerSuffix) {
String candidate = new String(word, offset, length); String candidate = new String(word, offset, length);
stemCounts.merge(candidate, 1, Integer::sum); stemCounts.merge(candidate, 1, Integer::sum);
Set<Character> flags = new LinkedHashSet<>(); CharHashSet flags = new CharHashSet();
if (outerPrefix >= 0) flags.add(dictionary.affixData(outerPrefix, AFFIX_FLAG)); if (outerPrefix >= 0) flags.add(dictionary.affixData(outerPrefix, AFFIX_FLAG));
if (innerPrefix >= 0) flags.add(dictionary.affixData(innerPrefix, AFFIX_FLAG)); if (innerPrefix >= 0) flags.add(dictionary.affixData(innerPrefix, AFFIX_FLAG));
if (outerSuffix >= 0) flags.add(dictionary.affixData(outerSuffix, AFFIX_FLAG)); if (outerSuffix >= 0) flags.add(dictionary.affixData(outerSuffix, AFFIX_FLAG));
@ -479,7 +485,7 @@ public class WordFormGenerator {
if (wordSet.contains(extra)) continue; if (wordSet.contains(extra)) continue;
if (forbidden.contains(extra) && dictionary.forbiddenword != FLAG_UNSET) { if (forbidden.contains(extra) && dictionary.forbiddenword != FLAG_UNSET) {
addEntry(toEdit, toAdd, extra, Set.of(dictionary.forbiddenword)); addEntry(toEdit, toAdd, extra, CharHashSet.from(dictionary.forbiddenword));
} else { } else {
extraGenerated.add(extra); extraGenerated.add(extra);
} }
@ -489,7 +495,7 @@ public class WordFormGenerator {
} }
private void addEntry( private void addEntry(
List<DictEntry> toEdit, List<DictEntry> toAdd, String stem, Set<Character> flags) { List<DictEntry> toEdit, List<DictEntry> toAdd, String stem, CharHashSet flags) {
String flagString = toFlagString(flags); String flagString = toFlagString(flags);
(existingStems.contains(stem) ? toEdit : toAdd).add(DictEntry.create(stem, flagString)); (existingStems.contains(stem) ? toEdit : toAdd).add(DictEntry.create(stem, flagString));
} }
@ -529,18 +535,20 @@ public class WordFormGenerator {
.flatMap(swc -> expansionCache.computeIfAbsent(swc, expandToWords).stream()); .flatMap(swc -> expansionCache.computeIfAbsent(swc, expandToWords).stream());
} }
private List<AffixedWord> expand(String stem, Set<Character> flagSet) { private List<AffixedWord> expand(String stem, CharHashSet flagSet) {
return getAllWordForms(stem, toFlagString(flagSet), checkCanceled); return getAllWordForms(stem, toFlagString(flagSet), checkCanceled);
} }
private String toFlagString(Set<Character> flagSet) { private String toFlagString(CharHashSet flagSet) {
return dictionary.flagParsingStrategy.printFlags(Dictionary.toSortedCharArray(flagSet)); return dictionary.flagParsingStrategy.printFlags(Dictionary.toSortedCharArray(flagSet));
} }
} }
private record FlagSet(Set<Character> flags, Dictionary dictionary) { private record FlagSet(CharHashSet flags, Dictionary dictionary) {
static Set<Character> flatten(Set<FlagSet> flagSets) { static CharHashSet flatten(Set<FlagSet> flagSets) {
return flagSets.stream().flatMap(f -> f.flags.stream()).collect(Collectors.toSet()); CharHashSet set = new CharHashSet(flagSets.size() << 1);
flagSets.forEach(flagSet -> set.addAll(flagSet.flags));
return set;
} }
@Override @Override

View File

@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja;
import java.util.Map;
import org.apache.lucene.util.hppc.CharObjectHashMap;
/**
 * Utility methods for Japanese filters.
 *
 * <p>This class only contains static helpers and is not meant to be instantiated.
 */
class JapaneseFilterUtil {

  /** Static helpers only; a private constructor prevents accidental instantiation. */
  private JapaneseFilterUtil() {}

  /**
   * Creates a primitive char-to-char map from a set of {@link java.util.Map.Entry}.
   *
   * <p>Keys are unboxed and stored as primitive {@code char}; values are kept as boxed {@link
   * Character} so that a lookup can return {@code null} to signal "no mapping".
   *
   * @param charMappings the (source character, replacement character) pairs to register; entries
   *     are inserted in the order given
   * @return a new map, pre-sized for {@code charMappings.length} entries, holding every mapping
   */
  @SafeVarargs
  static CharObjectHashMap<Character> createCharMap(
      Map.Entry<Character, Character>... charMappings) {
    // Pre-size with the number of entries so the table is allocated once up front.
    CharObjectHashMap<Character> map = new CharObjectHashMap<>(charMappings.length);
    for (Map.Entry<Character, Character> charMapping : charMappings) {
      // getKey() is auto-unboxed to the primitive char key expected by the map.
      map.put(charMapping.getKey(), charMapping.getValue());
    }
    return map;
  }
}

View File

@ -16,11 +16,14 @@
*/ */
package org.apache.lucene.analysis.ja; package org.apache.lucene.analysis.ja;
import static org.apache.lucene.analysis.ja.JapaneseFilterUtil.createCharMap;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.hppc.CharObjectHashMap;
/** /**
* A {@link TokenFilter} that normalizes small letters (捨て仮名) in hiragana into normal letters. For * A {@link TokenFilter} that normalizes small letters (捨て仮名) in hiragana into normal letters. For
@ -30,13 +33,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
* legal, contract policies, etc. * legal, contract policies, etc.
*/ */
public final class JapaneseHiraganaUppercaseFilter extends TokenFilter { public final class JapaneseHiraganaUppercaseFilter extends TokenFilter {
private static final Map<Character, Character> LETTER_MAPPINGS; private static final CharObjectHashMap<Character> LETTER_MAPPINGS;
static { static {
// supported characters are: // supported characters are:
// //
LETTER_MAPPINGS = LETTER_MAPPINGS =
Map.ofEntries( createCharMap(
Map.entry('ぁ', 'あ'), Map.entry('ぁ', 'あ'),
Map.entry('ぃ', 'い'), Map.entry('ぃ', 'い'),
Map.entry('ぅ', 'う'), Map.entry('ぅ', 'う'),
@ -59,17 +62,16 @@ public final class JapaneseHiraganaUppercaseFilter extends TokenFilter {
@Override @Override
public boolean incrementToken() throws IOException { public boolean incrementToken() throws IOException {
if (input.incrementToken()) { if (!input.incrementToken()) {
char[] termBuffer = termAttr.buffer();
for (int i = 0; i < termBuffer.length; i++) {
Character c = LETTER_MAPPINGS.get(termBuffer[i]);
if (c != null) {
termBuffer[i] = c;
}
}
return true;
} else {
return false; return false;
} }
final char[] termBuffer = termAttr.buffer();
for (int i = 0, length = termAttr.length(); i < length; i++) {
Character c = LETTER_MAPPINGS.get(termBuffer[i]);
if (c != null) {
termBuffer[i] = c;
}
}
return true;
} }
} }

View File

@ -16,11 +16,14 @@
*/ */
package org.apache.lucene.analysis.ja; package org.apache.lucene.analysis.ja;
import static org.apache.lucene.analysis.ja.JapaneseFilterUtil.createCharMap;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.hppc.CharObjectHashMap;
/** /**
* A {@link TokenFilter} that normalizes small letters (捨て仮名) in katakana into normal letters. For * A {@link TokenFilter} that normalizes small letters (捨て仮名) in katakana into normal letters. For
@ -30,13 +33,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
* legal, contract policies, etc. * legal, contract policies, etc.
*/ */
public final class JapaneseKatakanaUppercaseFilter extends TokenFilter { public final class JapaneseKatakanaUppercaseFilter extends TokenFilter {
private static final Map<Character, Character> LETTER_MAPPINGS; private static final CharObjectHashMap<Character> LETTER_MAPPINGS;
static { static {
// supported characters are: // supported characters are:
// ㇷ゚ // ㇷ゚
LETTER_MAPPINGS = LETTER_MAPPINGS =
Map.ofEntries( createCharMap(
Map.entry('ァ', 'ア'), Map.entry('ァ', 'ア'),
Map.entry('ィ', 'イ'), Map.entry('ィ', 'イ'),
Map.entry('ゥ', 'ウ'), Map.entry('ゥ', 'ウ'),
@ -75,22 +78,24 @@ public final class JapaneseKatakanaUppercaseFilter extends TokenFilter {
@Override @Override
public boolean incrementToken() throws IOException { public boolean incrementToken() throws IOException {
if (input.incrementToken()) { if (!input.incrementToken()) {
String term = termAttr.toString();
if (term.contains("ㇷ゚")) {
term = term.replace("ㇷ゚", "");
termAttr.setEmpty().append(term);
}
char[] termBuffer = termAttr.buffer();
for (int i = 0; i < termBuffer.length; i++) {
Character c = LETTER_MAPPINGS.get(termBuffer[i]);
if (c != null) {
termBuffer[i] = c;
}
}
return true;
} else {
return false; return false;
} }
final char[] termBuffer = termAttr.buffer();
int newLength = termAttr.length();
for (int from = 0, to = 0, length = newLength; from < length; from++, to++) {
char c = termBuffer[from];
if (c == 'ㇷ' && from + 1 < length && termBuffer[from + 1] == '゚') {
// ㇷ゚detected, replace it by プ.
termBuffer[to] = 'プ';
from++;
newLength--;
} else {
Character mappedChar = LETTER_MAPPINGS.get(c);
termBuffer[to] = mappedChar == null ? c : mappedChar;
}
}
termAttr.setLength(newLength);
return true;
} }
} }

View File

@ -65,6 +65,7 @@ public class TestJapaneseKatakanaUppercaseFilter extends BaseTokenStreamTestCase
new String[] {"アイウエオカクケシスツトヌハヒフプヘホムヤユヨラリルレロワ"}); new String[] {"アイウエオカクケシスツトヌハヒフプヘホムヤユヨラリルレロワ"});
assertAnalyzesTo(keywordAnalyzer, "ストップウォッチ", new String[] {"ストツプウオツチ"}); assertAnalyzesTo(keywordAnalyzer, "ストップウォッチ", new String[] {"ストツプウオツチ"});
assertAnalyzesTo(keywordAnalyzer, "サラニㇷ゚ カムイチェㇷ゚ ㇷ゚ㇷ゚", new String[] {"サラニプ", "カムイチエプ", "ププ"}); assertAnalyzesTo(keywordAnalyzer, "サラニㇷ゚ カムイチェㇷ゚ ㇷ゚ㇷ゚", new String[] {"サラニプ", "カムイチエプ", "ププ"});
assertAnalyzesTo(keywordAnalyzer, "カムイチェㇷ゚カムイチェ", new String[] {"カムイチエプカムイチエ"});
} }
public void testKanaUppercaseWithSurrogatePair() throws IOException { public void testKanaUppercaseWithSurrogatePair() throws IOException {

View File

@ -58,6 +58,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import org.apache.lucene.util.hppc.ObjectCursor;
/** /**
* The Gener object helps in the discarding of nodes which break the reduction effort and defend the * The Gener object helps in the discarding of nodes which break the reduction effort and defend the
@ -103,8 +104,8 @@ public class Gener extends Reduce {
*/ */
public boolean eat(Row in, int[] remap) { public boolean eat(Row in, int[] remap) {
int sum = 0; int sum = 0;
for (Iterator<Cell> i = in.cells.values().iterator(); i.hasNext(); ) { for (Iterator<ObjectCursor<Cell>> i = in.cells.values().iterator(); i.hasNext(); ) {
Cell c = i.next(); Cell c = i.next().value;
sum += c.cnt; sum += c.cnt;
if (c.ref >= 0) { if (c.ref >= 0) {
if (remap[c.ref] == 0) { if (remap[c.ref] == 0) {
@ -114,8 +115,8 @@ public class Gener extends Reduce {
} }
int frame = sum / 10; int frame = sum / 10;
boolean live = false; boolean live = false;
for (Iterator<Cell> i = in.cells.values().iterator(); i.hasNext(); ) { for (Iterator<ObjectCursor<Cell>> i = in.cells.values().iterator(); i.hasNext(); ) {
Cell c = i.next(); Cell c = i.next().value;
if (c.cnt < frame && c.cmd >= 0) { if (c.cnt < frame && c.cmd >= 0) {
c.cnt = 0; c.cnt = 0;
c.cmd = -1; c.cmd = -1;

View File

@ -58,6 +58,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import org.apache.lucene.util.hppc.ObjectCursor;
/** /**
* The Lift class is a data structure that is a variation of a Patricia trie. * The Lift class is a data structure that is a variation of a Patricia trie.
@ -111,9 +112,9 @@ public class Lift extends Reduce {
* @param nodes contains the patch commands * @param nodes contains the patch commands
*/ */
public void liftUp(Row in, List<Row> nodes) { public void liftUp(Row in, List<Row> nodes) {
Iterator<Cell> i = in.cells.values().iterator(); Iterator<ObjectCursor<Cell>> i = in.cells.values().iterator();
for (; i.hasNext(); ) { for (; i.hasNext(); ) {
Cell c = i.next(); Cell c = i.next().value;
if (c.ref >= 0) { if (c.ref >= 0) {
Row to = nodes.get(c.ref); Row to = nodes.get(c.ref);
int sum = to.uniformCmd(changeSkip); int sum = to.uniformCmd(changeSkip);

View File

@ -58,6 +58,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import org.apache.lucene.util.hppc.CharCursor;
/** /**
* The Optimizer class is a Trie that will be reduced (have empty rows removed). * The Optimizer class is a Trie that will be reduced (have empty rows removed).
@ -116,10 +117,10 @@ public class Optimizer extends Reduce {
* @return the resulting Row, or <code>null</code> if the operation cannot be realized * @return the resulting Row, or <code>null</code> if the operation cannot be realized
*/ */
public Row merge(Row master, Row existing) { public Row merge(Row master, Row existing) {
Iterator<Character> i = master.cells.keySet().iterator(); Iterator<CharCursor> i = master.cells.keys().iterator();
Row n = new Row(); Row n = new Row();
for (; i.hasNext(); ) { for (; i.hasNext(); ) {
Character ch = i.next(); char ch = i.next().value;
// XXX also must handle Cnt and Skip !! // XXX also must handle Cnt and Skip !!
Cell a = master.cells.get(ch); Cell a = master.cells.get(ch);
Cell b = existing.cells.get(ch); Cell b = existing.cells.get(ch);
@ -130,9 +131,9 @@ public class Optimizer extends Reduce {
} }
n.cells.put(ch, s); n.cells.put(ch, s);
} }
i = existing.cells.keySet().iterator(); i = existing.cells.keys().iterator();
for (; i.hasNext(); ) { for (; i.hasNext(); ) {
Character ch = i.next(); char ch = i.next().value;
if (master.at(ch) != null) { if (master.at(ch) != null) {
continue; continue;
} }

View File

@ -58,6 +58,8 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import org.apache.lucene.util.hppc.CharCursor;
import org.apache.lucene.util.hppc.ObjectCursor;
/** The Reduce object is used to remove gaps in a Trie which stores a dictionary. */ /** The Reduce object is used to remove gaps in a Trie which stores a dictionary. */
public class Reduce { public class Reduce {
@ -88,9 +90,9 @@ public class Reduce {
Row now = old.get(ind); Row now = old.get(ind);
to.add(now); to.add(now);
Iterator<Cell> i = now.cells.values().iterator(); Iterator<ObjectCursor<Cell>> i = now.cells.values().iterator();
for (; i.hasNext(); ) { for (; i.hasNext(); ) {
Cell c = i.next(); Cell c = i.next().value;
if (c.ref >= 0 && remap[c.ref] < 0) { if (c.ref >= 0 && remap[c.ref] < 0) {
removeGaps(c.ref, old, to, remap); removeGaps(c.ref, old, to, remap);
} }
@ -109,9 +111,9 @@ public class Reduce {
*/ */
public Remap(Row old, int[] remap) { public Remap(Row old, int[] remap) {
super(); super();
Iterator<Character> i = old.cells.keySet().iterator(); Iterator<CharCursor> i = old.cells.keys().iterator();
for (; i.hasNext(); ) { for (; i.hasNext(); ) {
Character ch = i.next(); char ch = i.next().value;
Cell c = old.at(ch); Cell c = old.at(ch);
Cell nc; Cell nc;
if (c.ref >= 0) { if (c.ref >= 0) {

View File

@ -59,11 +59,13 @@ import java.io.DataOutput;
import java.io.IOException; import java.io.IOException;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.Iterator; import java.util.Iterator;
import java.util.TreeMap; import org.apache.lucene.util.hppc.CharCursor;
import org.apache.lucene.util.hppc.CharObjectHashMap;
import org.apache.lucene.util.hppc.ObjectCursor;
/** The Row class represents a row in a matrix representation of a trie. */ /** The Row class represents a row in a matrix representation of a trie. */
public class Row { public class Row {
TreeMap<Character, Cell> cells = new TreeMap<>(); CharObjectHashMap<Cell> cells = new CharObjectHashMap<>();
int uniformCnt = 0; int uniformCnt = 0;
int uniformSkip = 0; int uniformSkip = 0;
@ -98,12 +100,12 @@ public class Row {
} }
/** /**
* Set the command in the Cell of the given Character to the given integer. * Set the command in the Cell of the given character to the given integer.
* *
* @param way the Character defining the Cell * @param way the character defining the Cell
* @param cmd the new command * @param cmd the new command
*/ */
public void setCmd(Character way, int cmd) { public void setCmd(char way, int cmd) {
Cell c = at(way); Cell c = at(way);
if (c == null) { if (c == null) {
c = new Cell(); c = new Cell();
@ -116,12 +118,12 @@ public class Row {
} }
/** /**
* Set the reference to the next row in the Cell of the given Character to the given integer. * Set the reference to the next row in the Cell of the given character to the given integer.
* *
* @param way the Character defining the Cell * @param way the character defining the Cell
* @param ref The new ref value * @param ref The new ref value
*/ */
public void setRef(Character way, int ref) { public void setRef(char way, int ref) {
Cell c = at(way); Cell c = at(way);
if (c == null) { if (c == null) {
c = new Cell(); c = new Cell();
@ -138,10 +140,10 @@ public class Row {
* @return the number of cells in use * @return the number of cells in use
*/ */
public int getCells() { public int getCells() {
Iterator<Character> i = cells.keySet().iterator(); Iterator<CharCursor> i = cells.keys().iterator();
int size = 0; int size = 0;
for (; i.hasNext(); ) { for (; i.hasNext(); ) {
Character c = i.next(); char c = i.next().value;
Cell e = at(c); Cell e = at(c);
if (e.cmd >= 0 || e.ref >= 0) { if (e.cmd >= 0 || e.ref >= 0) {
size++; size++;
@ -156,10 +158,10 @@ public class Row {
* @return the number of references * @return the number of references
*/ */
public int getCellsPnt() { public int getCellsPnt() {
Iterator<Character> i = cells.keySet().iterator(); Iterator<CharCursor> i = cells.keys().iterator();
int size = 0; int size = 0;
for (; i.hasNext(); ) { for (; i.hasNext(); ) {
Character c = i.next(); char c = i.next().value;
Cell e = at(c); Cell e = at(c);
if (e.ref >= 0) { if (e.ref >= 0) {
size++; size++;
@ -174,10 +176,10 @@ public class Row {
* @return the number of patch commands * @return the number of patch commands
*/ */
public int getCellsVal() { public int getCellsVal() {
Iterator<Character> i = cells.keySet().iterator(); Iterator<CharCursor> i = cells.keys().iterator();
int size = 0; int size = 0;
for (; i.hasNext(); ) { for (; i.hasNext(); ) {
Character c = i.next(); char c = i.next().value;
Cell e = at(c); Cell e = at(c);
if (e.cmd >= 0) { if (e.cmd >= 0) {
size++; size++;
@ -187,35 +189,35 @@ public class Row {
} }
/** /**
* Return the command in the Cell associated with the given Character. * Return the command in the Cell associated with the given character.
* *
* @param way the Character associated with the Cell holding the desired command * @param way the character associated with the Cell holding the desired command
* @return the command * @return the command
*/ */
public int getCmd(Character way) { public int getCmd(char way) {
Cell c = at(way); Cell c = at(way);
return (c == null) ? -1 : c.cmd; return (c == null) ? -1 : c.cmd;
} }
/** /**
* Return the number of patch commands were in the Cell associated with the given Character before * Return the number of patch commands were in the Cell associated with the given character before
* the Trie containing this Row was reduced. * the Trie containing this Row was reduced.
* *
* @param way the Character associated with the desired Cell * @param way the character associated with the desired Cell
* @return the number of patch commands before reduction * @return the number of patch commands before reduction
*/ */
public int getCnt(Character way) { public int getCnt(char way) {
Cell c = at(way); Cell c = at(way);
return (c == null) ? -1 : c.cnt; return (c == null) ? -1 : c.cnt;
} }
/** /**
* Return the reference to the next Row in the Cell associated with the given Character. * Return the reference to the next Row in the Cell associated with the given character.
* *
* @param way the Character associated with the desired Cell * @param way the character associated with the desired Cell
* @return the reference, or -1 if the Cell is <code>null</code> * @return the reference, or -1 if the Cell is <code>null</code>
*/ */
public int getRef(Character way) { public int getRef(char way) {
Cell c = at(way); Cell c = at(way);
return (c == null) ? -1 : c.ref; return (c == null) ? -1 : c.ref;
} }
@ -228,15 +230,15 @@ public class Row {
*/ */
public void store(DataOutput os) throws IOException { public void store(DataOutput os) throws IOException {
os.writeInt(cells.size()); os.writeInt(cells.size());
Iterator<Character> i = cells.keySet().iterator(); Iterator<CharCursor> i = cells.keys().iterator();
for (; i.hasNext(); ) { for (; i.hasNext(); ) {
Character c = i.next(); char c = i.next().value;
Cell e = at(c); Cell e = at(c);
if (e.cmd < 0 && e.ref < 0) { if (e.cmd < 0 && e.ref < 0) {
continue; continue;
} }
os.writeChar(c.charValue()); os.writeChar(c);
os.writeInt(e.cmd); os.writeInt(e.cmd);
os.writeInt(e.cnt); os.writeInt(e.cnt);
os.writeInt(e.ref); os.writeInt(e.ref);
@ -251,12 +253,12 @@ public class Row {
* @return the number of identical Cells, or -1 if there are (at least) two different cells * @return the number of identical Cells, or -1 if there are (at least) two different cells
*/ */
public int uniformCmd(boolean eqSkip) { public int uniformCmd(boolean eqSkip) {
Iterator<Cell> i = cells.values().iterator(); Iterator<ObjectCursor<Cell>> i = cells.values().iterator();
int ret = -1; int ret = -1;
uniformCnt = 1; uniformCnt = 1;
uniformSkip = 0; uniformSkip = 0;
for (; i.hasNext(); ) { for (; i.hasNext(); ) {
Cell c = i.next(); Cell c = i.next().value;
if (c.ref >= 0) { if (c.ref >= 0) {
return -1; return -1;
} }
@ -284,15 +286,15 @@ public class Row {
/** Write the contents of this Row to the printstream. */ /** Write the contents of this Row to the printstream. */
public void print(PrintStream out) { public void print(PrintStream out) {
for (Iterator<Character> i = cells.keySet().iterator(); i.hasNext(); ) { for (Iterator<CharCursor> i = cells.keys().iterator(); i.hasNext(); ) {
Character ch = i.next(); char ch = i.next().value;
Cell c = at(ch); Cell c = at(ch);
out.print("[" + ch + ":" + c + "]"); out.print("[" + ch + ":" + c + "]");
} }
out.println(); out.println();
} }
Cell at(Character index) { Cell at(char index) {
return cells.get(index); return cells.get(index);
} }
} }

View File

@ -134,7 +134,7 @@ public class Trie {
boolean br = false; boolean br = false;
for (int i = 0; i < key.length() - 1; i++) { for (int i = 0; i < key.length() - 1; i++) {
Character ch = e.next(); char ch = e.next();
w = now.getCmd(ch); w = now.getCmd(ch);
if (w >= 0) { if (w >= 0) {
int n = w; int n = w;
@ -227,7 +227,7 @@ public class Trie {
Cell c; Cell c;
int cmd = -1; int cmd = -1;
StrEnum e = new StrEnum(key, forward); StrEnum e = new StrEnum(key, forward);
Character ch = null; char ch;
for (int i = 0; i < key.length(); ) { for (int i = 0; i < key.length(); ) {
ch = e.next(); ch = e.next();
@ -272,7 +272,7 @@ public class Trie {
StrEnum e = new StrEnum(key, forward); StrEnum e = new StrEnum(key, forward);
for (int i = 0; i < key.length() - 1; i++) { for (int i = 0; i < key.length() - 1; i++) {
Character ch = e.next(); char ch = e.next();
w = now.getCmd(ch); w = now.getCmd(ch);
if (w >= 0) { if (w >= 0) {
last = cmds.get(w); last = cmds.get(w);
@ -343,7 +343,7 @@ public class Trie {
StrEnum e = new StrEnum(key, forward); StrEnum e = new StrEnum(key, forward);
for (int i = 0; i < e.length() - 1; i++) { for (int i = 0; i < e.length() - 1; i++) {
Character ch = e.next(); char ch = e.next();
node = r.getRef(ch); node = r.getRef(ch);
if (node >= 0) { if (node >= 0) {
r = getRow(node); r = getRow(node);

View File

@ -281,8 +281,8 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
private static List<String> sortFieldNames( private static List<String> sortFieldNames(
IntObjectHashMap<FieldReader> fieldMap, FieldInfos fieldInfos) { IntObjectHashMap<FieldReader> fieldMap, FieldInfos fieldInfos) {
List<String> fieldNames = new ArrayList<>(fieldMap.size()); List<String> fieldNames = new ArrayList<>(fieldMap.size());
for (IntCursor fieldNumberCursor : fieldMap.keys()) { for (IntCursor fieldNumber : fieldMap.keys()) {
fieldNames.add(fieldInfos.fieldInfo(fieldNumberCursor.value).name); fieldNames.add(fieldInfos.fieldInfo(fieldNumber.value).name);
} }
fieldNames.sort(null); fieldNames.sort(null);
return Collections.unmodifiableList(fieldNames); return Collections.unmodifiableList(fieldNames);

View File

@ -95,8 +95,8 @@ final class StateSet extends IntSet {
} }
arrayCache = new int[inner.size()]; arrayCache = new int[inner.size()];
int i = 0; int i = 0;
for (IntCursor cursor : inner.keys()) { for (IntCursor key : inner.keys()) {
arrayCache[i++] = cursor.value; arrayCache[i++] = key.value;
} }
// we need to sort this array since "equals" method depend on this // we need to sort this array since "equals" method depend on this
Arrays.sort(arrayCache); Arrays.sort(arrayCache);
@ -115,8 +115,8 @@ final class StateSet extends IntSet {
return hashCode; return hashCode;
} }
hashCode = inner.size(); hashCode = inner.size();
for (IntCursor cursor : inner.keys()) { for (IntCursor key : inner.keys()) {
hashCode += BitMixer.mix(cursor.value); hashCode += BitMixer.mix(key.value);
} }
hashUpdated = true; hashUpdated = true;
return hashCode; return hashCode;

View File

@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.hppc;
/** Cursor forked from HPPC, pairing an {@code int} index with a {@code char} value. */
public final class CharCursor {
  /**
   * Index of the current value within the container that owns this cursor. What the index means
   * is up to the container (usually a position in its underlying storage buffer).
   */
  public int index;

  /** The current value. */
  public char value;

  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder("[cursor, index: ");
    text.append(index);
    text.append(", value: ");
    text.append(value);
    text.append("]");
    return text.toString();
  }
}

View File

@ -0,0 +1,693 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.hppc;
import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_EXPECTED_ELEMENTS;
import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_LOAD_FACTOR;
import static org.apache.lucene.util.hppc.HashContainers.ITERATION_SEED;
import static org.apache.lucene.util.hppc.HashContainers.MAX_LOAD_FACTOR;
import static org.apache.lucene.util.hppc.HashContainers.MIN_LOAD_FACTOR;
import static org.apache.lucene.util.hppc.HashContainers.checkLoadFactor;
import static org.apache.lucene.util.hppc.HashContainers.expandAtCount;
import static org.apache.lucene.util.hppc.HashContainers.iterationIncrement;
import static org.apache.lucene.util.hppc.HashContainers.minBufferSize;
import static org.apache.lucene.util.hppc.HashContainers.nextBufferSize;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
/**
* A hash set of <code>char</code>s, implemented using open addressing with linear probing for
* collision resolution.
*
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.CharHashSet
*
* <p>github: https://github.com/carrotsearch/hppc release 0.9.0
*/
public class CharHashSet implements Iterable<CharCursor>, Accountable, Cloneable {
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(CharHashSet.class);
private static final char EMPTY_KEY = (char) 0;
/** The hash array holding keys. */
public char[] keys;
/**
* The number of stored keys (assigned key slots), excluding the special "empty" key, if any.
*
* @see #size()
* @see #hasEmptyKey
*/
protected int assigned;
/** Mask for slot scans in {@link #keys}. */
protected int mask;
/** Expand (rehash) {@link #keys} when {@link #assigned} hits this value. */
protected int resizeAt;
/** Special treatment for the "empty slot" key marker. */
protected boolean hasEmptyKey;
/** The load factor for {@link #keys}. */
protected double loadFactor;
/** Seed used to ensure the hash iteration order is different from an iteration to another. */
protected int iterationSeed;
/** New instance with sane defaults. */
public CharHashSet() {
this(DEFAULT_EXPECTED_ELEMENTS);
}
/**
* New instance with sane defaults.
*
* @param expectedElements The expected number of elements guaranteed not to cause a rehash
* (inclusive).
*/
public CharHashSet(int expectedElements) {
this(expectedElements, DEFAULT_LOAD_FACTOR);
}
/**
* New instance with the provided defaults.
*
* @param expectedElements The expected number of elements guaranteed not to cause a rehash
* (inclusive).
* @param loadFactor The load factor for internal buffers. Insane load factors (zero, full
* capacity) are rejected by {@link #verifyLoadFactor(double)}.
*/
public CharHashSet(int expectedElements, double loadFactor) {
this.loadFactor = verifyLoadFactor(loadFactor);
iterationSeed = ITERATION_SEED.incrementAndGet();
ensureCapacity(expectedElements);
}
/** New instance copying elements from another set. */
public CharHashSet(CharHashSet set) {
this(set.size());
addAll(set);
}
/**
 * Adds a key to this set.
 *
 * @param key the key to add.
 * @return <code>true</code> if the key was not in the set before this call and has been added,
 *     <code>false</code> if it was already present.
 */
public boolean add(char key) {
  if (key == 0) {
    // Zero doubles as the empty-slot marker, so its membership is tracked by a flag instead.
    assert keys[mask + 1] == 0;
    final boolean wasAbsent = !hasEmptyKey;
    hasEmptyKey = true;
    return wasAbsent;
  }

  final char[] table = this.keys;
  final int slotMask = this.mask;
  int slot = hashKey(key) & slotMask;
  // Linear probing: stop at the key itself or at the first free slot.
  for (char candidate = table[slot]; candidate != 0; candidate = table[slot]) {
    if (candidate == key) {
      return false;
    }
    slot = (slot + 1) & slotMask;
  }

  if (assigned == resizeAt) {
    allocateThenInsertThenRehash(slot, key);
  } else {
    table[slot] = key;
  }
  assigned++;
  return true;
}
/**
 * Adds every element of the given array (vararg) to this set.
 *
 * @return Returns the number of elements that this call actually added (those not previously
 *     present in the set).
 */
public final int addAll(char... elements) {
  ensureCapacity(elements.length);
  int added = 0;
  for (int i = 0; i < elements.length; i++) {
    added += add(elements[i]) ? 1 : 0;
  }
  return added;
}
/**
* Adds all elements from the given set to this set.
*
* @return Returns the number of elements actually added as a result of this call (not previously
* present in the set).
*/
public int addAll(CharHashSet set) {
ensureCapacity(set.size());
return addAll((Iterable<? extends CharCursor>) set);
}
/**
* Adds all elements from the given iterable to this set.
*
* @return Returns the number of elements actually added as a result of this call (not previously
* present in the set).
*/
public int addAll(Iterable<? extends CharCursor> iterable) {
int count = 0;
for (CharCursor cursor : iterable) {
if (add(cursor.value)) {
count++;
}
}
return count;
}
/**
 * Copies the keys of this set into a newly allocated array of length {@link #size()}.
 *
 * <p>If the zero key is present it is written first. The remaining keys are collected by
 * stepping through the key slots from a start slot and with an increment that are both derived
 * from a fresh iteration seed, so the order of the returned keys may differ between calls.
 *
 * @return a new array holding all keys of this set.
 */
public char[] toArray() {
  final char[] cloned = (new char[size()]);
  int j = 0;
  if (hasEmptyKey) {
    cloned[j++] = EMPTY_KEY;
  }
  final char[] keys = this.keys;
  int seed = nextIterationSeed();
  int inc = iterationIncrement(seed);
  // Performs mask + 1 steps; presumably iterationIncrement() yields a step that visits every
  // slot exactly once -- TODO confirm against HashContainers.
  for (int i = 0, mask = this.mask, slot = seed & mask;
      i <= mask;
      i++, slot = (slot + inc) & mask) {
    char existing;
    if (!((existing = keys[slot]) == 0)) {
      cloned[j++] = existing;
    }
  }
  return cloned;
}
/**
 * Removes a single key from this set, if it is present.
 *
 * @param key the key to remove.
 * @return <code>true</code> if the key was in the set and has been removed, <code>false</code>
 *     otherwise.
 */
public boolean remove(char key) {
  if (key == 0) {
    // The zero key is tracked by a flag rather than stored in a slot.
    final boolean wasPresent = hasEmptyKey;
    hasEmptyKey = false;
    return wasPresent;
  }

  final char[] table = this.keys;
  final int slotMask = this.mask;
  int slot = hashKey(key) & slotMask;
  for (char candidate = table[slot]; candidate != 0; candidate = table[slot]) {
    if (candidate == key) {
      shiftConflictingKeys(slot);
      return true;
    }
    slot = (slot + 1) & slotMask;
  }
  return false;
}
/**
 * Removes from this set all keys that are present in the given set.
 *
 * <p>When <code>other</code> is at least as large as this set, this set's own slots are scanned
 * and each key is looked up in <code>other</code>; otherwise the keys of <code>other</code> are
 * iterated and removed from this set one by one.
 *
 * @param other the set of keys to remove.
 * @return Returns the number of elements actually removed as a result of this call.
 */
public int removeAll(CharHashSet other) {
  final int before = size();
  // Try to iterate over the smaller set or over the container that isn't implementing
  // efficient contains() lookup.
  if (other.size() >= size()) {
    // The zero key lives outside the regular slots and must be handled separately.
    if (hasEmptyKey && other.contains(EMPTY_KEY)) {
      hasEmptyKey = false;
    }
    final char[] keys = this.keys;
    for (int slot = 0, max = this.mask; slot <= max; ) {
      char existing;
      if (!((existing = keys[slot]) == 0) && other.contains(existing)) {
        // Shift, do not increment slot.
        // (shiftConflictingKeys may move another key into this slot, so it is re-examined.)
        shiftConflictingKeys(slot);
      } else {
        slot++;
      }
    }
  } else {
    for (CharCursor c : other) {
      remove(c.value);
    }
  }
  return before - size();
}
/**
 * Checks whether the given key is in this set.
 *
 * @param key the key to look up.
 * @return <code>true</code> if the set contains the key.
 */
public boolean contains(char key) {
  if (key == 0) {
    return hasEmptyKey;
  }

  final char[] table = this.keys;
  final int slotMask = this.mask;
  int slot = hashKey(key) & slotMask;
  // Probe linearly until the key is found or an empty slot ends the run.
  for (char candidate = table[slot]; candidate != 0; candidate = table[slot]) {
    if (candidate == key) {
      return true;
    }
    slot = (slot + 1) & slotMask;
  }
  return false;
}
public void clear() {
assigned = 0;
hasEmptyKey = false;
Arrays.fill(keys, EMPTY_KEY);
}
public void release() {
assigned = 0;
hasEmptyKey = false;
keys = null;
ensureCapacity(DEFAULT_EXPECTED_ELEMENTS);
}
public boolean isEmpty() {
return size() == 0;
}
/**
 * Makes sure this container can store at least the given number of elements without having to
 * resize its buffers.
 *
 * @param expectedElements The total number of elements, inclusive.
 */
public void ensureCapacity(int expectedElements) {
  if (keys != null && expectedElements <= resizeAt) {
    // Buffers exist already and are large enough.
    return;
  }

  final char[] previous = this.keys;
  allocateBuffers(minBufferSize(expectedElements, loadFactor));
  if (previous != null && !isEmpty()) {
    rehash(previous);
  }
}
public int size() {
return assigned + (hasEmptyKey ? 1 : 0);
}
@Override
public int hashCode() {
int h = hasEmptyKey ? 0xDEADBEEF : 0;
final char[] keys = this.keys;
for (int slot = mask; slot >= 0; slot--) {
char existing;
if (!((existing = keys[slot]) == 0)) {
h += BitMixer.mix(existing);
}
}
return h;
}
/**
 * Two sets are equal when they are instances of exactly the same class and hold the same keys.
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (obj == null || getClass() != obj.getClass()) {
    return false;
  }
  return sameKeys(getClass().cast(obj));
}
/**
 * Return true if the other container has the same size as this one and each of its keys also
 * exists in this container.
 */
private boolean sameKeys(CharHashSet other) {
  if (size() != other.size()) {
    return false;
  }
  final Iterator<CharCursor> cursors = other.iterator();
  while (cursors.hasNext()) {
    if (!contains(cursors.next().value)) {
      return false;
    }
  }
  return true;
}
@Override
public CharHashSet clone() {
try {
/* */
CharHashSet cloned = (CharHashSet) super.clone();
cloned.keys = keys.clone();
cloned.hasEmptyKey = hasEmptyKey;
cloned.iterationSeed = ITERATION_SEED.incrementAndGet();
return cloned;
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
}
@Override
public Iterator<CharCursor> iterator() {
return new EntryIterator();
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys);
}
/**
* Provides the next iteration seed used to build the iteration starting slot and offset
* increment. This method does not need to be synchronized, what matters is that each thread gets
* a sequence of varying seeds.
*/
protected int nextIterationSeed() {
return iterationSeed = BitMixer.mixPhi(iterationSeed);
}
/**
 * An iterator implementation for {@link #iterator}.
 *
 * <p>A single {@link CharCursor} instance is reused for every element returned; its fields are
 * overwritten on each step. Keys stored in regular slots are reported first, and the zero key
 * (if present) is reported last with an index of <code>mask + 1</code>.
 */
protected final class EntryIterator extends AbstractIterator<CharCursor> {
  /** The one cursor instance handed out for every element. */
  private final CharCursor cursor;

  /** Step between successively visited slots, derived from the iteration seed. */
  private final int increment;

  /** Number of slots visited so far. */
  private int index;

  /** The slot visited most recently (initially the seed-derived start slot). */
  private int slot;

  public EntryIterator() {
    cursor = new CharCursor();
    int seed = nextIterationSeed();
    increment = iterationIncrement(seed);
    slot = seed & mask;
  }

  @Override
  protected CharCursor fetch() {
    final int mask = CharHashSet.this.mask;
    while (index <= mask) {
      char existing;
      index++;
      // Advance before reading, so the first slot examined is (start + increment) & mask.
      slot = (slot + increment) & mask;
      if (!((existing = keys[slot]) == 0)) {
        cursor.index = slot;
        cursor.value = existing;
        return cursor;
      }
    }
    // All regular slots were visited; report the zero key once. The post-increment pushes
    // index past mask + 1 so this branch is not taken again.
    if (index == mask + 1 && hasEmptyKey) {
      cursor.index = index++;
      cursor.value = EMPTY_KEY;
      return cursor;
    }
    return done();
  }
}
/**
* Create a set from a variable number of arguments or an array of <code>char</code>. The elements
* are copied from the argument to the internal buffer.
*/
/* */
public static CharHashSet from(char... elements) {
final CharHashSet set = new CharHashSet(elements.length);
set.addAll(elements);
return set;
}
/**
* Returns a hash code for the given key.
*
* <p>The output from this function should evenly distribute keys across the entire integer range.
*/
protected int hashKey(char key) {
assert !((key) == 0); // Handled as a special case (empty slot marker).
return BitMixer.mixPhi(key);
}
/**
 * Returns a logical "index" of a given key that can be used to speed up follow-up logic in
 * certain scenarios (conditional logic).
 *
 * <p>The semantics of "indexes" are not strictly defined. Indexes may not be (and typically
 * won't be) contiguous.
 *
 * <p>The index is valid only between modifications (it will not be affected by read-only
 * operations).
 *
 * @see #indexExists
 * @see #indexGet
 * @see #indexInsert
 * @see #indexReplace
 * @param key The key to locate in the set.
 * @return A non-negative value of the logical "index" of the key in the set or a negative value
 *     if the key did not exist.
 */
public int indexOf(char key) {
  final int slotMask = this.mask;
  if (key == 0) {
    // The zero key is addressed by the extra slot right after the regular ones.
    final int emptySlot = slotMask + 1;
    return hasEmptyKey ? emptySlot : ~emptySlot;
  }

  final char[] table = this.keys;
  int slot = hashKey(key) & slotMask;
  for (char candidate = table[slot]; candidate != 0; candidate = table[slot]) {
    if (candidate == key) {
      return slot;
    }
    slot = (slot + 1) & slotMask;
  }
  // Not found: encode the free slot where the key would be inserted as a negative value.
  return ~slot;
}
/**
 * Tells whether an index returned from {@link #indexOf} refers to a key that is in the set.
 *
 * @see #indexOf
 * @param index The index of a given key, as returned from {@link #indexOf}.
 * @return Returns <code>true</code> if the index corresponds to an existing key or false
 *     otherwise. This is equivalent to checking whether the index is a non-negative value
 *     (existing keys) or a negative value (non-existing keys).
 */
public boolean indexExists(int index) {
  assert index < 0 || index <= mask || (index == mask + 1 && hasEmptyKey);
  return index >= 0;
}
/**
* Returns the exact value of the existing key. This method makes sense for sets of objects which
* define custom key-equality relationship.
*
* @see #indexOf
* @param index The index of an existing key.
* @return Returns the equivalent key currently stored in the set.
* @throws AssertionError If assertions are enabled and the index does not correspond to an
* existing key.
*/
public char indexGet(int index) {
assert index >= 0 : "The index must point at an existing key.";
assert index <= mask || (index == mask + 1 && hasEmptyKey);
return keys[index];
}
/**
* Replaces the existing equivalent key with the given one and returns any previous value stored
* for that key.
*
* @see #indexOf
* @param index The index of an existing key.
* @param equivalentKey The key to put in the set as a replacement. Must be equivalent to the key
* currently stored at the provided index.
* @return Returns the previous key stored in the set.
* @throws AssertionError If assertions are enabled and the index does not correspond to an
* existing key.
*/
public char indexReplace(int index, char equivalentKey) {
assert index >= 0 : "The index must point at an existing key.";
assert index <= mask || (index == mask + 1 && hasEmptyKey);
assert ((keys[index]) == (equivalentKey));
char previousValue = keys[index];
keys[index] = equivalentKey;
return previousValue;
}
/**
 * Inserts a key for an index that is not present in the set. This method may help in avoiding
 * double recalculation of the key's hash.
 *
 * @see #indexOf
 * @param index The index of a previously non-existing key, as returned from {@link #indexOf}.
 * @param key The key to insert; it must be the key the index was computed for.
 * @throws AssertionError If assertions are enabled and the index corresponds to an existing key
 *     or the target slot is not free.
 */
public void indexInsert(int index, char key) {
  assert index < 0 : "The index must not point at an existing key.";

  final int slot = ~index;
  if (key == 0) {
    assert slot == mask + 1;
    assert keys[slot] == 0;
    hasEmptyKey = true;
    return;
  }

  assert keys[slot] == 0;
  if (assigned == resizeAt) {
    allocateThenInsertThenRehash(slot, key);
  } else {
    keys[slot] = key;
  }
  assigned++;
}
/**
 * Removes the key located at an index previously obtained from {@link #indexOf}.
 *
 * @see #indexOf
 * @param index The index of the key to remove, as returned from {@link #indexOf}.
 * @throws AssertionError If assertions are enabled and the index does not correspond to an
 *     existing key.
 */
public void indexRemove(int index) {
  assert index >= 0 : "The index must point at an existing key.";
  assert index <= mask || (index == mask + 1 && hasEmptyKey);

  if (index <= mask) {
    shiftConflictingKeys(index);
  } else {
    // Indexes past the mask denote the zero key, which is tracked by a flag only.
    hasEmptyKey = false;
  }
}
/**
* Validate load factor range and return it. Override and suppress if you need insane load
* factors.
*/
protected double verifyLoadFactor(double loadFactor) {
checkLoadFactor(loadFactor, MIN_LOAD_FACTOR, MAX_LOAD_FACTOR);
return loadFactor;
}
/**
 * Rehash from old buffers to new buffers.
 *
 * <p>Every non-zero key found in <code>fromKeys</code> is re-inserted into the current {@link
 * #keys} buffer using linear probing. Neither {@link #assigned} nor {@link #hasEmptyKey} is
 * modified here.
 *
 * @param fromKeys the previous key buffer, whose length must be a power of two plus one.
 */
protected void rehash(char[] fromKeys) {
  assert HashContainers.checkPowerOfTwo(fromKeys.length - 1);
  // Rehash all stored keys into the new buffers.
  final char[] keys = this.keys;
  final int mask = this.mask;
  char existing;
  // The pre-decrement makes the scan start at fromKeys.length - 2: the last slot is the extra
  // one allocateBuffers() appends for the zero key, and it is skipped.
  for (int i = fromKeys.length - 1; --i >= 0; ) {
    if (!((existing = fromKeys[i]) == 0)) {
      int slot = hashKey(existing) & mask;
      while (!((keys[slot]) == 0)) {
        slot = (slot + 1) & mask;
      }
      keys[slot] = existing;
    }
  }
}
/**
* Allocate new internal buffers. This method attempts to allocate and assign internal buffers
* atomically (either allocations succeed or not).
*/
protected void allocateBuffers(int arraySize) {
assert Integer.bitCount(arraySize) == 1;
// Ensure no change is done if we hit an OOM.
char[] prevKeys = this.keys;
try {
int emptyElementSlot = 1;
this.keys = (new char[arraySize + emptyElementSlot]);
} catch (OutOfMemoryError e) {
this.keys = prevKeys;
throw new BufferAllocationException(
"Not enough memory to allocate buffers for rehashing: %,d -> %,d",
e, this.keys == null ? 0 : size(), arraySize);
}
this.resizeAt = expandAtCount(arraySize, loadFactor);
this.mask = arraySize - 1;
}
/**
* This method is invoked when there is a new key to be inserted into the buffer but there is not
* enough empty slots to do so.
*
* <p>New buffers are allocated. If this succeeds, we know we can proceed with rehashing so we
* assign the pending element to the previous buffer (possibly violating the invariant of having
* at least one empty slot) and rehash all keys, substituting new buffers at the end.
*/
protected void allocateThenInsertThenRehash(int slot, char pendingKey) {
assert assigned == resizeAt && ((keys[slot]) == 0) && !((pendingKey) == 0);
// Try to allocate new buffers first. If we OOM, we leave in a consistent state.
final char[] prevKeys = this.keys;
allocateBuffers(nextBufferSize(mask + 1, size(), loadFactor));
assert this.keys.length > prevKeys.length;
// We have succeeded at allocating new data so insert the pending key/value at
// the free slot in the old arrays before rehashing.
prevKeys[slot] = pendingKey;
// Rehash old keys, including the pending key.
rehash(prevKeys);
}
/**
 * Shift all the slot-conflicting keys allocated to (and including) <code>slot</code>.
 *
 * <p>This removes the key at <code>gapSlot</code>: the keys following it in the same probe run
 * are moved back where needed so that lookups by linear probing still reach them, the final gap
 * is marked empty and {@link #assigned} is decremented.
 *
 * @param gapSlot the slot of the key being removed.
 */
protected void shiftConflictingKeys(int gapSlot) {
  final char[] keys = this.keys;
  final int mask = this.mask;
  // Perform shifts of conflicting keys to fill in the gap.
  int distance = 0;
  while (true) {
    final int slot = (gapSlot + (++distance)) & mask;
    final char existing = keys[slot];
    if (((existing) == 0)) {
      // An empty slot ends the probe run; nothing further can depend on the gap.
      break;
    }
    // idealSlot is the unmasked hash; the mask is applied in the subtraction below, which
    // yields how far (modulo table size) the key sits from its preferred slot.
    final int idealSlot = hashKey(existing);
    final int shift = (slot - idealSlot) & mask;
    if (shift >= distance) {
      // Entry at this position was originally at or before the gap slot.
      // Move the conflict-shifted entry to the gap's position and repeat the procedure
      // for any entries to the right of the current position, treating it
      // as the new gap.
      keys[gapSlot] = existing;
      gapSlot = slot;
      distance = 0;
    }
  }
  // Mark the last found gap slot without a conflict as empty.
  keys[gapSlot] = EMPTY_KEY;
  assigned--;
}
}

View File

@ -0,0 +1,827 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.hppc;
import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_EXPECTED_ELEMENTS;
import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_LOAD_FACTOR;
import static org.apache.lucene.util.hppc.HashContainers.ITERATION_SEED;
import static org.apache.lucene.util.hppc.HashContainers.MAX_LOAD_FACTOR;
import static org.apache.lucene.util.hppc.HashContainers.MIN_LOAD_FACTOR;
import static org.apache.lucene.util.hppc.HashContainers.checkLoadFactor;
import static org.apache.lucene.util.hppc.HashContainers.checkPowerOfTwo;
import static org.apache.lucene.util.hppc.HashContainers.expandAtCount;
import static org.apache.lucene.util.hppc.HashContainers.iterationIncrement;
import static org.apache.lucene.util.hppc.HashContainers.minBufferSize;
import static org.apache.lucene.util.hppc.HashContainers.nextBufferSize;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
/**
* A hash map of <code>char</code> to <code>Object</code>, implemented using open addressing with
* linear probing for collision resolution. Supports null values.
*
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.CharObjectHashMap
*
* <p>github: https://github.com/carrotsearch/hppc release 0.9.0
*/
@SuppressWarnings("unchecked")
public class CharObjectHashMap<VType>
implements Iterable<CharObjectHashMap.CharObjectCursor<VType>>, Accountable, Cloneable {
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(CharObjectHashMap.class);
private static final char EMPTY_KEY = (char) 0;
/** The array holding keys. */
public char[] keys;
/** The array holding values. */
public Object[] values;
/**
* The number of stored keys (assigned key slots), excluding the special "empty" key, if any (use
* {@link #size()} instead).
*
* @see #size()
*/
protected int assigned;
/** Mask for slot scans in {@link #keys}. */
protected int mask;
/** Expand (rehash) {@link #keys} when {@link #assigned} hits this value. */
protected int resizeAt;
/** Special treatment for the "empty slot" key marker. */
protected boolean hasEmptyKey;
/** The load factor for {@link #keys}. */
protected double loadFactor;
/** Seed used to ensure the hash iteration order is different from an iteration to another. */
protected int iterationSeed;
/** New instance with sane defaults. */
public CharObjectHashMap() {
this(DEFAULT_EXPECTED_ELEMENTS);
}
/**
* New instance with sane defaults.
*
* @param expectedElements The expected number of elements guaranteed not to cause buffer
* expansion (inclusive).
*/
public CharObjectHashMap(int expectedElements) {
this(expectedElements, DEFAULT_LOAD_FACTOR);
}
/**
* New instance with the provided defaults.
*
* @param expectedElements The expected number of elements guaranteed not to cause a rehash
* (inclusive).
* @param loadFactor The load factor for internal buffers. Insane load factors (zero, full
* capacity) are rejected by {@link #verifyLoadFactor(double)}.
*/
public CharObjectHashMap(int expectedElements, double loadFactor) {
this.loadFactor = verifyLoadFactor(loadFactor);
iterationSeed = ITERATION_SEED.incrementAndGet();
ensureCapacity(expectedElements);
}
/** Create a hash map from all key-value pairs of another map. */
public CharObjectHashMap(CharObjectHashMap<VType> map) {
this(map.size());
putAll(map);
}
/**
 * Associates the given value with the given key, replacing any previous mapping.
 *
 * @param key the key to map.
 * @param value the value to associate with the key.
 * @return the value previously associated with the key, or <code>null</code> if there was no
 *     mapping for it.
 */
public VType put(char key, VType value) {
  assert assigned < mask + 1;

  final int slotMask = this.mask;
  if (key == 0) {
    // The zero key marks empty slots, so its value lives in the extra slot past the mask.
    final int emptySlot = slotMask + 1;
    final VType replaced = hasEmptyKey ? (VType) values[emptySlot] : null;
    hasEmptyKey = true;
    values[emptySlot] = value;
    return replaced;
  }

  final char[] table = this.keys;
  int slot = hashKey(key) & slotMask;
  for (char candidate = table[slot]; candidate != 0; candidate = table[slot]) {
    if (candidate == key) {
      final VType replaced = (VType) values[slot];
      values[slot] = value;
      return replaced;
    }
    slot = (slot + 1) & slotMask;
  }

  if (assigned == resizeAt) {
    allocateThenInsertThenRehash(slot, key, value);
  } else {
    table[slot] = key;
    values[slot] = value;
  }
  assigned++;
  return null;
}
public int putAll(Iterable<? extends CharObjectCursor<? extends VType>> iterable) {
final int count = size();
for (CharObjectCursor<? extends VType> c : iterable) {
put(c.key, c.value);
}
return size() - count;
}
/**
 * <a href="http://trove4j.sourceforge.net">Trove</a>-inspired API method. An equivalent of the
 * following code:
 *
 * <pre>
 * if (!map.containsKey(key)) map.put(key, value);
 * </pre>
 *
 * @param key The key of the value to check.
 * @param value The value to put if <code>key</code> does not exist.
 * @return <code>true</code> if <code>key</code> did not exist and <code>value</code> was placed
 *     in the map.
 */
public boolean putIfAbsent(char key, VType value) {
  final int keyIndex = indexOf(key);
  if (indexExists(keyIndex)) {
    return false;
  }
  // Reuse the index computed above so the key does not have to be located again.
  indexInsert(keyIndex, key, value);
  return true;
}
/**
 * Removes the mapping for the given key, if there is one.
 *
 * @param key the key whose mapping should be removed.
 * @return the value previously associated with the key, or <code>null</code> if the key was not
 *     present (or was mapped to <code>null</code>).
 */
public VType remove(char key) {
  final int mask = this.mask;
  if (((key) == 0)) {
    if (!hasEmptyKey) {
      return null;
    }
    hasEmptyKey = false;
    VType previousValue = (VType) values[mask + 1];
    // Clear the slot with null. Assigning the literal 0 would autobox an Integer into the
    // Object[] values array instead of emptying it.
    values[mask + 1] = null;
    return previousValue;
  } else {
    final char[] keys = this.keys;
    int slot = hashKey(key) & mask;
    char existing;
    // Linear probing: an empty slot means the key is not in the map.
    while (!((existing = keys[slot]) == 0)) {
      if (((existing) == (key))) {
        final VType previousValue = (VType) values[slot];
        shiftConflictingKeys(slot);
        return previousValue;
      }
      slot = (slot + 1) & mask;
    }
    return null;
  }
}
/**
 * Looks up the value associated with the given key.
 *
 * @param key the key to look up.
 * @return the value mapped to the key, or <code>null</code> if the key is not present.
 */
public VType get(char key) {
  if (key == 0) {
    return hasEmptyKey ? (VType) values[mask + 1] : null;
  }

  final char[] table = this.keys;
  final int slotMask = this.mask;
  int slot = hashKey(key) & slotMask;
  for (char candidate = table[slot]; candidate != 0; candidate = table[slot]) {
    if (candidate == key) {
      return (VType) values[slot];
    }
    slot = (slot + 1) & slotMask;
  }
  return null;
}
/**
 * Looks up the value associated with the given key, falling back to a caller-supplied default.
 *
 * @param key the key to look up.
 * @param defaultValue the value to return when the key is not present.
 * @return the value mapped to the key, or <code>defaultValue</code> if the key is not present.
 */
public VType getOrDefault(char key, VType defaultValue) {
  if (key == 0) {
    return hasEmptyKey ? (VType) values[mask + 1] : defaultValue;
  }

  final char[] table = this.keys;
  final int slotMask = this.mask;
  int slot = hashKey(key) & slotMask;
  for (char candidate = table[slot]; candidate != 0; candidate = table[slot]) {
    if (candidate == key) {
      return (VType) values[slot];
    }
    slot = (slot + 1) & slotMask;
  }
  return defaultValue;
}
public boolean containsKey(char key) {
if (((key) == 0)) {
return hasEmptyKey;
} else {
final char[] keys = this.keys;
final int mask = this.mask;
int slot = hashKey(key) & mask;
char existing;
while (!((existing = keys[slot]) == 0)) {
if (((existing) == (key))) {
return true;
}
slot = (slot + 1) & mask;
}
return false;
}
}
public int indexOf(char key) {
final int mask = this.mask;
if (((key) == 0)) {
return hasEmptyKey ? mask + 1 : ~(mask + 1);
} else {
final char[] keys = this.keys;
int slot = hashKey(key) & mask;
char existing;
while (!((existing = keys[slot]) == 0)) {
if (((existing) == (key))) {
return slot;
}
slot = (slot + 1) & mask;
}
return ~slot;
}
}
/**
 * Tells whether an index returned from {@link #indexOf} refers to a key that is in the map.
 *
 * @param index The index of a given key, as returned from {@link #indexOf}.
 * @return <code>true</code> if the index is non-negative (existing key), <code>false</code> if
 *     it is negative (non-existing key).
 */
public boolean indexExists(int index) {
  final boolean exists = index >= 0;
  assert !exists || index <= mask || (index == mask + 1 && hasEmptyKey);
  return exists;
}
public VType indexGet(int index) {
assert index >= 0 : "The index must point at an existing key.";
assert index <= mask || (index == mask + 1 && hasEmptyKey);
return (VType) values[index];
}
/**
 * Overwrites the value stored at an index of an existing key.
 *
 * @param index a non-negative index, as returned by {@link #indexOf} for an existing key.
 * @param newValue the value to store.
 * @return the value that was stored at that index before the call.
 */
public VType indexReplace(int index, VType newValue) {
  assert index >= 0 : "The index must point at an existing key.";
  assert index <= mask || (index == mask + 1 && hasEmptyKey);
  final Object[] slots = values;
  final VType replaced = (VType) slots[index];
  slots[index] = newValue;
  return replaced;
}
/**
 * Inserts a key/value pair at the position encoded by a negative index from {@link #indexOf}.
 *
 * @param index the negative index returned by {@link #indexOf} for the absent key.
 * @param key the key to insert.
 * @param value the value to associate with the key.
 */
public void indexInsert(int index, char key, VType value) {
  assert index < 0 : "The index must not point at an existing key.";
  final int slot = ~index;

  if (key == 0) {
    // The zero key only ever goes to the extra slot and is not counted in 'assigned'.
    assert slot == mask + 1;
    values[slot] = value;
    hasEmptyKey = true;
    return;
  }

  assert keys[slot] == 0;
  if (assigned != resizeAt) {
    keys[slot] = key;
    values[slot] = value;
  } else {
    // At the resize threshold: grow the buffers, then insert and rehash.
    allocateThenInsertThenRehash(slot, key, value);
  }
  assigned++;
}
/**
 * Removes the entry located at {@code index}.
 *
 * @param index a non-negative index, as returned by {@link #indexOf} for an existing key.
 * @return the value that was stored at that index.
 */
public VType indexRemove(int index) {
  assert index >= 0 : "The index must point at an existing key.";
  assert index <= mask || (index == mask + 1 && hasEmptyKey);
  VType previousValue = (VType) values[index];
  if (index > mask) {
    // Removing the zero key, which lives in the extra slot past the hashed area.
    assert index == mask + 1;
    hasEmptyKey = false;
    // Clear the reference. A literal 0 would be autoboxed to an Integer and left in the
    // values array instead of releasing the slot.
    values[index] = null;
  } else {
    shiftConflictingKeys(index);
  }
  return previousValue;
}
/**
 * Removes all entries from the map while keeping the currently allocated buffers.
 *
 * <p>Both the keys and the value references are reset, so values that were stored in the map
 * are no longer reachable through it.
 */
public void clear() {
  assigned = 0;
  hasEmptyKey = false;
  Arrays.fill(keys, EMPTY_KEY);
  // Release value references as well; otherwise cleared entries stay strongly reachable
  // through the values array. shiftConflictingKeys nulls freed slots in the same way.
  Arrays.fill(values, null);
}
/**
 * Removes all entries and drops the current buffers, then re-allocates buffers sized for
 * {@code DEFAULT_EXPECTED_ELEMENTS}.
 */
public void release() {
  // Dropping the arrays first makes ensureCapacity allocate fresh ones.
  keys = null;
  values = null;
  hasEmptyKey = false;
  assigned = 0;
  ensureCapacity(DEFAULT_EXPECTED_ELEMENTS);
}
/**
 * Returns the number of entries in the map.
 *
 * @return the count of assigned hashed slots, plus one if the zero key is present.
 */
public int size() {
  int count = assigned;
  if (hasEmptyKey) {
    count++;
  }
  return count;
}
/**
 * Tells whether the map holds no entries.
 *
 * @return {@code true} when {@link #size()} is zero.
 */
public boolean isEmpty() {
  final int count = size();
  return count == 0;
}
/**
 * Computes a hash from the mixed bits of every key and value; the sum does not depend on
 * iteration order.
 */
@Override
public int hashCode() {
  int hash = hasEmptyKey ? 0xDEADBEEF : 0;
  final Iterator<CharObjectCursor<VType>> entries = iterator();
  while (entries.hasNext()) {
    final CharObjectCursor<VType> entry = entries.next();
    hash += BitMixer.mix(entry.key);
    hash += BitMixer.mix(entry.value);
  }
  return hash;
}
/**
 * Two maps are equal when they are of exactly the same class and {@link #equalElements} holds.
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (obj == null || getClass() != obj.getClass()) {
    return false;
  }
  return equalElements(getClass().cast(obj));
}
/**
 * Returns true if the other map has the same size and each of its entries is present in this map
 * with an equal value.
 */
protected boolean equalElements(CharObjectHashMap<?> other) {
  if (other.size() != size()) {
    return false;
  }
  for (CharObjectCursor<?> entry : other) {
    final char key = entry.key;
    final boolean sameMapping =
        containsKey(key) && java.util.Objects.equals(entry.value, get(key));
    if (!sameMapping) {
      return false;
    }
  }
  return true;
}
/**
 * Makes sure the map can hold the given number of keys (entries) without having to resize its
 * buffers. Also allocates the buffers when none exist yet.
 *
 * @param expectedElements The total number of keys, inclusive.
 */
public void ensureCapacity(int expectedElements) {
  final boolean mustAllocate = expectedElements > resizeAt || keys == null;
  if (!mustAllocate) {
    return;
  }
  final char[] oldKeys = this.keys;
  final VType[] oldValues = (VType[]) this.values;
  allocateBuffers(minBufferSize(expectedElements, loadFactor));
  if (oldKeys == null || isEmpty()) {
    // Nothing to carry over into the new buffers.
    return;
  }
  rehash(oldKeys, oldValues);
}
/**
 * Advances and returns the iteration seed from which iterators derive their starting slot and
 * offset increment. This method does not need to be synchronized, what matters is that each
 * thread gets a sequence of varying seeds.
 */
protected int nextIterationSeed() {
  final int advanced = BitMixer.mixPhi(iterationSeed);
  iterationSeed = advanced;
  return advanced;
}
/** Returns a new iterator over the key/value entries of this map. */
@Override
public Iterator<CharObjectCursor<VType>> iterator() {
  final EntryIterator entries = new EntryIterator();
  return entries;
}
/** Sums the base object size, the keys array size and the estimated size of the values. */
@Override
public long ramBytesUsed() {
  long total = BASE_RAM_BYTES_USED;
  total += RamUsageEstimator.sizeOf(keys);
  total += sizeOfValues();
  return total;
}
/**
 * Estimates the memory taken by the values: the shallow size of the values array plus the
 * estimated size of every value currently stored in the map.
 */
private long sizeOfValues() {
  long size = RamUsageEstimator.shallowSizeOf(values);
  for (ObjectCursor<VType> cursor : values()) {
    // Measure the stored value itself, not the cursor wrapping it.
    size += RamUsageEstimator.sizeOfObject(cursor.value);
  }
  return size;
}
/** Iterator over key/value entries, used by {@link #iterator}; it reuses a single cursor. */
private final class EntryIterator extends AbstractIterator<CharObjectCursor<VType>> {
  private final CharObjectCursor<VType> cursor;
  private final int increment;
  private int index;
  private int slot;

  public EntryIterator() {
    cursor = new CharObjectCursor<VType>();
    final int seed = nextIterationSeed();
    increment = iterationIncrement(seed);
    slot = seed & mask;
  }

  @Override
  protected CharObjectCursor<VType> fetch() {
    final int slotMask = CharObjectHashMap.this.mask;

    // Visit the hashed slots, stepping by 'increment' from the seeded start.
    while (index <= slotMask) {
      index++;
      slot = (slot + increment) & slotMask;
      final char candidate = keys[slot];
      if (candidate == 0) {
        continue;
      }
      cursor.index = slot;
      cursor.key = candidate;
      cursor.value = (VType) values[slot];
      return cursor;
    }

    // Finally report the zero key, stored in the extra slot after the hashed area.
    if (hasEmptyKey && index == slotMask + 1) {
      cursor.index = index;
      cursor.key = 0;
      cursor.value = (VType) values[index];
      index++;
      return cursor;
    }

    return done();
  }
}
/** Returns a new view over the keys of this map. */
public KeysContainer keys() {
  final KeysContainer view = new KeysContainer();
  return view;
}
/** Iterable view over the keys held by the enclosing map. */
public final class KeysContainer implements Iterable<CharCursor> {
  @Override
  public Iterator<CharCursor> iterator() {
    return new KeysIterator();
  }

  /** Returns the number of keys, which is the size of the enclosing map. */
  public int size() {
    return CharObjectHashMap.this.size();
  }

  /** Copies all keys into a new array, in iteration order. */
  public char[] toArray() {
    final char[] result = new char[size()];
    int next = 0;
    final Iterator<CharCursor> it = iterator();
    while (it.hasNext()) {
      result[next] = it.next().value;
      next++;
    }
    return result;
  }
}
/** Iterator over the keys present in the map; it reuses a single cursor. */
private final class KeysIterator extends AbstractIterator<CharCursor> {
  private final CharCursor cursor;
  private final int increment;
  private int index;
  private int slot;

  public KeysIterator() {
    cursor = new CharCursor();
    final int seed = nextIterationSeed();
    increment = iterationIncrement(seed);
    slot = seed & mask;
  }

  @Override
  protected CharCursor fetch() {
    final int slotMask = CharObjectHashMap.this.mask;

    // Visit the hashed slots, stepping by 'increment' from the seeded start.
    while (index <= slotMask) {
      index++;
      slot = (slot + increment) & slotMask;
      final char candidate = keys[slot];
      if (candidate == 0) {
        continue;
      }
      cursor.index = slot;
      cursor.value = candidate;
      return cursor;
    }

    // Finally report the zero key, whose index is the extra slot after the hashed area.
    if (hasEmptyKey && index == slotMask + 1) {
      cursor.index = index;
      index++;
      cursor.value = 0;
      return cursor;
    }

    return done();
  }
}
/**
 * @return Returns a new view over all values stored in this map.
 */
public ValuesContainer values() {
  final ValuesContainer view = new ValuesContainer();
  return view;
}
/** Iterable view over the values held by the enclosing map. */
public final class ValuesContainer implements Iterable<ObjectCursor<VType>> {
  @Override
  public Iterator<ObjectCursor<VType>> iterator() {
    return new ValuesIterator();
  }

  /** Returns the number of values, which is the size of the enclosing map. */
  public int size() {
    return CharObjectHashMap.this.size();
  }

  /** Copies all values into a new array, in iteration order. */
  public VType[] toArray() {
    final VType[] result = (VType[]) new Object[size()];
    int next = 0;
    final Iterator<ObjectCursor<VType>> it = iterator();
    while (it.hasNext()) {
      result[next] = it.next().value;
      next++;
    }
    return result;
  }
}
/** Iterator over the values present in the map; it reuses a single cursor. */
private final class ValuesIterator extends AbstractIterator<ObjectCursor<VType>> {
  private final ObjectCursor<VType> cursor;
  private final int increment;
  private int index;
  private int slot;

  public ValuesIterator() {
    cursor = new ObjectCursor<>();
    final int seed = nextIterationSeed();
    increment = iterationIncrement(seed);
    slot = seed & mask;
  }

  @Override
  protected ObjectCursor<VType> fetch() {
    final int slotMask = CharObjectHashMap.this.mask;

    // Visit the hashed slots, stepping by 'increment' from the seeded start.
    while (index <= slotMask) {
      index++;
      slot = (slot + increment) & slotMask;
      if (keys[slot] == 0) {
        continue;
      }
      cursor.index = slot;
      cursor.value = (VType) values[slot];
      return cursor;
    }

    // Finally report the value of the zero key, kept in the extra slot.
    if (hasEmptyKey && index == slotMask + 1) {
      cursor.index = index;
      cursor.value = (VType) values[index];
      index++;
      return cursor;
    }

    return done();
  }
}
/**
 * Creates a copy of this map with its own key and value arrays (the values themselves are not
 * copied) and a new iteration seed.
 */
@Override
public CharObjectHashMap<VType> clone() {
  final CharObjectHashMap<VType> copy;
  try {
    copy = (CharObjectHashMap<VType>) super.clone();
  } catch (CloneNotSupportedException e) {
    throw new RuntimeException(e);
  }
  copy.keys = keys.clone();
  copy.values = values.clone();
  copy.hasEmptyKey = hasEmptyKey;
  copy.iterationSeed = ITERATION_SEED.incrementAndGet();
  return copy;
}
/** Renders the map as {@code [key=>value, key=>value]} in iteration order. */
@Override
public String toString() {
  final StringBuilder out = new StringBuilder("[");
  String separator = "";
  for (CharObjectCursor<VType> entry : this) {
    out.append(separator).append(entry.key).append("=>").append(entry.value);
    separator = ", ";
  }
  return out.append("]").toString();
}
/**
 * Builds a map by pairing {@code keys[i]} with {@code values[i]}.
 *
 * @throws IllegalArgumentException if the two arrays differ in length.
 */
public static <VType> CharObjectHashMap<VType> from(char[] keys, VType[] values) {
  final int count = keys.length;
  if (count != values.length) {
    throw new IllegalArgumentException(
        "Arrays of keys and values must have an identical length.");
  }
  final CharObjectHashMap<VType> result = new CharObjectHashMap<>(count);
  int position = 0;
  for (char key : keys) {
    result.put(key, values[position]);
    position++;
  }
  return result;
}
/**
 * Computes the hash code of a key.
 *
 * <p>The output from this function should evenly distribute keys across the entire integer range.
 */
protected int hashKey(char key) {
  // The zero key is the empty slot marker and is handled before hashing.
  assert key != 0;
  return BitMixer.mixPhi(key);
}
/**
 * Checks the load factor against {@code MIN_LOAD_FACTOR} and {@code MAX_LOAD_FACTOR} and returns
 * it unchanged. Override and suppress if you need insane load factors.
 */
protected double verifyLoadFactor(double loadFactor) {
  final double checked = loadFactor;
  checkLoadFactor(checked, MIN_LOAD_FACTOR, MAX_LOAD_FACTOR);
  return checked;
}
/** Re-inserts every entry of the old buffers into the current (already allocated) buffers. */
protected void rehash(char[] fromKeys, VType[] fromValues) {
  assert fromKeys.length == fromValues.length && checkPowerOfTwo(fromKeys.length - 1);

  final char[] table = this.keys;
  final VType[] slots = (VType[]) this.values;
  final int slotMask = this.mask;

  // The last cell of each array is the zero key's slot: copy it straight across.
  final int oldEmptySlot = fromKeys.length - 1;
  table[table.length - 1] = fromKeys[oldEmptySlot];
  slots[slots.length - 1] = fromValues[oldEmptySlot];

  // Walk the remaining old slots from the end and probe for a free place in the new table.
  for (int from = oldEmptySlot - 1; from >= 0; from--) {
    final char key = fromKeys[from];
    if (key == 0) {
      continue;
    }
    int target = hashKey(key) & slotMask;
    while (table[target] != 0) {
      target = (target + 1) & slotMask;
    }
    table[target] = key;
    slots[target] = fromValues[from];
  }
}
/**
 * Allocates new key and value buffers of {@code arraySize} hashed slots plus one extra slot,
 * then updates the resize threshold and the mask. If allocation runs out of memory, the
 * previous buffers are restored before the failure is reported.
 */
protected void allocateBuffers(int arraySize) {
  assert Integer.bitCount(arraySize) == 1;

  // Remember the current buffers so they can be put back on OOM.
  final char[] savedKeys = this.keys;
  final VType[] savedValues = (VType[]) this.values;
  try {
    final int emptyElementSlot = 1;
    final int length = arraySize + emptyElementSlot;
    this.keys = new char[length];
    this.values = new Object[length];
  } catch (OutOfMemoryError e) {
    this.keys = savedKeys;
    this.values = savedValues;
    throw new BufferAllocationException(
        "Not enough memory to allocate buffers for rehashing: %,d -> %,d",
        e, this.mask + 1, arraySize);
  }

  this.resizeAt = expandAtCount(arraySize, loadFactor);
  this.mask = arraySize - 1;
}
/**
 * Called when a new key/value pair must be inserted but the resize threshold has been reached.
 *
 * <p>Larger buffers are allocated first. Once that has succeeded, the pending pair is written
 * into the free slot of the old buffers (possibly violating the invariant of having at least one
 * empty slot there) and all old entries, including the pending one, are rehashed into the new
 * buffers.
 */
protected void allocateThenInsertThenRehash(int slot, char pendingKey, VType pendingValue) {
  assert assigned == resizeAt && keys[slot] == 0 && pendingKey != 0;

  // Allocate before touching anything: an OOM here leaves the map consistent.
  final char[] oldKeys = this.keys;
  final VType[] oldValues = (VType[]) this.values;
  final int grownSize = nextBufferSize(mask + 1, size(), loadFactor);
  allocateBuffers(grownSize);
  assert this.keys.length > oldKeys.length;

  // Place the pending pair in the old arrays so that rehashing carries it over.
  oldKeys[slot] = pendingKey;
  oldValues[slot] = pendingValue;
  rehash(oldKeys, oldValues);
}
/**
 * Removes the entry at {@code gapSlot} and shifts the entries that follow it in the probe
 * sequence back to close the gap, then decrements the assigned count.
 */
protected void shiftConflictingKeys(int gapSlot) {
  final char[] table = this.keys;
  final VType[] slots = (VType[]) this.values;
  final int slotMask = this.mask;

  // 'distance' is how far the inspected slot lies past the current gap.
  int distance = 1;
  for (; ; ) {
    final int slot = (gapSlot + distance) & slotMask;
    final char existing = table[slot];
    if (existing == 0) {
      break;
    }

    final int shift = (slot - hashKey(existing)) & slotMask;
    if (shift < distance) {
      // This entry's ideal slot lies after the gap: leave it and look further.
      distance++;
      continue;
    }

    // The entry was displaced from at or before the gap: move it into the gap and continue
    // with its old position as the new gap.
    table[gapSlot] = existing;
    slots[gapSlot] = slots[slot];
    gapSlot = slot;
    distance = 1;
  }

  // The last gap found has no conflicting follower: mark it empty.
  table[gapSlot] = 0;
  slots[gapSlot] = null;
  assigned--;
}
/** Forked from HPPC; a cursor exposing an int index, a char key and a value. */
public static final class CharObjectCursor<VType> {
  /**
   * Index of the current key and value in the container this cursor belongs to. The container
   * defines what the index means (usually an index in the underlying storage buffer).
   */
  public int index;

  /** The current key. */
  public char key;

  /** The current value. */
  public VType value;

  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder();
    text.append("[cursor, index: ").append(index);
    text.append(", key: ").append(key);
    text.append(", value: ").append(value);
    return text.append("]").toString();
  }
}
}

View File

@ -22,26 +22,26 @@ import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
/** Constants for primitive maps. */ /** Constants for primitive maps. */
public class HashContainers { class HashContainers {
public static final int DEFAULT_EXPECTED_ELEMENTS = 4; static final int DEFAULT_EXPECTED_ELEMENTS = 4;
public static final float DEFAULT_LOAD_FACTOR = 0.75f; static final float DEFAULT_LOAD_FACTOR = 0.75f;
/** Minimal sane load factor (99 empty slots per 100). */ /** Minimal sane load factor (99 empty slots per 100). */
public static final float MIN_LOAD_FACTOR = 1 / 100.0f; static final float MIN_LOAD_FACTOR = 1 / 100.0f;
/** Maximum sane load factor (1 empty slot per 100). */ /** Maximum sane load factor (1 empty slot per 100). */
public static final float MAX_LOAD_FACTOR = 99 / 100.0f; static final float MAX_LOAD_FACTOR = 99 / 100.0f;
/** Minimum hash buffer size. */ /** Minimum hash buffer size. */
public static final int MIN_HASH_ARRAY_LENGTH = 4; static final int MIN_HASH_ARRAY_LENGTH = 4;
/** /**
* Maximum array size for hash containers (power-of-two and still allocable in Java, not a * Maximum array size for hash containers (power-of-two and still allocable in Java, not a
* negative int). * negative int).
*/ */
public static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1; static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1;
static final AtomicInteger ITERATION_SEED = new AtomicInteger(); static final AtomicInteger ITERATION_SEED = new AtomicInteger();

View File

@ -152,6 +152,17 @@ public class IntHashSet implements Iterable<IntCursor>, Accountable, Cloneable {
return count; return count;
} }
/**
 * Adds every element of the given set to this set, after calling {@code ensureCapacity} with the
 * size of the given set.
 *
 * @return Returns the number of elements actually added as a result of this call (not previously
 *     present in the set).
 */
public int addAll(IntHashSet set) {
  ensureCapacity(set.size());
  // Typed as Iterable so that the iterable-based overload is selected.
  final Iterable<? extends IntCursor> elements = set;
  return addAll(elements);
}
/** /**
* Adds all elements from the given iterable to this set. * Adds all elements from the given iterable to this set.
* *

View File

@ -145,6 +145,17 @@ public class LongHashSet implements Iterable<LongCursor>, Accountable, Cloneable
return count; return count;
} }
/**
 * Adds every element of the given set to this set, after calling {@code ensureCapacity} with the
 * size of the given set.
 *
 * @return Returns the number of elements actually added as a result of this call (not previously
 *     present in the set).
 */
public int addAll(LongHashSet set) {
  ensureCapacity(set.size());
  // Typed as Iterable so that the iterable-based overload is selected.
  final Iterable<? extends LongCursor> elements = set;
  return addAll(elements);
}
/** /**
* Adds all elements from the given iterable to this set. * Adds all elements from the given iterable to this set.
* *

View File

@ -0,0 +1,473 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.hppc;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.lessThan;
import static org.hamcrest.Matchers.not;
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.hamcrest.MatcherAssert;
import org.junit.Before;
import org.junit.Test;
/**
 * Tests for {@link CharHashSet}.
 *
 * <p>Mostly forked and trimmed from com.carrotsearch.hppc.CharHashSetTest
 *
 * <p>github: https://github.com/carrotsearch/hppc release: 0.9.0
 */
public class TestCharHashSet extends LuceneTestCase {
/** The zero-valued key. */
private static final char EMPTY_KEY = (char) 0;
/* Ready to use key values; keyE is the zero key, the others are 'a' + n. */
private final char keyE = 0;
private final char key1 = cast(1);
private final char key2 = cast(2);
private final char key3 = cast(3);
private final char key4 = cast(4);
/** Per-test fresh initialized instance. */
private CharHashSet set;
/** Convert to target type from an integer used to test stuff. */
private static char cast(int v) {
return (char) ('a' + v);
}
/** Creates the fresh set instance used by each test. */
@Before
public void initialize() {
set = new CharHashSet();
}
/** Copies the set into another instance with addAll(CharHashSet) and compares the contents. */
@Test
public void testAddAllViaInterface() {
set.addAll(key1, key2);
CharHashSet iface = new CharHashSet();
iface.clear();
iface.addAll(set);
MatcherAssert.assertThat(set(iface.toArray()), is(equalTo(set(key1, key2))));
}
/** Exercises indexOf, indexExists, indexGet, indexReplace, indexInsert and indexRemove. */
@Test
public void testIndexMethods() {
set.add(keyE);
set.add(key1);
MatcherAssert.assertThat(set.indexOf(keyE), is(greaterThanOrEqualTo(0)));
MatcherAssert.assertThat(set.indexOf(key1), is(greaterThanOrEqualTo(0)));
MatcherAssert.assertThat(set.indexOf(key2), is(lessThan(0)));
MatcherAssert.assertThat(set.indexExists(set.indexOf(keyE)), is(true));
MatcherAssert.assertThat(set.indexExists(set.indexOf(key1)), is(true));
MatcherAssert.assertThat(set.indexExists(set.indexOf(key2)), is(false));
MatcherAssert.assertThat(set.indexGet(set.indexOf(keyE)), is(equalTo(keyE)));
MatcherAssert.assertThat(set.indexGet(set.indexOf(key1)), is(equalTo(key1)));
// Reading through the index of an absent key is expected to trip an assertion.
expectThrows(
AssertionError.class,
() -> {
set.indexGet(set.indexOf(key2));
});
MatcherAssert.assertThat(set.indexReplace(set.indexOf(keyE), keyE), is(equalTo(keyE)));
MatcherAssert.assertThat(set.indexReplace(set.indexOf(key1), key1), is(equalTo(key1)));
set.indexInsert(set.indexOf(key2), key2);
MatcherAssert.assertThat(set.indexGet(set.indexOf(key2)), is(equalTo(key2)));
MatcherAssert.assertThat(set.size(), is(equalTo(3)));
set.indexRemove(set.indexOf(keyE));
MatcherAssert.assertThat(set.size(), is(equalTo(2)));
set.indexRemove(set.indexOf(key2));
MatcherAssert.assertThat(set.size(), is(equalTo(1)));
MatcherAssert.assertThat(set.indexOf(keyE), is(lessThan(0)));
MatcherAssert.assertThat(set.indexOf(key1), is(greaterThanOrEqualTo(0)));
MatcherAssert.assertThat(set.indexOf(key2), is(lessThan(0)));
}
/** Checks that the index reported by each cursor can be used with indexExists and indexGet. */
@Test
public void testCursorIndexIsValid() {
set.add(keyE);
set.add(key1);
set.add(key2);
for (CharCursor c : set) {
MatcherAssert.assertThat(set.indexExists(c.index), is(true));
MatcherAssert.assertThat(set.indexGet(c.index), is(equalTo(c.value)));
}
}
/** Adds, looks up, removes and re-inserts the zero key. */
@Test
public void testEmptyKey() {
CharHashSet set = new CharHashSet();
boolean b = set.add(EMPTY_KEY);
MatcherAssert.assertThat(b, is(true));
MatcherAssert.assertThat(set.add(EMPTY_KEY), is(false));
MatcherAssert.assertThat(set.size(), is(equalTo(1)));
MatcherAssert.assertThat(set.isEmpty(), is(false));
MatcherAssert.assertThat(set(set.toArray()), is(equalTo(set(EMPTY_KEY))));
MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(true));
int index = set.indexOf(EMPTY_KEY);
MatcherAssert.assertThat(set.indexExists(index), is(true));
MatcherAssert.assertThat(set.indexGet(index), is(equalTo(EMPTY_KEY)));
MatcherAssert.assertThat(set.indexReplace(index, EMPTY_KEY), is(equalTo(EMPTY_KEY)));
// Randomly pick one of the two removal paths.
if (random().nextBoolean()) {
b = set.remove(EMPTY_KEY);
MatcherAssert.assertThat(b, is(true));
} else {
set.indexRemove(index);
}
MatcherAssert.assertThat(set.size(), is(equalTo(0)));
MatcherAssert.assertThat(set.isEmpty(), is(true));
MatcherAssert.assertThat(set(set.toArray()), is(empty()));
MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(false));
index = set.indexOf(EMPTY_KEY);
MatcherAssert.assertThat(set.indexExists(index), is(false));
set.indexInsert(index, EMPTY_KEY);
set.add(key1);
MatcherAssert.assertThat(set.size(), is(equalTo(2)));
MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(true));
index = set.indexOf(EMPTY_KEY);
MatcherAssert.assertThat(set.indexExists(index), is(true));
MatcherAssert.assertThat(set.indexGet(index), is(equalTo(EMPTY_KEY)));
}
/** Checks that no buffer allocation happens for additions made after ensureCapacity. */
@Test
public void testEnsureCapacity() {
// Counts calls to allocateBuffers.
final AtomicInteger expands = new AtomicInteger();
CharHashSet set =
new CharHashSet(0) {
@Override
protected void allocateBuffers(int arraySize) {
super.allocateBuffers(arraySize);
expands.incrementAndGet();
}
};
// Add some elements.
final int max = rarely() ? 0 : randomIntBetween(0, 250);
for (int i = 0; i < max; i++) {
set.add(cast(i));
}
final int additions = randomIntBetween(max, max + 5000);
set.ensureCapacity(additions + set.size());
final int before = expands.get();
for (int i = 0; i < additions; i++) {
set.add(cast(i));
}
assertEquals(before, expands.get());
}
/** A new set has size zero. */
@Test
public void testInitiallyEmpty() {
assertEquals(0, set.size());
}
/** add returns true for a new key and false for a key already present. */
@Test
public void testAdd() {
assertTrue(set.add(key1));
assertFalse(set.add(key1));
assertEquals(1, set.size());
}
/** addAll with explicit keys returns the number of keys newly added. */
@Test
public void testAdd2() {
set.addAll(key1, key1);
assertEquals(1, set.size());
assertEquals(1, set.addAll(key1, key2));
assertEquals(2, set.size());
}
/** addAll with an array containing duplicates keeps a single copy of each key. */
@Test
public void testAddVarArgs() {
set.addAll(asArray(0, 1, 2, 1, 0));
assertEquals(3, set.size());
assertSortedListEquals(set.toArray(), asArray(0, 1, 2));
}
/** addAll with another set returns the number of keys newly added. */
@Test
public void testAddAll() {
CharHashSet set2 = new CharHashSet();
set2.addAll(asArray(1, 2));
set.addAll(asArray(0, 1));
assertEquals(1, set.addAll(set2));
assertEquals(0, set.addAll(set2));
assertEquals(3, set.size());
assertSortedListEquals(set.toArray(), asArray(0, 1, 2));
}
/** remove returns true for a key that was present and false on the second attempt. */
@Test
public void testRemove() {
set.addAll(asArray(0, 1, 2, 3, 4));
assertTrue(set.remove(key2));
assertFalse(set.remove(key2));
assertEquals(4, set.size());
assertSortedListEquals(set.toArray(), asArray(0, 1, 3, 4));
}
/** Creates sets with expected element counts 0..255 and fills each with that many keys. */
@Test
public void testInitialCapacityAndGrowth() {
for (int i = 0; i < 256; i++) {
CharHashSet set = new CharHashSet(i);
for (int j = 0; j < i; j++) {
set.add(cast(j));
}
assertEquals(i, set.size());
}
}
/** Operates on a set created with load factor 1 and counts buffer reallocations. */
@Test
public void testBug_HPPC73_FullCapacityGet() {
final AtomicInteger reallocations = new AtomicInteger();
final int elements = 0x7F;
set =
new CharHashSet(elements, 1f) {
@Override
protected double verifyLoadFactor(double loadFactor) {
// Skip load factor sanity range checking.
return loadFactor;
}
@Override
protected void allocateBuffers(int arraySize) {
super.allocateBuffers(arraySize);
reallocations.incrementAndGet();
}
};
int reallocationsBefore = reallocations.get();
assertEquals(reallocationsBefore, 1);
for (int i = 1; i <= elements; i++) {
set.add(cast(i));
}
// Non-existent key.
char outOfSet = cast(elements + 1);
set.remove(outOfSet);
assertFalse(set.contains(outOfSet));
assertEquals(reallocationsBefore, reallocations.get());
// Should not expand because we're replacing an existing element.
assertFalse(set.add(key1));
assertEquals(reallocationsBefore, reallocations.get());
// Remove from a full set.
set.remove(key1);
assertEquals(reallocationsBefore, reallocations.get());
set.add(key1);
// Check expand on "last slot of a full map" condition.
set.add(outOfSet);
assertEquals(reallocationsBefore + 1, reallocations.get());
}
/** removeAll with another set returns the number of keys removed. */
@Test
public void testRemoveAllFromLookupContainer() {
set.addAll(asArray(0, 1, 2, 3, 4));
CharHashSet list2 = new CharHashSet();
list2.addAll(asArray(1, 3, 5));
assertEquals(2, set.removeAll(list2));
assertEquals(3, set.size());
assertSortedListEquals(set.toArray(), asArray(0, 2, 4));
}
/** clear leaves the set with size zero. */
@Test
public void testClear() {
set.addAll(asArray(1, 2, 3));
set.clear();
assertEquals(0, set.size());
}
/** release leaves the set with size zero, and keys can be added again afterwards. */
@Test
public void testRelease() {
set.addAll(asArray(1, 2, 3));
set.release();
assertEquals(0, set.size());
set.addAll(asArray(1, 2, 3));
assertEquals(3, set.size());
}
/** Iteration visits exactly size() keys, all contained in the set; a cleared set yields none. */
@Test
public void testIterable() {
set.addAll(asArray(1, 2, 2, 3, 4));
set.remove(key2);
assertEquals(3, set.size());
int count = 0;
for (CharCursor cursor : set) {
count++;
assertTrue(set.contains(cursor.value));
}
assertEquals(count, set.size());
set.clear();
assertFalse(set.iterator().hasNext());
}
/** Runs random insertions/deletions/clearing and compares the results against {@link HashSet}. */
@Test
@SuppressWarnings({"rawtypes", "unchecked"})
public void testAgainstHashSet() {
final Random rnd = RandomizedTest.getRandom();
final HashSet other = new HashSet();
for (int size = 1000; size < 20000; size += 4000) {
other.clear();
set.clear();
for (int round = 0; round < size * 20; round++) {
char key = cast(rnd.nextInt(size));
// One round in fifty (on average) uses the zero key instead.
if (rnd.nextInt(50) == 0) {
key = EMPTY_KEY;
}
if (rnd.nextBoolean()) {
// Insertion, either through the index-based API or through add.
if (rnd.nextBoolean()) {
int index = set.indexOf(key);
if (set.indexExists(index)) {
set.indexReplace(index, key);
} else {
set.indexInsert(index, key);
}
} else {
set.add(key);
}
other.add(key);
assertTrue(set.contains(key));
assertTrue(set.indexExists(set.indexOf(key)));
} else {
// Removal, either through the index-based API or through remove.
assertEquals(other.contains(key), set.contains(key));
boolean removed;
if (set.contains(key) && rnd.nextBoolean()) {
set.indexRemove(set.indexOf(key));
removed = true;
} else {
removed = set.remove(key);
}
assertEquals(other.remove(key), removed);
}
assertEquals(other.size(), set.size());
}
}
}
/** Sets holding the same keys are equal and have equal hash codes; an empty set hashes to 0. */
@Test
public void testHashCodeEquals() {
CharHashSet l0 = new CharHashSet();
assertEquals(0, l0.hashCode());
assertEquals(l0, new CharHashSet());
CharHashSet l1 = CharHashSet.from(key1, key2, key3);
CharHashSet l2 = CharHashSet.from(key1, key2);
l2.add(key3);
assertEquals(l1.hashCode(), l2.hashCode());
assertEquals(l1, l2);
}
/** Removing a key from a clone does not affect the original set. */
@Test
public void testClone() {
this.set.addAll(asArray(1, 2, 3));
CharHashSet cloned = set.clone();
cloned.remove(key1);
assertSortedListEquals(set.toArray(), asArray(1, 2, 3));
assertSortedListEquals(cloned.toArray(), asArray(2, 3));
}
/** Sets of the same class compare equal only when they hold the same keys. */
@Test
public void testEqualsSameClass() {
CharHashSet l1 = CharHashSet.from(key1, key2, key3);
CharHashSet l2 = CharHashSet.from(key1, key2, key3);
CharHashSet l3 = CharHashSet.from(key1, key2, key4);
MatcherAssert.assertThat(l1, is(equalTo(l2)));
MatcherAssert.assertThat(l1.hashCode(), is(equalTo(l2.hashCode())));
MatcherAssert.assertThat(l1, is(not(equalTo(l3))));
}
/** A set is not equal to an instance of a subclass, even when both hold the same keys. */
@Test
public void testEqualsSubClass() {
class Sub extends CharHashSet {}
;
CharHashSet l1 = CharHashSet.from(key1, key2, key3);
CharHashSet l2 = new Sub();
CharHashSet l3 = new Sub();
l2.addAll(l1);
l3.addAll(l1);
MatcherAssert.assertThat(l2, is(equalTo(l3)));
MatcherAssert.assertThat(l1, is(not(equalTo(l2))));
}
/** Returns a random int between min and max, both inclusive. */
private static int randomIntBetween(int min, int max) {
return min + random().nextInt(max + 1 - min);
}
/** Collects the given chars into a {@link Set} of boxed characters. */
private static Set<Character> set(char... elements) {
Set<Character> set = new HashSet<>();
for (char element : elements) {
set.add(element);
}
return set;
}
/** Converts each int to a key with {@link #cast} and returns the keys as an array. */
private static char[] asArray(int... elements) {
char[] result = new char[elements.length];
for (int i = 0; i < elements.length; i++) {
result[i] = cast(elements[i]);
}
return result;
}
/** Check if the array's content is identical to a given sequence of elements. */
private static void assertSortedListEquals(char[] array, char[] elements) {
assertEquals(elements.length, array.length);
// Only 'array' is sorted here; 'elements' is compared in the order given.
Arrays.sort(array);
assertArrayEquals(elements, array);
}
}

View File

@ -0,0 +1,671 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.hppc;
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.junit.After;
import org.junit.Test;
/**
* Tests for {@link CharObjectHashMap}.
*
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.CharObjectHashMapTest
*
* <p>github: https://github.com/carrotsearch/hppc release: 0.9.0
*/
@SuppressWarnings({"rawtypes", "unchecked"})
public class TestCharObjectHashMap extends LuceneTestCase {
/* Ready to use key values. */
private final char keyE = 0;
private final char key1 = cast(1);
private final char key2 = cast(2);
private final char key3 = cast(3);
private final char key4 = cast(4);
/** Maps a small integer to a test key by offsetting it from {@code 'a'}. */
private char cast(int v) {
  final int codeUnit = 'a' + v;
  return (char) codeUnit;
}
/** Returns the varargs array holding the given keys. */
private char[] newArray(char... elements) {
  final char[] keys = elements;
  return keys;
}
/** Returns a random int between {@code min} and {@code max}, both inclusive. */
private static int randomIntBetween(int min, int max) {
  final int span = max + 1 - min;
  return min + random().nextInt(span);
}
/** Asserts that both char arrays hold the same elements once each of them is sorted. */
private static void assertSortedListEquals(char[] array, char... elements) {
  final int expectedLength = elements.length;
  assertEquals(expectedLength, array.length);
  // Both arrays are sorted in place before the comparison.
  Arrays.sort(array);
  Arrays.sort(elements);
  assertArrayEquals(elements, array);
}
/**
 * Asserts that {@code array}, once sorted, equals {@code elements} in the order given. Only
 * {@code array} is sorted.
 */
private static void assertSortedListEquals(Object[] array, Object... elements) {
  final int expectedLength = elements.length;
  assertEquals(expectedLength, array.length);
  Arrays.sort(array);
  assertArrayEquals(elements, array);
}
private final int value0 = vcast(0);
private final int value1 = vcast(1);
private final int value2 = vcast(2);
private final int value3 = vcast(3);
private final int value4 = vcast(4);
/** Per-test fresh initialized instance. */
private CharObjectHashMap<Object> map = newInstance();
/** Creates an empty map with the default constructor. */
private CharObjectHashMap newInstance() {
  final CharObjectHashMap fresh = new CharObjectHashMap();
  return fresh;
}
/**
 * After each test, counts the non-zero keys in the hashed area of the map and checks the count
 * against the map's {@code assigned} field.
 */
@After
public void checkEmptySlotsUninitialized() {
  if (map == null) {
    return;
  }
  int occupied = 0;
  for (int slot = map.mask; slot >= 0; slot--) {
    if (map.keys[slot] != 0) {
      occupied++;
    }
  }
  assertEquals(occupied, map.assigned);
}
/** Maps an integer to a test value; the integer is used as-is. */
private int vcast(int value) {
  final int converted = value;
  return converted;
}
/** Returns the varargs array holding the given values. */
private Object[] newvArray(Object... elements) {
  final Object[] values = elements;
  return values;
}
/** Asserts that both maps have the same size and every entry of {@code c1} is in {@code c2}. */
private void assertSameMap(
    final CharObjectHashMap<Object> c1, final CharObjectHashMap<Object> c2) {
  assertEquals(c1.size(), c2.size());
  for (CharObjectHashMap.CharObjectCursor<Object> entry : c1) {
    final char key = entry.key;
    assertTrue(c2.containsKey(key));
    assertEquals(entry.value, c2.get(key));
  }
}
/* */
/** Checks that no buffer allocation happens for puts made after ensureCapacity. */
@Test
public void testEnsureCapacity() {
  // Counts calls to allocateBuffers.
  final AtomicInteger allocations = new AtomicInteger();
  CharObjectHashMap tracked =
      new CharObjectHashMap(0) {
        @Override
        protected void allocateBuffers(int arraySize) {
          super.allocateBuffers(arraySize);
          allocations.incrementAndGet();
        }
      };

  // Start from a randomly sized map (rarely an empty one).
  final int initial = rarely() ? 0 : randomIntBetween(0, 250);
  for (int k = 0; k < initial; k++) {
    tracked.put(cast(k), value0);
  }

  final int additions = randomIntBetween(initial, initial + 5000);
  tracked.ensureCapacity(additions + tracked.size());

  final int allocationsBefore = allocations.get();
  for (int k = 0; k < additions; k++) {
    tracked.put(cast(k), value0);
  }
  assertEquals(allocationsBefore, allocations.get());
}
/** Every cursor produced by iteration must expose an index usable with the index-based API. */
@Test
public void testCursorIndexIsValid() {
  map.put(keyE, value1);
  map.put(key1, value2);
  map.put(key2, value3);
  for (CharObjectHashMap.CharObjectCursor cursor : map) {
    final int slot = cursor.index;
    assertTrue(map.indexExists(slot));
    assertEquals(cursor.value, map.indexGet(slot));
  }
}
/**
 * Exercises the index-based API (indexOf, indexExists, indexGet, indexReplace, indexInsert,
 * indexRemove) on a map holding keyE and key1, with key2 initially absent.
 */
@Test
public void testIndexMethods() {
map.put(keyE, value1);
map.put(key1, value2);
// Present keys report a non-negative index, the absent key a negative one.
assertTrue(map.indexOf(keyE) >= 0);
assertTrue(map.indexOf(key1) >= 0);
assertTrue(map.indexOf(key2) < 0);
assertTrue(map.indexExists(map.indexOf(keyE)));
assertTrue(map.indexExists(map.indexOf(key1)));
assertFalse(map.indexExists(map.indexOf(key2)));
assertEquals(value1, map.indexGet(map.indexOf(keyE)));
assertEquals(value2, map.indexGet(map.indexOf(key1)));
// Reading through the index of an absent key is expected to trip an AssertionError.
expectThrows(
AssertionError.class,
() -> {
map.indexGet(map.indexOf(key2));
});
// indexReplace returns the previous value and stores the new one.
assertEquals(value1, map.indexReplace(map.indexOf(keyE), value3));
assertEquals(value2, map.indexReplace(map.indexOf(key1), value4));
assertEquals(value3, map.indexGet(map.indexOf(keyE)));
assertEquals(value4, map.indexGet(map.indexOf(key1)));
// Insert the absent key through the index returned by indexOf.
map.indexInsert(map.indexOf(key2), key2, value1);
assertEquals(value1, map.indexGet(map.indexOf(key2)));
assertEquals(3, map.size());
// indexRemove returns the removed value and shrinks the map.
assertEquals(value3, map.indexRemove(map.indexOf(keyE)));
assertEquals(2, map.size());
assertEquals(value1, map.indexRemove(map.indexOf(key2)));
assertEquals(1, map.size());
// Only key1 remains.
assertTrue(map.indexOf(keyE) < 0);
assertTrue(map.indexOf(key1) >= 0);
assertTrue(map.indexOf(key2) < 0);
}
/* */
/** A map built with the copy constructor must contain the same entries as its source. */
@Test
public void testCloningConstructor() {
  map.put(key1, value1);
  map.put(key2, value2);
  map.put(key3, value3);
  final CharObjectHashMap copy = new CharObjectHashMap(map);
  assertSameMap(map, copy);
}
/* */
/** A map built by {@code from(keys, values)} must match one filled with individual puts. */
@Test
public void testFromArrays() {
  map.put(key1, value1);
  map.put(key2, value2);
  map.put(key3, value3);
  final char[] keys = newArray(key1, key2, key3);
  final Object[] values = newvArray(value1, value2, value3);
  CharObjectHashMap fromArrays = CharObjectHashMap.from(keys, values);
  assertSameMap(map, fromArrays);
}
/** {@code getOrDefault} returns the stored value for present keys and the default otherwise. */
@Test
public void testGetOrDefault() {
  final Object fallback = value3;
  map.put(key2, value2);
  assertTrue(map.containsKey(key2));
  map.put(key1, value1);
  // Present key: the stored value wins over the default.
  assertEquals(value1, map.getOrDefault(key1, fallback));
  // Key never inserted: the default is returned.
  assertEquals(value3, map.getOrDefault(key3, fallback));
  // Key removed again: the default is returned.
  map.remove(key1);
  assertEquals(value3, map.getOrDefault(key1, fallback));
}
/* */
/** A key stored with {@code put} is reported as present and maps to the stored value. */
@Test
public void testPut() {
  map.put(key1, value1);
  final boolean present = map.containsKey(key1);
  assertTrue(present);
  final Object stored = map.get(key1);
  assertEquals(value1, stored);
}
/* */
/** A key mapped to {@code null} is still reported as present. */
@Test
public void testNullValue() {
  map.put(key1, null);
  final boolean present = map.containsKey(key1);
  assertTrue(present);
  final Object stored = map.get(key1);
  assertNull(stored);
}
/** {@code put} on an existing key returns the previous value, including a previous null. */
@Test
public void testPutOverExistingKey() {
  map.put(key1, value1);

  // Overwrite value1 with value3.
  Object previous = map.put(key1, value3);
  assertEquals(value1, previous);
  assertEquals(value3, map.get(key1));

  // Overwrite value3 with null; the key stays in the map.
  previous = map.put(key1, null);
  assertEquals(value3, previous);
  assertTrue(map.containsKey(key1));
  assertNull(map.get(key1));

  // Overwrite null with value1; the previous value reported is null.
  previous = map.put(key1, value1);
  assertNull(previous);
  assertEquals(value1, map.get(key1));
}
/* */
/** Insert many random keys and check membership and size against a reference {@link HashSet}. */
@Test
public void testPutWithExpansions() {
  final int COUNT = 10000;
  final Random rnd = new Random(random().nextLong());
  final HashSet<Object> seen = new HashSet<>();
  for (int i = 0; i < COUNT; i++) {
    final int v = rnd.nextInt();
    final char key = cast(v);
    // add() returns true only for a new element, so its negation is "was already present".
    final boolean hadKey = !seen.add(key);
    assertEquals(hadKey, map.containsKey(key));
    map.put(key, vcast(v));
    assertEquals(seen.size(), map.size());
  }
  assertEquals(seen.size(), map.size());
}
/* */
/** {@code putAll} returns the number of newly added keys and overwrites existing ones. */
@Test
public void testPutAll() {
  map.put(key1, value1);
  map.put(key2, value1);
  CharObjectHashMap other = newInstance();
  other.put(key2, value2);
  other.put(keyE, value1);
  // Only keyE is new to the target map; key2 already exists there.
  assertEquals(1, map.putAll(other));
  // The value under key2 has been replaced.
  assertEquals(value2, map.get(key2));
  // And keyE has been added.
  assertEquals(value1, map.get(keyE));
  assertEquals(3, map.size());
}
/* */
/** {@code putIfAbsent} stores only the first value offered for a key. */
@Test
public void testPutIfAbsent() {
  final boolean firstAccepted = map.putIfAbsent(key1, value1);
  assertTrue(firstAccepted);
  final boolean secondAccepted = map.putIfAbsent(key1, value2);
  assertFalse(secondAccepted);
  assertEquals(value1, map.get(key1));
}
/* */
/** {@code remove} returns the removed value once, then null, and leaves the map empty. */
@Test
public void testRemove() {
  map.put(key1, value1);
  final Object firstRemoval = map.remove(key1);
  assertEquals(value1, firstRemoval);
  final Object secondRemoval = map.remove(key1);
  assertNull(secondRemoval);
  assertEquals(0, map.size());
  // These are internals, but perhaps worth asserting too.
  assertEquals(0, map.assigned);
}
/* */
/** The zero char is a regular key: it can be stored, read, iterated, removed and cleared. */
@Test
public void testEmptyKey() {
  final char empty = 0;

  // Store a value under the zero key and read it back through every accessor.
  map.put(empty, value1);
  assertEquals(1, map.size());
  assertFalse(map.isEmpty());
  assertEquals(value1, map.get(empty));
  assertEquals(value1, map.getOrDefault(empty, value2));
  assertTrue(map.iterator().hasNext());
  assertEquals(empty, map.iterator().next().key);
  assertEquals(value1, map.iterator().next().value);

  map.remove(empty);
  assertNull(map.get(empty));
  assertEquals(0, map.size());

  // A null value under the zero key still counts as an entry.
  map.put(empty, null);
  assertEquals(1, map.size());
  assertTrue(map.containsKey(empty));
  assertNull(map.get(empty));

  map.remove(empty);
  assertEquals(0, map.size());
  assertFalse(map.containsKey(empty));
  assertNull(map.get(empty));

  // put reports the previous value: none at first, then value1.
  assertNull(map.put(empty, value1));
  assertEquals(value1, map.put(empty, value2));

  // clear drops the zero key as well.
  map.clear();
  assertFalse(map.indexExists(map.indexOf(empty)));
  assertNull(map.put(empty, value1));
  map.clear();
  assertNull(map.remove(empty));
}
/* */
/** The key view of the map must contain exactly the inserted keys. */
@Test
public void testMapKeySet() {
map.put(key1, value3);
map.put(key2, value2);
map.put(key3, value1);
// Order-insensitive comparison: the helper sorts before comparing.
assertSortedListEquals(map.keys().toArray(), key1, key2, key3);
}
/* */
/** Iterating the key view visits every entry once, with cursors pointing at the key buffer. */
@Test
public void testMapKeySetIterator() {
  map.put(key1, value3);
  map.put(key2, value2);
  map.put(key3, value1);
  int visited = 0;
  for (CharCursor cursor : map.keys()) {
    // The cursor's index must address the slot holding the key it reports.
    assertEquals(map.keys[cursor.index], cursor.value);
    visited++;
  }
  assertEquals(visited, map.size());
}
/* */
/** {@code clear} empties the map and leaves it fully usable afterwards. */
@Test
public void testClear() {
  map.put(key1, value1);
  map.put(key2, value1);
  map.clear();
  assertEquals(0, map.size());
  // These are internals, but perhaps worth asserting too.
  assertEquals(0, map.assigned);
  // Check values are cleared.
  final Object previousForKey1 = map.put(key1, value1);
  assertNull(previousForKey1);
  final Object removedForKey2 = map.remove(key2);
  assertNull(removedForKey2);
  map.clear();
  // Check if the map behaves properly upon subsequent use.
  testPutWithExpansions();
}
/* */
/** {@code release} empties the map and leaves it fully usable afterwards. */
@Test
public void testRelease() {
  map.put(key1, value1);
  map.put(key2, value1);
  map.release();
  final int sizeAfterRelease = map.size();
  assertEquals(0, sizeAfterRelease);
  // These are internals, but perhaps worth asserting too.
  final int assignedAfterRelease = map.assigned;
  assertEquals(0, assignedAfterRelease);
  // Check if the map behaves properly upon subsequent use.
  testPutWithExpansions();
}
/* */
/** Iteration visits exactly the live entries and its cursors agree with the internal buffers. */
@Test
public void testIterable() {
  map.put(key1, value1);
  map.put(key2, value2);
  map.put(key3, value3);
  map.remove(key2);
  int visited = 0;
  for (CharObjectHashMap.CharObjectCursor entry : map) {
    visited++;
    assertTrue(map.containsKey(entry.key));
    assertEquals(entry.value, map.get(entry.key));
    // The cursor's index must address the slot backing this entry.
    assertEquals(entry.value, map.values[entry.index]);
    assertEquals(entry.key, map.keys[entry.index]);
  }
  assertEquals(visited, map.size());
  map.clear();
  assertFalse(map.iterator().hasNext());
}
/* */
/**
 * Fills a map created with load factor 1 up to its expected element count and checks that
 * lookups, removals and replacements on the full map do not reallocate, while adding one more
 * key does.
 */
@Test
public void testBug_HPPC73_FullCapacityGet() {
  // Counts every call to allocateBuffers made by the map under test.
  final AtomicInteger reallocations = new AtomicInteger();
  final int elements = 0x7F;
  map =
      new CharObjectHashMap(elements, 1f) {
        @Override
        protected double verifyLoadFactor(double loadFactor) {
          // Skip load factor sanity range checking.
          return loadFactor;
        }

        @Override
        protected void allocateBuffers(int arraySize) {
          super.allocateBuffers(arraySize);
          reallocations.incrementAndGet();
        }
      };
  int reallocationsBefore = reallocations.get();
  // Exactly one allocation is expected from construction (expected value goes first).
  assertEquals(1, reallocationsBefore);
  for (int i = 1; i <= elements; i++) {
    map.put(cast(i), value1);
  }
  // Non-existent key.
  char outOfSet = cast(elements + 1);
  map.remove(outOfSet);
  assertFalse(map.containsKey(outOfSet));
  assertEquals(reallocationsBefore, reallocations.get());
  // Should not expand because we're replacing an existing element.
  map.put(key1, value2);
  assertEquals(reallocationsBefore, reallocations.get());
  // Remove from a full map.
  map.remove(key1);
  assertEquals(reallocationsBefore, reallocations.get());
  map.put(key1, value2);
  // Check expand on "last slot of a full map" condition.
  map.put(outOfSet, value1);
  assertEquals(reallocationsBefore + 1, reallocations.get());
}
/** Equality and hash code depend on the entries only, not on their insertion order. */
@Test
public void testHashCodeEquals() {
  CharObjectHashMap empty = newInstance();
  assertEquals(0, empty.hashCode());
  assertEquals(empty, newInstance());
  // Same entries, inserted in two different orders.
  CharObjectHashMap ordered =
      CharObjectHashMap.from(newArray(key1, key2, key3), newvArray(value1, value2, value3));
  CharObjectHashMap shuffled =
      CharObjectHashMap.from(newArray(key2, key1, key3), newvArray(value2, value1, value3));
  // Fewer keys and different values.
  CharObjectHashMap different =
      CharObjectHashMap.from(newArray(key1, key2), newvArray(value2, value1));
  assertEquals(ordered.hashCode(), shuffled.hashCode());
  assertEquals(ordered, shuffled);
  assertFalse(ordered.equals(different));
  assertFalse(shuffled.equals(different));
}
/** Two single-entry maps with the same value but different keys must not be equal. */
@Test
public void testBug_HPPC37() {
  CharObjectHashMap first = CharObjectHashMap.from(newArray(key1), newvArray(value1));
  CharObjectHashMap second = CharObjectHashMap.from(newArray(key2), newvArray(value1));
  // Check both directions of equals.
  assertFalse(first.equals(second));
  assertFalse(second.equals(first));
}
/**
 * Runs random insertions/deletions/clearing and compares the results against {@link HashMap}.
 *
 * <p>Each outer round clears both maps and draws keys from a range bounded by {@code size}. After
 * every operation the previous value returned and the resulting size must match the reference map.
 */
@Test
@SuppressWarnings({"rawtypes", "unchecked"})
public void testAgainstHashMap() {
final Random rnd = RandomizedTest.getRandom();
final HashMap other = new HashMap();
for (int size = 1000; size < 20000; size += 4000) {
other.clear();
map.clear();
for (int round = 0; round < size * 20; round++) {
char key = cast(rnd.nextInt(size));
// Roughly one draw in fifty uses the zero key instead.
if (rnd.nextInt(50) == 0) {
key = 0;
}
int value = vcast(rnd.nextInt());
if (rnd.nextBoolean()) {
// Insertion branch: either through the index-based API or through plain put.
Object previousValue;
if (rnd.nextBoolean()) {
int index = map.indexOf(key);
if (map.indexExists(index)) {
previousValue = map.indexReplace(index, value);
} else {
map.indexInsert(index, key, value);
previousValue = null;
}
} else {
previousValue = map.put(key, value);
}
assertEquals(other.put(key, value), previousValue);
assertEquals(value, map.get(key));
assertEquals(value, map.indexGet(map.indexOf(key)));
assertTrue(map.containsKey(key));
assertTrue(map.indexExists(map.indexOf(key)));
} else {
// Removal branch: indexRemove is only attempted when the key is present.
assertEquals(other.containsKey(key), map.containsKey(key));
Object previousValue =
map.containsKey(key) && rnd.nextBoolean()
? map.indexRemove(map.indexOf(key))
: map.remove(key);
assertEquals(other.remove(key), previousValue);
}
assertEquals(other.size(), map.size());
}
}
}
/*
*
*/
/** Removing a key from a clone must not affect the map it was cloned from. */
@Test
public void testClone() {
  map.put(key1, value1);
  map.put(key2, value2);
  map.put(key3, value3);
  CharObjectHashMap copy = map.clone();
  copy.remove(key1);
  // The source map keeps all three keys; the clone lost key1 only.
  assertSortedListEquals(map.keys().toArray(), key1, key2, key3);
  assertSortedListEquals(copy.keys().toArray(), key2, key3);
}
/* */
/** The values view must contain every stored value, duplicates included. */
@Test
public void testMapValues() {
map.put(key1, value3);
map.put(key2, value2);
map.put(key3, value1);
assertSortedListEquals(map.values().toArray(), value1, value2, value3);
map.clear();
// Second round: two keys share value2, so it must appear twice in the view.
map.put(key1, value1);
map.put(key2, value2);
map.put(key3, value2);
assertSortedListEquals(map.values().toArray(), value1, value2, value2);
}
/* */
/** Iterating the values view visits every entry once, with cursors pointing at the value buffer. */
@Test
public void testMapValuesIterator() {
  map.put(key1, value3);
  map.put(key2, value2);
  map.put(key3, value1);
  int visited = 0;
  for (ObjectCursor cursor : map.values()) {
    // The cursor's index must address the slot holding the value it reports.
    assertEquals(map.values[cursor.index], cursor.value);
    visited++;
  }
  assertEquals(visited, map.size());
}
/* */
/** Maps of the same class are equal iff they hold the same entries. */
@Test
public void testEqualsSameClass() {
  CharObjectHashMap base = newInstance();
  base.put(key1, value0);
  base.put(key2, value1);
  base.put(key3, value2);
  // Copy of base, with the same entries put again on top.
  CharObjectHashMap copy = new CharObjectHashMap(base);
  copy.putAll(base);
  // Copy of the copy, extended by one additional key.
  CharObjectHashMap extended = new CharObjectHashMap(copy);
  extended.putAll(copy);
  extended.put(key4, value0);
  assertEquals(copy, base);
  assertEquals(copy.hashCode(), base.hashCode());
  assertNotEquals(base, extended);
}
/* */
/** Equality between a map and subclass instances, and between two subclass instances. */
@Test
public void testEqualsSubClass() {
  class Sub extends CharObjectHashMap {}
  CharObjectHashMap base = newInstance();
  base.put(key1, value0);
  base.put(key2, value1);
  base.put(key3, value2);
  // Subclass instance holding base's entries plus one more key.
  CharObjectHashMap extended = new Sub();
  extended.putAll(base);
  extended.put(key4, value3);
  // Second subclass instance with exactly the same entries as the first.
  CharObjectHashMap extendedCopy = new Sub();
  extendedCopy.putAll(extended);
  assertNotEquals(base, extended);
  assertEquals(extendedCopy.hashCode(), extended.hashCode());
  assertEquals(extendedCopy, extended);
}
}

View File

@ -215,7 +215,7 @@ public class TestIntHashSet extends LuceneTestCase {
public void testAddVarArgs() { public void testAddVarArgs() {
set.addAll(asArray(0, 1, 2, 1, 0)); set.addAll(asArray(0, 1, 2, 1, 0));
assertEquals(3, set.size()); assertEquals(3, set.size());
assertSortedListEquals(set.toArray(), 0, 1, 2); assertSortedListEquals(set.toArray(), asArray(0, 1, 2));
} }
@Test @Test
@ -228,7 +228,7 @@ public class TestIntHashSet extends LuceneTestCase {
assertEquals(0, set.addAll(set2)); assertEquals(0, set.addAll(set2));
assertEquals(3, set.size()); assertEquals(3, set.size());
assertSortedListEquals(set.toArray(), 0, 1, 2); assertSortedListEquals(set.toArray(), asArray(0, 1, 2));
} }
@Test @Test
@ -238,7 +238,7 @@ public class TestIntHashSet extends LuceneTestCase {
assertTrue(set.remove(key2)); assertTrue(set.remove(key2));
assertFalse(set.remove(key2)); assertFalse(set.remove(key2));
assertEquals(4, set.size()); assertEquals(4, set.size());
assertSortedListEquals(set.toArray(), 0, 1, 3, 4); assertSortedListEquals(set.toArray(), asArray(0, 1, 3, 4));
} }
@Test @Test
@ -308,7 +308,7 @@ public class TestIntHashSet extends LuceneTestCase {
assertEquals(2, set.removeAll(list2)); assertEquals(2, set.removeAll(list2));
assertEquals(3, set.size()); assertEquals(3, set.size());
assertSortedListEquals(set.toArray(), 0, 2, 4); assertSortedListEquals(set.toArray(), asArray(0, 2, 4));
} }
@Test @Test
@ -409,13 +409,13 @@ public class TestIntHashSet extends LuceneTestCase {
@Test @Test
public void testClone() { public void testClone() {
this.set.addAll(key1, key2, key3); this.set.addAll(asArray(1, 2, 3));
IntHashSet cloned = set.clone(); IntHashSet cloned = set.clone();
cloned.remove(key1); cloned.remove(key1);
assertSortedListEquals(set.toArray(), key1, key2, key3); assertSortedListEquals(set.toArray(), asArray(1, 2, 3));
assertSortedListEquals(cloned.toArray(), key2, key3); assertSortedListEquals(cloned.toArray(), asArray(2, 3));
} }
@Test @Test
@ -461,7 +461,7 @@ public class TestIntHashSet extends LuceneTestCase {
} }
/** Check if the array's content is identical to a given sequence of elements. */ /** Check if the array's content is identical to a given sequence of elements. */
private static void assertSortedListEquals(int[] array, int... elements) { private static void assertSortedListEquals(int[] array, int[] elements) {
assertEquals(elements.length, array.length); assertEquals(elements.length, array.length);
Arrays.sort(array); Arrays.sort(array);
assertArrayEquals(elements, array); assertArrayEquals(elements, array);

View File

@ -210,7 +210,7 @@ public class TestLongHashSet extends LuceneTestCase {
public void testAddVarArgs() { public void testAddVarArgs() {
set.addAll(asArray(0, 1, 2, 1, 0)); set.addAll(asArray(0, 1, 2, 1, 0));
assertEquals(3, set.size()); assertEquals(3, set.size());
assertSortedListEquals(set.toArray(), 0, 1, 2); assertSortedListEquals(set.toArray(), asArray(0, 1, 2));
} }
@Test @Test
@ -223,7 +223,7 @@ public class TestLongHashSet extends LuceneTestCase {
assertEquals(0, set.addAll(set2)); assertEquals(0, set.addAll(set2));
assertEquals(3, set.size()); assertEquals(3, set.size());
assertSortedListEquals(set.toArray(), 0, 1, 2); assertSortedListEquals(set.toArray(), asArray(0, 1, 2));
} }
@Test @Test
@ -233,7 +233,7 @@ public class TestLongHashSet extends LuceneTestCase {
assertTrue(set.remove(key2)); assertTrue(set.remove(key2));
assertFalse(set.remove(key2)); assertFalse(set.remove(key2));
assertEquals(4, set.size()); assertEquals(4, set.size());
assertSortedListEquals(set.toArray(), 0, 1, 3, 4); assertSortedListEquals(set.toArray(), asArray(0, 1, 3, 4));
} }
@Test @Test
@ -303,7 +303,7 @@ public class TestLongHashSet extends LuceneTestCase {
assertEquals(2, set.removeAll(list2)); assertEquals(2, set.removeAll(list2));
assertEquals(3, set.size()); assertEquals(3, set.size());
assertSortedListEquals(set.toArray(), 0, 2, 4); assertSortedListEquals(set.toArray(), asArray(0, 2, 4));
} }
@Test @Test
@ -404,13 +404,13 @@ public class TestLongHashSet extends LuceneTestCase {
@Test @Test
public void testClone() { public void testClone() {
this.set.addAll(key1, key2, key3); this.set.addAll(asArray(1, 2, 3));
LongHashSet cloned = set.clone(); LongHashSet cloned = set.clone();
cloned.remove(key1); cloned.remove(key1);
assertSortedListEquals(set.toArray(), key1, key2, key3); assertSortedListEquals(set.toArray(), asArray(1, 2, 3));
assertSortedListEquals(cloned.toArray(), key2, key3); assertSortedListEquals(cloned.toArray(), asArray(2, 3));
} }
@Test @Test
@ -456,7 +456,7 @@ public class TestLongHashSet extends LuceneTestCase {
} }
/** Check if the array's content is identical to a given sequence of elements. */ /** Check if the array's content is identical to a given sequence of elements. */
private static void assertSortedListEquals(long[] array, long... elements) { private static void assertSortedListEquals(long[] array, long[] elements) {
assertEquals(elements.length, array.length); assertEquals(elements.length, array.length);
Arrays.sort(array); Arrays.sort(array);
assertArrayEquals(elements, array); assertArrayEquals(elements, array);

View File

@ -331,7 +331,7 @@ public class TestLongIntHashMap extends LuceneTestCase {
/* */ /* */
@Test @Test
public void testEmptyKey() { public void testEmptyKey() {
final int empty = 0; final long empty = 0;
map.put(empty, value1); map.put(empty, value1);
assertEquals(1, map.size()); assertEquals(1, map.size());

View File

@ -335,7 +335,7 @@ public class TestLongObjectHashMap extends LuceneTestCase {
/* */ /* */
@Test @Test
public void testEmptyKey() { public void testEmptyKey() {
final int empty = 0; final long empty = 0;
map.put(empty, value1); map.put(empty, value1);
assertEquals(1, map.size()); assertEquals(1, map.size());

View File

@ -150,9 +150,9 @@ public class StringValueFacetCounts extends Facets {
List<LabelAndValue> labelValues = new ArrayList<>(); List<LabelAndValue> labelValues = new ArrayList<>();
if (sparseCounts != null) { if (sparseCounts != null) {
for (IntIntCursor cursor : sparseCounts) { for (IntIntCursor sparseCount : sparseCounts) {
int count = cursor.value; int count = sparseCount.value;
final BytesRef term = docValues.lookupOrd(cursor.key); final BytesRef term = docValues.lookupOrd(sparseCount.key);
labelValues.add(new LabelAndValue(term.utf8ToString(), count)); labelValues.add(new LabelAndValue(term.utf8ToString(), count));
} }
} else { } else {
@ -186,10 +186,10 @@ public class StringValueFacetCounts extends Facets {
int childCount = 0; // total number of labels with non-zero count int childCount = 0; // total number of labels with non-zero count
if (sparseCounts != null) { if (sparseCounts != null) {
for (IntIntCursor cursor : sparseCounts) { for (IntIntCursor sparseCount : sparseCounts) {
childCount++; // every count in sparseValues should be non-zero childCount++; // every count in sparseValues should be non-zero
int ord = cursor.key; int ord = sparseCount.key;
int count = cursor.value; int count = sparseCount.value;
if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) { if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
if (q == null) { if (q == null) {
// Lazy init for sparse case: // Lazy init for sparse case:

View File

@ -16,9 +16,9 @@
*/ */
package org.apache.lucene.search.vectorhighlight; package org.apache.lucene.search.vectorhighlight;
import java.util.Arrays; import java.util.Iterator;
import java.util.HashSet;
import java.util.Set; import java.util.Set;
import org.apache.lucene.util.hppc.CharHashSet;
/** /**
* Simple boundary scanner implementation that divides fragments based on a set of separator * Simple boundary scanner implementation that divides fragments based on a set of separator
@ -27,10 +27,10 @@ import java.util.Set;
public class SimpleBoundaryScanner implements BoundaryScanner { public class SimpleBoundaryScanner implements BoundaryScanner {
public static final int DEFAULT_MAX_SCAN = 20; public static final int DEFAULT_MAX_SCAN = 20;
public static final Character[] DEFAULT_BOUNDARY_CHARS = {'.', ',', '!', '?', ' ', '\t', '\n'}; public static final char[] DEFAULT_BOUNDARY_CHARS = {'.', ',', '!', '?', ' ', '\t', '\n'};
protected int maxScan; protected int maxScan;
protected Set<Character> boundaryChars; protected CharHashSet boundaryChars;
public SimpleBoundaryScanner() { public SimpleBoundaryScanner() {
this(DEFAULT_MAX_SCAN, DEFAULT_BOUNDARY_CHARS); this(DEFAULT_MAX_SCAN, DEFAULT_BOUNDARY_CHARS);
@ -44,15 +44,34 @@ public class SimpleBoundaryScanner implements BoundaryScanner {
this(DEFAULT_MAX_SCAN, boundaryChars); this(DEFAULT_MAX_SCAN, boundaryChars);
} }
public SimpleBoundaryScanner(int maxScan, Character[] boundaryChars) { public SimpleBoundaryScanner(int maxScan, char[] boundaryChars) {
this.maxScan = maxScan; this.maxScan = maxScan;
this.boundaryChars = new HashSet<>(); this.boundaryChars = CharHashSet.from(boundaryChars);
this.boundaryChars.addAll(Arrays.asList(boundaryChars)); }
public SimpleBoundaryScanner(int maxScan, Character[] boundaryChars) {
this(maxScan, toCharArray(boundaryChars));
} }
public SimpleBoundaryScanner(int maxScan, Set<Character> boundaryChars) { public SimpleBoundaryScanner(int maxScan, Set<Character> boundaryChars) {
this.maxScan = maxScan; this(maxScan, toCharArray(boundaryChars));
this.boundaryChars = boundaryChars; }
/** Unbox an array of {@link Character} into a {@code char[]} of the same length and order. */
private static char[] toCharArray(Character[] characters) {
  final char[] unboxed = new char[characters.length];
  int next = 0;
  for (Character boxed : characters) {
    unboxed[next++] = boxed;
  }
  return unboxed;
}
/** Unbox a set of {@link Character} into a {@code char[]}, in the set's iteration order. */
private static char[] toCharArray(Set<Character> characters) {
Iterator<Character> iterator = characters.iterator();
// The array is sized from the set, then filled with one iterator step per slot.
char[] chars = new char[characters.size()];
for (int i = 0; i < chars.length; i++) {
chars[i] = iterator.next();
}
return chars;
} }
@Override @Override