mirror of https://github.com/apache/lucene.git
hunspell: make the hash table load factor customizable (#12464)
* hunspell: make the hash table load factor customizable
This commit is contained in:
parent
155b2edbe3
commit
1af68bf2d7
|
@ -77,6 +77,8 @@ Improvements
|
|||
|
||||
* GITHUB#12447: Hunspell: speed up the dictionary enumeration (Peter Gromov)
|
||||
|
||||
* GITHUB#12464: Hunspell: allow customizing the hash table load factor (Peter Gromov)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -1133,7 +1133,8 @@ public class Dictionary {
|
|||
Map<String, Integer> morphIndices = new HashMap<>();
|
||||
|
||||
WordStorage.Builder builder =
|
||||
new WordStorage.Builder(wordCount, hasCustomMorphData, flags, allNonSuggestibleFlags());
|
||||
new WordStorage.Builder(
|
||||
wordCount, hashFactor(), hasCustomMorphData, flags, allNonSuggestibleFlags());
|
||||
|
||||
try (ByteSequencesReader reader =
|
||||
new ByteSequencesReader(tempDir.openChecksumInput(sorted), sorted)) {
|
||||
|
@ -1204,6 +1205,16 @@ public class Dictionary {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The factor determining the size of the internal hash table used for storing the entries. The
|
||||
* table size is {@code entry_count * hashFactor}. The default factor is 1.0. If there are too
|
||||
* many hash collisions, the factor can be increased, resulting in faster access, but more memory
|
||||
* usage.
*
* <p>The value is handed to {@code WordStorage.Builder}, which allocates the table as {@code
* (int) (wordCount * hashFactor)}, so the product is truncated to a whole number of slots. While
* the entries are being stored, a hash chain growing beyond 20 entries aborts the build with a
* {@link RuntimeException} whose message suggests trying a larger factor.
*
* <p>Subclasses may override this method to supply a different factor.
*
* @return the multiplier applied to the entry count to obtain the hash table size; 1.0 in this
*     implementation
|
||||
*/
|
||||
protected double hashFactor() {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
char[] allNonSuggestibleFlags() {
|
||||
return Dictionary.toSortedCharArray(
|
||||
Stream.of(HIDDEN_FLAG, noSuggest, forbiddenword, onlyincompound, subStandard)
|
||||
|
|
|
@ -264,6 +264,7 @@ abstract class WordStorage {
|
|||
private final List<Integer> morphDataIDs = new ArrayList<>();
|
||||
private String currentEntry = null;
|
||||
private final int wordCount;
|
||||
private final double hashFactor;
|
||||
private final FlagEnumerator flagEnumerator;
|
||||
|
||||
private final ByteArrayDataOutput dataWriter;
|
||||
|
@ -278,15 +279,17 @@ abstract class WordStorage {
|
|||
*/
|
||||
Builder(
|
||||
int wordCount,
|
||||
double hashFactor,
|
||||
boolean hasCustomMorphData,
|
||||
FlagEnumerator flagEnumerator,
|
||||
char[] noSuggestFlags) {
|
||||
this.wordCount = wordCount;
|
||||
this.hashFactor = hashFactor;
|
||||
this.flagEnumerator = flagEnumerator;
|
||||
this.hasCustomMorphData = hasCustomMorphData;
|
||||
this.noSuggestFlags = noSuggestFlags;
|
||||
|
||||
hashTable = new int[wordCount];
|
||||
hashTable = new int[(int) (wordCount * hashFactor)];
|
||||
wordData = new byte[wordCount * 6];
|
||||
|
||||
dataWriter =
|
||||
|
@ -390,7 +393,9 @@ abstract class WordStorage {
|
|||
|
||||
if (++chainLengths[hash] > 20) {
|
||||
throw new RuntimeException(
|
||||
"Too many collisions, please report this to dev@lucene.apache.org");
|
||||
"Too many collisions. "
|
||||
+ ("Try a larger Dictionary#hashFactor (now " + hashFactor + "). ")
|
||||
+ "If this doesn't help, please report this to dev@lucene.apache.org");
|
||||
}
|
||||
|
||||
// write the leaf entry for the last character
|
||||
|
|
Loading…
Reference in New Issue