LUCENE-9697: Hunspell Stemmer: use the same FST.BytesReader on all recursion levels (#2242)

This commit is contained in:
Peter Gromov 2021-01-26 09:31:23 +01:00 committed by GitHub
parent a82634db9d
commit 80e4def97b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 6 additions and 9 deletions

View File

@ -52,14 +52,14 @@ final class Stemmer {
*/ */
public Stemmer(Dictionary dictionary) { public Stemmer(Dictionary dictionary) {
this.dictionary = dictionary; this.dictionary = dictionary;
prefixReader = dictionary.prefixes == null ? null : dictionary.prefixes.getBytesReader();
suffixReader = dictionary.suffixes == null ? null : dictionary.suffixes.getBytesReader();
for (int level = 0; level < 3; level++) { for (int level = 0; level < 3; level++) {
if (dictionary.prefixes != null) { if (dictionary.prefixes != null) {
prefixArcs[level] = new FST.Arc<>(); prefixArcs[level] = new FST.Arc<>();
prefixReaders[level] = dictionary.prefixes.getBytesReader();
} }
if (dictionary.suffixes != null) { if (dictionary.suffixes != null) {
suffixArcs[level] = new FST.Arc<>(); suffixArcs[level] = new FST.Arc<>();
suffixReaders[level] = dictionary.suffixes.getBytesReader();
} }
} }
formStep = dictionary.formStep(); formStep = dictionary.formStep();
@ -252,13 +252,12 @@ final class Stemmer {
} }
// some state for traversing FSTs // some state for traversing FSTs
private final FST.BytesReader[] prefixReaders = new FST.BytesReader[3]; private final FST.BytesReader prefixReader;
private final FST.BytesReader suffixReader;
@SuppressWarnings({"unchecked", "rawtypes"}) @SuppressWarnings({"unchecked", "rawtypes"})
private final FST.Arc<IntsRef>[] prefixArcs = new FST.Arc[3]; private final FST.Arc<IntsRef>[] prefixArcs = new FST.Arc[3];
private final FST.BytesReader[] suffixReaders = new FST.BytesReader[3];
@SuppressWarnings({"unchecked", "rawtypes"}) @SuppressWarnings({"unchecked", "rawtypes"})
private final FST.Arc<IntsRef>[] suffixArcs = new FST.Arc[3]; private final FST.Arc<IntsRef>[] suffixArcs = new FST.Arc[3];
@ -302,7 +301,6 @@ final class Stemmer {
if (doPrefix && dictionary.prefixes != null) { if (doPrefix && dictionary.prefixes != null) {
FST<IntsRef> fst = dictionary.prefixes; FST<IntsRef> fst = dictionary.prefixes;
FST.BytesReader bytesReader = prefixReaders[recursionDepth];
FST.Arc<IntsRef> arc = prefixArcs[recursionDepth]; FST.Arc<IntsRef> arc = prefixArcs[recursionDepth];
fst.getFirstArc(arc); fst.getFirstArc(arc);
IntsRef NO_OUTPUT = fst.outputs.getNoOutput(); IntsRef NO_OUTPUT = fst.outputs.getNoOutput();
@ -311,7 +309,7 @@ final class Stemmer {
for (int i = 0; i < limit; i++) { for (int i = 0; i < limit; i++) {
if (i > 0) { if (i > 0) {
int ch = word[i - 1]; int ch = word[i - 1];
if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) { if (fst.findTargetArc(ch, arc, arc, prefixReader) == null) {
break; break;
} else if (arc.output() != NO_OUTPUT) { } else if (arc.output() != NO_OUTPUT) {
output = fst.outputs.add(output, arc.output()); output = fst.outputs.add(output, arc.output());
@ -351,7 +349,6 @@ final class Stemmer {
if (doSuffix && dictionary.suffixes != null) { if (doSuffix && dictionary.suffixes != null) {
FST<IntsRef> fst = dictionary.suffixes; FST<IntsRef> fst = dictionary.suffixes;
FST.BytesReader bytesReader = suffixReaders[recursionDepth];
FST.Arc<IntsRef> arc = suffixArcs[recursionDepth]; FST.Arc<IntsRef> arc = suffixArcs[recursionDepth];
fst.getFirstArc(arc); fst.getFirstArc(arc);
IntsRef NO_OUTPUT = fst.outputs.getNoOutput(); IntsRef NO_OUTPUT = fst.outputs.getNoOutput();
@ -360,7 +357,7 @@ final class Stemmer {
for (int i = length; i >= limit; i--) { for (int i = length; i >= limit; i--) {
if (i < length) { if (i < length) {
int ch = word[i]; int ch = word[i];
if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) { if (fst.findTargetArc(ch, arc, arc, suffixReader) == null) {
break; break;
} else if (arc.output() != NO_OUTPUT) { } else if (arc.output() != NO_OUTPUT) {
output = fst.outputs.add(output, arc.output()); output = fst.outputs.add(output, arc.output());