diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index a4b2f6c52f4..b932bf88208 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -140,8 +140,6 @@ public class Dictionary { // when set, some words have exceptional stems, and the last entry is a pointer to stemExceptions boolean hasStemExceptions; - private final Path tempPath = getDefaultTempDir(); // TODO: make this configurable? - boolean ignoreCase; boolean complexPrefixes; // if no affixes have continuation classes, no need to do 2-level affix stripping @@ -210,6 +208,7 @@ public class Dictionary { this.needsOutputCleaning = false; // set if we have an OCONV flagLookup.add(new BytesRef()); // no flags -> ord 0 + Path tempPath = getDefaultTempDir(); // TODO: make this configurable? Path aff = Files.createTempFile(tempPath, "affix", "aff"); OutputStream out = new BufferedOutputStream(Files.newOutputStream(aff)); InputStream aff1 = null; @@ -252,33 +251,33 @@ public class Dictionary { } /** Looks up Hunspell word forms from the dictionary */ - IntsRef lookupWord(char word[], int offset, int length) { + IntsRef lookupWord(char[] word, int offset, int length) { return lookup(words, word, offset, length); } // only for testing - IntsRef lookupPrefix(char word[], int offset, int length) { - return lookup(prefixes, word, offset, length); + IntsRef lookupPrefix(char[] word) { + return lookup(prefixes, word, 0, word.length); } // only for testing - IntsRef lookupSuffix(char word[], int offset, int length) { - return lookup(suffixes, word, offset, length); + IntsRef lookupSuffix(char[] word) { + return lookup(suffixes, word, 0, word.length); } - IntsRef lookup(FST fst, char word[], int offset, int length) { + IntsRef lookup(FST fst, char[] word, int offset, int length) { if (fst == null) { return null; } final FST.BytesReader bytesReader = fst.getBytesReader(); - final FST.Arc arc = fst.getFirstArc(new FST.Arc()); + final FST.Arc arc = fst.getFirstArc(new FST.Arc<>()); // Accumulate output as we go final IntsRef NO_OUTPUT = fst.outputs.getNoOutput(); IntsRef output = NO_OUTPUT; int l = offset + length; try { - for (int i = offset, cp = 0; i < l; i += Character.charCount(cp)) { + for (int i = offset, cp; i < l; i += Character.charCount(cp)) { cp = Character.codePointAt(word, i, l); if (fst.findTargetArc(cp, arc, arc, bytesReader) == null) { return null; @@ -320,7 +319,7 @@ public class Dictionary { seenStrips.put("", 0); LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder)); - String line = null; + String line; while ((line = reader.readLine()) != null) { // ignore any BOM marker on first line if (reader.getLineNumber() == 1 && line.startsWith("\uFEFF")) { @@ -344,31 +343,31 @@ public class Dictionary { complexPrefixes = true; // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix } else if (line.startsWith(CIRCUMFIX_KEY)) { - String parts[] = line.split("\\s+"); + String[] parts = line.split("\\s+"); if (parts.length != 2) { throw new ParseException("Illegal CIRCUMFIX declaration", reader.getLineNumber()); } circumfix = flagParsingStrategy.parseFlag(parts[1]); } else if (line.startsWith(KEEPCASE_KEY)) { - String parts[] = line.split("\\s+"); + String[] parts = line.split("\\s+"); if (parts.length != 2) { throw new ParseException("Illegal KEEPCASE declaration", reader.getLineNumber()); } keepcase = flagParsingStrategy.parseFlag(parts[1]); } else if (line.startsWith(NEEDAFFIX_KEY) || line.startsWith(PSEUDOROOT_KEY)) { - String parts[] = line.split("\\s+"); + String[] parts = line.split("\\s+"); if (parts.length != 2) { throw new ParseException("Illegal NEEDAFFIX declaration", reader.getLineNumber()); } needaffix = flagParsingStrategy.parseFlag(parts[1]); } else if (line.startsWith(ONLYINCOMPOUND_KEY)) { - String parts[] = line.split("\\s+"); + String[] parts = line.split("\\s+"); if (parts.length != 2) { throw new ParseException("Illegal ONLYINCOMPOUND declaration", reader.getLineNumber()); } onlyincompound = flagParsingStrategy.parseFlag(parts[1]); } else if (line.startsWith(IGNORE_KEY)) { - String parts[] = line.split("\\s+"); + String[] parts = line.split("\\s+"); if (parts.length != 2) { throw new ParseException("Illegal IGNORE declaration", reader.getLineNumber()); } @@ -376,7 +375,7 @@ public class Dictionary { Arrays.sort(ignore); needsInputCleaning = true; } else if (line.startsWith(ICONV_KEY) || line.startsWith(OCONV_KEY)) { - String parts[] = line.split("\\s+"); + String[] parts = line.split("\\s+"); String type = parts[0]; if (parts.length != 2) { throw new ParseException("Illegal " + type + " declaration", reader.getLineNumber()); @@ -475,10 +474,10 @@ public class Dictionary { BytesRefBuilder scratch = new BytesRefBuilder(); StringBuilder sb = new StringBuilder(); - String args[] = header.split("\\s+"); + String[] args = header.split("\\s+"); boolean crossProduct = args[2].equals("Y"); - boolean isSuffix = conditionPattern == SUFFIX_CONDITION_REGEX_PATTERN; + boolean isSuffix = conditionPattern.equals(SUFFIX_CONDITION_REGEX_PATTERN); int numLines = Integer.parseInt(args[3]); affixData = ArrayUtil.grow(affixData, (currentAffix << 3) + (numLines << 3)); @@ -488,7 +487,7 @@ public class Dictionary { for (int i = 0; i < numLines; i++) { assert affixWriter.getPosition() == currentAffix << 3; String line = reader.readLine(); - String ruleArgs[] = line.split("\\s+"); + String[] ruleArgs = line.split("\\s+"); // from the manpage: PFX flag stripping prefix [condition [morphological_fields...]] // condition is optional @@ -501,7 +500,7 @@ public class Dictionary { char flag = flagParsingStrategy.parseFlag(ruleArgs[1]); String strip = ruleArgs[2].equals("0") ? "" : ruleArgs[2]; String affixArg = ruleArgs[3]; - char appendFlags[] = null; + char[] appendFlags = null; // first: parse continuation classes out of affix int flagSep = affixArg.lastIndexOf('/'); @@ -585,7 +584,7 @@ public class Dictionary { affixWriter.writeShort((short) flag); affixWriter.writeShort((short) stripOrd.intValue()); // encode crossProduct into patternIndex - int patternOrd = patternIndex.intValue() << 1 | (crossProduct ? 1 : 0); + int patternOrd = patternIndex << 1 | (crossProduct ? 1 : 0); affixWriter.writeShort((short) patternOrd); affixWriter.writeShort((short) appendFlagsOrd); @@ -598,12 +597,7 @@ public class Dictionary { affixArg = new StringBuilder(affixArg).reverse().toString(); } - List list = affixes.get(affixArg); - if (list == null) { - list = new ArrayList<>(); - affixes.put(affixArg, list); - } - list.add(currentAffix); + affixes.computeIfAbsent(affixArg, __ -> new ArrayList<>()).add(currentAffix); currentAffix++; } } @@ -614,7 +608,7 @@ public class Dictionary { for (int i = 0; i < num; i++) { String line = reader.readLine(); - String parts[] = line.split("\\s+"); + String[] parts = line.split("\\s+"); if (parts.length != 3) { throw new ParseException("invalid syntax: " + line, reader.getLineNumber()); } @@ -707,7 +701,7 @@ public class Dictionary { * definition */ static FlagParsingStrategy getFlagParsingStrategy(String flagLine) { - String parts[] = flagLine.split("\\s+"); + String[] parts = flagLine.split("\\s+"); if (parts.length != 2) { throw new IllegalArgumentException("Illegal FLAG specification: " + flagLine); } @@ -724,11 +718,11 @@ public class Dictionary { throw new IllegalArgumentException("Unknown flag type: " + flagType); } - final char FLAG_SEPARATOR = 0x1f; // flag separator after escaping - final char MORPH_SEPARATOR = + private static final char FLAG_SEPARATOR = 0x1f; // flag separator after escaping + private static final char MORPH_SEPARATOR = 0x1e; // separator for boundary of entry (may be followed by morph data) - String unescapeEntry(String entry) { + private String unescapeEntry(String entry) { StringBuilder sb = new StringBuilder(); int end = morphBoundary(entry); for (int i = 0; i < end; i++) { @@ -738,9 +732,7 @@ public class Dictionary { i++; } else if (ch == '/') { sb.append(FLAG_SEPARATOR); - } else if (ch == MORPH_SEPARATOR || ch == FLAG_SEPARATOR) { - // BINARY EXECUTABLES EMBEDDED IN ZULU DICTIONARIES!!!!!!! - } else { + } else if (!shouldSkipEscapedChar(ch)) { sb.append(ch); } } @@ -748,9 +740,7 @@ public class Dictionary { if (end < entry.length()) { for (int i = end; i < entry.length(); i++) { char c = entry.charAt(i); - if (c == FLAG_SEPARATOR || c == MORPH_SEPARATOR) { - // BINARY EXECUTABLES EMBEDDED IN ZULU DICTIONARIES!!!!!!! - } else { + if (!shouldSkipEscapedChar(c)) { sb.append(c); } } @@ -758,6 +748,11 @@ public class Dictionary { return sb.toString(); } + private static boolean shouldSkipEscapedChar(char ch) { + return ch == FLAG_SEPARATOR + || ch == MORPH_SEPARATOR; // BINARY EXECUTABLES EMBEDDED IN ZULU DICTIONARIES!!!!!!! + } + static int morphBoundary(String line) { int end = indexOfSpaceOrTab(line, 0); if (end == -1) { @@ -812,9 +807,9 @@ public class Dictionary { try (ByteSequencesWriter writer = new ByteSequencesWriter(unsorted)) { for (InputStream dictionary : dictionaries) { BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder)); - String line = - lines.readLine(); // first line is number of entries (approximately, sometimes) + lines.readLine(); // first line is number of entries (approximately, sometimes) + String line; while ((line = lines.readLine()) != null) { // wild and unpredictable code comment rules if (line.isEmpty() @@ -825,7 +820,7 @@ public class Dictionary { } line = unescapeEntry(line); // if we havent seen any stem exceptions, try to parse one - if (hasStemExceptions == false) { + if (!hasStemExceptions) { int morphStart = line.indexOf(MORPH_SEPARATOR); if (morphStart >= 0 && morphStart < line.length()) { hasStemExceptions = parseStemException(line.substring(morphStart + 1)) != null; @@ -861,35 +856,28 @@ public class Dictionary { new OfflineSorter( tempDir, tempFileNamePrefix, - new Comparator() { - BytesRef scratch1 = new BytesRef(); - BytesRef scratch2 = new BytesRef(); + new Comparator<>() { + final BytesRef scratch1 = new BytesRef(); + final BytesRef scratch2 = new BytesRef(); + + private void initScratch(BytesRef o, BytesRef scratch) { + scratch.bytes = o.bytes; + scratch.offset = o.offset; + scratch.length = o.length; + + for (int i = scratch.length - 1; i >= 0; i--) { + if (scratch.bytes[scratch.offset + i] == FLAG_SEPARATOR + || scratch.bytes[scratch.offset + i] == MORPH_SEPARATOR) { + scratch.length = i; + break; + } + } + } @Override public int compare(BytesRef o1, BytesRef o2) { - scratch1.bytes = o1.bytes; - scratch1.offset = o1.offset; - scratch1.length = o1.length; - - for (int i = scratch1.length - 1; i >= 0; i--) { - if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR - || scratch1.bytes[scratch1.offset + i] == MORPH_SEPARATOR) { - scratch1.length = i; - break; - } - } - - scratch2.bytes = o2.bytes; - scratch2.offset = o2.offset; - scratch2.length = o2.length; - - for (int i = scratch2.length - 1; i >= 0; i--) { - if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR - || scratch2.bytes[scratch2.offset + i] == MORPH_SEPARATOR) { - scratch2.length = i; - break; - } - } + initScratch(o1, scratch1); + initScratch(o2, scratch2); int cmp = scratch1.compareTo(scratch2); if (cmp == 0) { @@ -933,7 +921,7 @@ public class Dictionary { String line = scratch.utf8ToString(); String entry; - char wordForm[]; + char[] wordForm; int end; int flagSep = line.indexOf(FLAG_SEPARATOR); @@ -980,7 +968,7 @@ public class Dictionary { words.add(scratchInts.get(), currentOrds.get()); } // swap current - if (cmp > 0 || currentEntry == null) { + if (cmp > 0) { currentEntry = entry; currentOrds = new IntsRefBuilder(); // must be this way } @@ -994,6 +982,7 @@ public class Dictionary { } // finalize last entry + assert currentEntry != null; Util.toUTF32(currentEntry, scratchInts); words.add(scratchInts.get(), currentOrds.get()); success2 = true; @@ -1011,7 +1000,7 @@ public class Dictionary { return CharsRef.EMPTY_CHARS; } int len = b.length >>> 1; - char flags[] = new char[len]; + char[] flags = new char[len]; int upto = 0; int end = b.offset + b.length; for (int i = b.offset; i < end; i += 2) { @@ -1020,19 +1009,18 @@ public class Dictionary { return flags; } - static void encodeFlags(BytesRefBuilder b, char flags[]) { + private static void encodeFlags(BytesRefBuilder b, char[] flags) { int len = flags.length << 1; b.grow(len); b.clear(); - for (int i = 0; i < flags.length; i++) { - int flag = flags[i]; + for (int flag : flags) { b.append((byte) ((flag >> 8) & 0xff)); b.append((byte) (flag & 0xff)); } } private void parseAlias(String line) { - String ruleArgs[] = line.split("\\s+"); + String[] ruleArgs = line.split("\\s+"); if (aliases == null) { // first line should be the aliases count final int count = Integer.parseInt(ruleArgs[1]); @@ -1102,7 +1090,7 @@ public class Dictionary { * @return Parsed flag */ char parseFlag(String rawFlag) { - char flags[] = parseFlags(rawFlag); + char[] flags = parseFlags(rawFlag); if (flags.length != 1) { throw new IllegalArgumentException("expected only one flag, got: " + rawFlag); } @@ -1140,9 +1128,9 @@ public class Dictionary { char[] flags = new char[rawFlagParts.length]; int upto = 0; - for (int i = 0; i < rawFlagParts.length; i++) { + for (String rawFlagPart : rawFlagParts) { // note, removing the trailing X/leading I for nepali... what is the rule here?! - String replacement = rawFlagParts[i].replaceAll("[^0-9]", ""); + String replacement = rawFlagPart.replaceAll("[^0-9]", ""); // note, ignoring empty flags (this happens in danish, for example) if (replacement.isEmpty()) { continue; @@ -1185,13 +1173,13 @@ public class Dictionary { builder.append(combined); } - char flags[] = new char[builder.length()]; + char[] flags = new char[builder.length()]; builder.getChars(0, builder.length(), flags, 0); return flags; } } - static boolean hasFlag(char flags[], char flag) { + static boolean hasFlag(char[] flags, char flag) { return Arrays.binarySearch(flags, flag) >= 0; } @@ -1247,7 +1235,7 @@ public class Dictionary { // TODO: this could be more efficient! static void applyMappings(FST fst, StringBuilder sb) throws IOException { final FST.BytesReader bytesReader = fst.getBytesReader(); - final FST.Arc firstArc = fst.getFirstArc(new FST.Arc()); + final FST.Arc firstArc = fst.getFirstArc(new FST.Arc<>()); final CharsRef NO_OUTPUT = fst.outputs.getNoOutput(); // temporary stuff @@ -1290,6 +1278,7 @@ public class Dictionary { private static Path DEFAULT_TEMP_DIR; /** Used by test framework */ + @SuppressWarnings("unused") public static void setDefaultTempDir(Path tempDir) { DEFAULT_TEMP_DIR = tempDir; } @@ -1306,7 +1295,7 @@ public class Dictionary { throw new IOException("Java has no temporary folder property (java.io.tmpdir)?"); } Path tempDirectory = Paths.get(tempDirPath); - if (Files.isWritable(tempDirectory) == false) { + if (!Files.isWritable(tempDirectory)) { throw new IOException( "Java's temporary folder not present or writeable?: " + tempDirectory.toAbsolutePath()); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java index d067d6566e0..c0f22994388 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java @@ -43,7 +43,7 @@ final class Stemmer { // used for normalization private final StringBuilder scratchSegment = new StringBuilder(); - private char scratchBuffer[] = new char[32]; + private char[] scratchBuffer = new char[32]; // it's '1' if we have no stem exceptions, otherwise every other form // is really an ID pointing to the exception table @@ -86,7 +86,7 @@ final class Stemmer { * @param word Word to find the stems for * @return List of stems for the word */ - public List stem(char word[], int length) { + public List stem(char[] word, int length) { if (dictionary.needsInputCleaning) { scratchSegment.setLength(0); @@ -128,7 +128,7 @@ final class Stemmer { private static final int UPPER_CASE = 2; /** returns EXACT_CASE,TITLE_CASE, or UPPER_CASE type for the word */ - private int caseOf(char word[], int length) { + private int caseOf(char[] word, int length) { if (dictionary.ignoreCase || length == 0 || !Character.isUpperCase(word[0])) { return EXACT_CASE; } @@ -152,7 +152,7 @@ final class Stemmer { } /** folds titlecase variant of word to titleBuffer */ - private void caseFoldTitle(char word[], int length) { + private void caseFoldTitle(char[] word, int length) { titleBuffer = ArrayUtil.grow(titleBuffer, length); System.arraycopy(word, 0, titleBuffer, 0, length); for (int i = 1; i < length; i++) { @@ -161,13 +161,13 @@ final class Stemmer { } /** folds lowercase variant of word (title cased) to lowerBuffer */ - private void caseFoldLower(char word[], int length) { + private void caseFoldLower(char[] word, int length) { lowerBuffer = ArrayUtil.grow(lowerBuffer, length); System.arraycopy(word, 0, lowerBuffer, 0, length); lowerBuffer[0] = dictionary.caseFold(lowerBuffer[0]); } - private List doStem(char word[], int length, boolean caseVariant) { + private List doStem(char[] word, int length, boolean caseVariant) { List stems = new ArrayList<>(); IntsRef forms = dictionary.lookupWord(word, 0, length); if (forms != null) { @@ -177,7 +177,7 @@ final class Stemmer { boolean checkOnlyInCompound = dictionary.onlyincompound != -1; if (checkKeepCase || checkNeedAffix || checkOnlyInCompound) { dictionary.flagLookup.get(forms.ints[forms.offset + i], scratch); - char wordFlags[] = Dictionary.decodeFlags(scratch); + char[] wordFlags = Dictionary.decodeFlags(scratch); // we are looking for a case variant, but this word does not allow it if (checkKeepCase && Dictionary.hasFlag(wordFlags, (char) dictionary.keepcase)) { continue; @@ -196,8 +196,7 @@ final class Stemmer { } } try { - boolean v = - stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant)); + stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant)); } catch (IOException bogus) { throw new RuntimeException(bogus); } @@ -210,7 +209,7 @@ final class Stemmer { * @param word Word to find the stems for * @return List of stems for the word */ - public List uniqueStems(char word[], int length) { + public List uniqueStems(char[] word, int length) { List stems = stem(word, length); if (stems.size() < 2) { return stems; @@ -226,7 +225,7 @@ final class Stemmer { return deduped; } - private CharsRef newStem(char buffer[], int length, IntsRef forms, int formID) { + private CharsRef newStem(char[] buffer, int length, IntsRef forms, int formID) { final String exception; if (dictionary.hasStemExceptions) { int exceptionID = forms.ints[forms.offset + formID + 1]; @@ -251,7 +250,7 @@ final class Stemmer { } catch (IOException bogus) { throw new RuntimeException(bogus); } - char cleaned[] = new char[scratchSegment.length()]; + char[] cleaned = new char[scratchSegment.length()]; scratchSegment.getChars(0, cleaned.length, cleaned, 0); return new CharsRef(cleaned, 0, cleaned.length); } else { @@ -264,15 +263,15 @@ final class Stemmer { } // some state for traversing FSTs - final FST.BytesReader prefixReaders[] = new FST.BytesReader[3]; + private final FST.BytesReader[] prefixReaders = new FST.BytesReader[3]; @SuppressWarnings({"unchecked", "rawtypes"}) - final FST.Arc prefixArcs[] = new FST.Arc[3]; + private final FST.Arc[] prefixArcs = new FST.Arc[3]; - final FST.BytesReader suffixReaders[] = new FST.BytesReader[3]; + private final FST.BytesReader[] suffixReaders = new FST.BytesReader[3]; @SuppressWarnings({"unchecked", "rawtypes"}) - final FST.Arc suffixArcs[] = new FST.Arc[3]; + private final FST.Arc[] suffixArcs = new FST.Arc[3]; /** * Generates a list of stems for the provided word @@ -296,7 +295,7 @@ final class Stemmer { * @return List of stems, or empty list if no stems are found */ private List stem( - char word[], + char[] word, int length, int previous, int prevFlag, @@ -330,12 +329,10 @@ final class Stemmer { output = fst.outputs.add(output, arc.output()); } } - IntsRef prefixes = null; if (!arc.isFinal()) { continue; - } else { - prefixes = fst.outputs.add(output, arc.nextFinalOutput()); } + IntsRef prefixes = fst.outputs.add(output, arc.nextFinalOutput()); for (int j = 0; j < prefixes.length; j++) { int prefix = prefixes.ints[prefixes.offset + j]; @@ -357,13 +354,13 @@ final class Stemmer { } else { // check if affix is allowed in a non-compound word dictionary.flagLookup.get(append, scratch); - char appendFlags[] = Dictionary.decodeFlags(scratch); + char[] appendFlags = Dictionary.decodeFlags(scratch); compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound); } } else if (crossProduct) { // cross check incoming continuation class (flag of previous affix) against list. dictionary.flagLookup.get(append, scratch); - char appendFlags[] = Dictionary.decodeFlags(scratch); + char[] appendFlags = Dictionary.decodeFlags(scratch); assert prevFlag >= 0; boolean allowed = dictionary.onlyincompound == -1 @@ -374,8 +371,7 @@ final class Stemmer { } if (compatible) { - int deAffixedStart = i; - int deAffixedLength = length - deAffixedStart; + int deAffixedLength = length - i; int stripStart = dictionary.stripOffsets[stripOrd]; int stripEnd = dictionary.stripOffsets[stripOrd + 1]; @@ -387,14 +383,14 @@ final class Stemmer { stripStart, stripLength, word, - deAffixedStart, + i, deAffixedLength)) { continue; } - char strippedWord[] = new char[stripLength + deAffixedLength]; + char[] strippedWord = new char[stripLength + deAffixedLength]; System.arraycopy(dictionary.stripData, stripStart, strippedWord, 0, stripLength); - System.arraycopy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength); + System.arraycopy(word, i, strippedWord, stripLength, deAffixedLength); List stemList = applyAffix( @@ -431,12 +427,10 @@ final class Stemmer { output = fst.outputs.add(output, arc.output()); } } - IntsRef suffixes = null; if (!arc.isFinal()) { continue; - } else { - suffixes = fst.outputs.add(output, arc.nextFinalOutput()); } + IntsRef suffixes = fst.outputs.add(output, arc.nextFinalOutput()); for (int j = 0; j < suffixes.length; j++) { int suffix = suffixes.ints[suffixes.offset + j]; @@ -458,13 +452,13 @@ final class Stemmer { } else { // check if affix is allowed in a non-compound word dictionary.flagLookup.get(append, scratch); - char appendFlags[] = Dictionary.decodeFlags(scratch); + char[] appendFlags = Dictionary.decodeFlags(scratch); compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound); } } else if (crossProduct) { // cross check incoming continuation class (flag of previous affix) against list. dictionary.flagLookup.get(append, scratch); - char appendFlags[] = Dictionary.decodeFlags(scratch); + char[] appendFlags = Dictionary.decodeFlags(scratch); assert prevFlag >= 0; boolean allowed = dictionary.onlyincompound == -1 @@ -494,7 +488,7 @@ final class Stemmer { continue; } - char strippedWord[] = new char[stripLength + deAffixedLength]; + char[] strippedWord = new char[stripLength + deAffixedLength]; System.arraycopy(word, 0, strippedWord, 0, deAffixedLength); System.arraycopy( dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength); @@ -524,7 +518,7 @@ final class Stemmer { // just check the stem // but this is a little bit more complicated. private boolean checkCondition( - int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) { + int condition, char[] c1, int c1off, int c1len, char[] c2, int c2off, int c2len) { if (condition != 0) { CharacterRunAutomaton pattern = dictionary.patterns.get(condition); int state = 0; @@ -559,7 +553,7 @@ final class Stemmer { * @return List of stems for the word, or an empty list if none are found */ List applyAffix( - char strippedWord[], + char[] strippedWord, int length, int affix, int prefixFlag, @@ -572,9 +566,7 @@ final class Stemmer { affixReader.setPosition(8 * affix); char flag = (char) (affixReader.readShort() & 0xffff); affixReader.skipBytes(2); // strip - int condition = (char) (affixReader.readShort() & 0xffff); - boolean crossProduct = (condition & 1) == 1; - condition >>>= 1; + boolean crossProduct = ((int) (char) (affixReader.readShort() & 0xffff) & 1) == 1; char append = (char) (affixReader.readShort() & 0xffff); List stems = new ArrayList<>(); @@ -583,18 +575,18 @@ final class Stemmer { if (forms != null) { for (int i = 0; i < forms.length; i += formStep) { dictionary.flagLookup.get(forms.ints[forms.offset + i], scratch); - char wordFlags[] = Dictionary.decodeFlags(scratch); + char[] wordFlags = Dictionary.decodeFlags(scratch); if (Dictionary.hasFlag(wordFlags, flag)) { // confusing: in this one exception, we already chained the first prefix against the // second, // so it doesnt need to be checked against the word boolean chainedPrefix = dictionary.complexPrefixes && recursionDepth == 1 && prefix; - if (chainedPrefix == false + if (!chainedPrefix && prefixFlag >= 0 && !Dictionary.hasFlag(wordFlags, (char) prefixFlag)) { // see if we can chain prefix thru the suffix continuation class (only if it has any!) dictionary.flagLookup.get(append, scratch); - char appendFlags[] = Dictionary.decodeFlags(scratch); + char[] appendFlags = Dictionary.decodeFlags(scratch); if (!hasCrossCheckedFlag((char) prefixFlag, appendFlags, false)) { continue; } @@ -604,7 +596,7 @@ final class Stemmer { // to ensure it has it, and vice versa if (dictionary.circumfix != -1) { dictionary.flagLookup.get(append, scratch); - char appendFlags[] = Dictionary.decodeFlags(scratch); + char[] appendFlags = Dictionary.decodeFlags(scratch); boolean suffixCircumfix = Dictionary.hasFlag(appendFlags, (char) dictionary.circumfix); if (circumfix != suffixCircumfix) { continue; @@ -631,7 +623,7 @@ final class Stemmer { // have that flag if (dictionary.circumfix != -1 && !circumfix && prefix) { dictionary.flagLookup.get(append, scratch); - char appendFlags[] = Dictionary.decodeFlags(scratch); + char[] appendFlags = Dictionary.decodeFlags(scratch); circumfix = Dictionary.hasFlag(appendFlags, (char) dictionary.circumfix); } @@ -654,7 +646,7 @@ final class Stemmer { true, circumfix, caseVariant)); - } else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix) { + } else if (!dictionary.complexPrefixes && dictionary.twoStageAffix) { // we took away a suffix. // COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed // COMPLEXPREFIXES = false: combine with another suffix @@ -688,9 +680,7 @@ final class Stemmer { true, circumfix, caseVariant)); - } else if (prefix == false - && dictionary.complexPrefixes == false - && dictionary.twoStageAffix) { + } else if (!prefix && !dictionary.complexPrefixes && dictionary.twoStageAffix) { // we took away a prefix, then a suffix: go look for another suffix stems.addAll( stem( diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java index 34852cfa623..d8bc47ccb41 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java @@ -42,18 +42,19 @@ public class TestDictionary extends LuceneTestCase { Directory tempDir = getDirectory(); Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream); - assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}, 0, 1).length); - assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}, 0, 1).length); + assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length); + assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length); IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3); assertNotNull(ordList); assertEquals(1, ordList.length); BytesRef ref = new BytesRef(); dictionary.flagLookup.get(ordList.ints[0], ref); - char flags[] = Dictionary.decodeFlags(ref); + char[] flags = Dictionary.decodeFlags(ref); assertEquals(1, flags.length); - ordList = dictionary.lookupWord(new char[] {'l', 'u', 'c', 'e', 'n'}, 0, 5); + int offset = random().nextInt(10); + ordList = dictionary.lookupWord((" ".repeat(offset) + "lucen").toCharArray(), offset, 5); assertNotNull(ordList); assertEquals(1, ordList.length); dictionary.flagLookup.get(ordList.ints[0], ref); @@ -71,12 +72,12 @@ public class TestDictionary extends LuceneTestCase { Directory tempDir = getDirectory(); Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream); - assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}, 0, 1).length); - assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}, 0, 1).length); + assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length); + assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length); IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3); BytesRef ref = new BytesRef(); dictionary.flagLookup.get(ordList.ints[0], ref); - char flags[] = Dictionary.decodeFlags(ref); + char[] flags = Dictionary.decodeFlags(ref); assertEquals(1, flags.length); affixStream.close(); @@ -90,12 +91,12 @@ public class TestDictionary extends LuceneTestCase { Directory tempDir = getDirectory(); Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream); - assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}, 0, 1).length); - assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}, 0, 1).length); + assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length); + assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length); IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3); BytesRef ref = new BytesRef(); dictionary.flagLookup.get(ordList.ints[0], ref); - char flags[] = Dictionary.decodeFlags(ref); + char[] flags = Dictionary.decodeFlags(ref); assertEquals(1, flags.length); affixStream.close(); @@ -109,12 +110,12 @@ public class TestDictionary extends LuceneTestCase { Directory tempDir = getDirectory(); Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream); - assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}, 0, 1).length); - assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}, 0, 1).length); + assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length); + assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length); IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3); BytesRef ref = new BytesRef(); dictionary.flagLookup.get(ordList.ints[0], ref); - char flags[] = Dictionary.decodeFlags(ref); + char[] flags = Dictionary.decodeFlags(ref); assertEquals(1, flags.length); affixStream.close(); @@ -131,9 +132,7 @@ public class TestDictionary extends LuceneTestCase { ParseException expected = expectThrows( ParseException.class, - () -> { - new Dictionary(tempDir, "dictionary", affixStream, dictStream); - }); + () -> new Dictionary(tempDir, "dictionary", affixStream, dictStream)); assertTrue( expected .getMessage() @@ -153,10 +152,7 @@ public class TestDictionary extends LuceneTestCase { Exception expected = expectThrows( - Exception.class, - () -> { - new Dictionary(tempDir, "dictionary", affixStream, dictStream); - }); + Exception.class, () -> new Dictionary(tempDir, "dictionary", affixStream, dictStream)); assertTrue(expected.getMessage().startsWith("expected only one flag")); affixStream.close(); @@ -272,7 +268,7 @@ public class TestDictionary extends LuceneTestCase { Dictionary.getDictionaryEncoding(new ByteArrayInputStream(new byte[0]))); } - public void testFlagWithCrazyWhitespace() throws Exception { + public void testFlagWithCrazyWhitespace() { assertNotNull(Dictionary.getFlagParsingStrategy("FLAG\tUTF-8")); assertNotNull(Dictionary.getFlagParsingStrategy("FLAG UTF-8")); }