From 760f2dbdcb29b993aab8f981d84ccbf2e20e9fa5 Mon Sep 17 00:00:00 2001 From: Michael Sokolov Date: Thu, 4 Jul 2019 09:45:51 -0400 Subject: [PATCH] LUCENE-8920: encapsulate FST.Arc data --- .../charfilter/MappingCharFilter.java | 8 +- .../analysis/charfilter/NormalizeCharMap.java | 6 +- .../lucene/analysis/hunspell/Dictionary.java | 12 +- .../lucene/analysis/hunspell/Stemmer.java | 12 +- .../miscellaneous/StemmerOverrideFilter.java | 4 +- .../analysis/synonym/SynonymFilter.java | 8 +- .../analysis/synonym/SynonymGraphFilter.java | 8 +- .../lucene/analysis/ja/JapaneseTokenizer.java | 8 +- .../analysis/ja/dict/UserDictionary.java | 4 +- .../lucene/analysis/ko/KoreanTokenizer.java | 8 +- .../analysis/ko/dict/UserDictionary.java | 4 +- .../blocktreeords/OrdsIntersectTermsEnum.java | 6 +- .../blocktreeords/OrdsSegmentTermsEnum.java | 74 ++-- .../codecs/memory/FSTOrdTermsReader.java | 28 +- .../lucene/codecs/memory/FSTTermsReader.java | 30 +- .../codecs/blocktree/IntersectTermsEnum.java | 6 +- .../codecs/blocktree/SegmentTermsEnum.java | 48 +-- .../java/org/apache/lucene/util/fst/FST.java | 376 ++++++++++-------- .../org/apache/lucene/util/fst/FSTEnum.java | 144 ++++--- .../org/apache/lucene/util/fst/NodeHash.java | 18 +- .../java/org/apache/lucene/util/fst/Util.java | 168 ++++---- .../org/apache/lucene/util/fst/TestFSTs.java | 20 +- .../apache/lucene/util/fst/TestFstDirect.java | 4 +- .../idversion/IDVersionSegmentTermsEnum.java | 48 +-- .../suggest/analyzing/AnalyzingSuggester.java | 2 +- .../search/suggest/analyzing/FSTUtil.java | 18 +- .../suggest/analyzing/FreeTextSuggester.java | 4 +- .../search/suggest/document/NRTSuggester.java | 2 +- .../search/suggest/fst/FSTCompletion.java | 10 +- .../suggest/fst/WFSTCompletionLookup.java | 6 +- .../org/apache/lucene/util/fst/FSTTester.java | 8 +- 31 files changed, 579 insertions(+), 523 deletions(-) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java 
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java index 764e1c659f1..5fffda94030 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java @@ -106,17 +106,17 @@ public class MappingCharFilter extends BaseCharFilter { // Fast pass for single character match: assert arc.isFinal(); lastMatchLen = 1; - lastMatch = arc.output; + lastMatch = arc.output(); } else { int lookahead = 0; - CharsRef output = arc.output; + CharsRef output = arc.output(); while (true) { lookahead++; if (arc.isFinal()) { // Match! (to node is final) lastMatchLen = lookahead; - lastMatch = outputs.add(output, arc.nextFinalOutput); + lastMatch = outputs.add(output, arc.nextFinalOutput()); // Greedy: keep searching to see if there's a // longer match... } @@ -133,7 +133,7 @@ public class MappingCharFilter extends BaseCharFilter { // Dead end break; } - output = outputs.add(output, arc.output); + output = outputs.add(output, arc.output()); } } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java index ffc832f267b..b3efcf73f86 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java @@ -50,10 +50,10 @@ public class NormalizeCharMap { final FST.BytesReader fstReader = map.getBytesReader(); map.getFirstArc(scratchArc); if (FST.targetHasArcs(scratchArc)) { - map.readFirstRealTargetArc(scratchArc.target, scratchArc, fstReader); + map.readFirstRealTargetArc(scratchArc.target(), scratchArc, fstReader); while(true) { - assert scratchArc.label != FST.END_LABEL; - cachedRootArcs.put(Character.valueOf((char) scratchArc.label), new 
FST.Arc().copyFrom(scratchArc)); + assert scratchArc.label() != FST.END_LABEL; + cachedRootArcs.put(Character.valueOf((char) scratchArc.label()), new FST.Arc().copyFrom(scratchArc)); if (scratchArc.isLast()) { break; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index 6af2f12b0e3..443f010047f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -280,14 +280,14 @@ public class Dictionary { cp = Character.codePointAt(word, i, l); if (fst.findTargetArc(cp, arc, arc, bytesReader) == null) { return null; - } else if (arc.output != NO_OUTPUT) { - output = fst.outputs.add(output, arc.output); + } else if (arc.output() != NO_OUTPUT) { + output = fst.outputs.add(output, arc.output()); } } if (fst.findTargetArc(FST.END_LABEL, arc, arc, bytesReader) == null) { return null; - } else if (arc.output != NO_OUTPUT) { - return fst.outputs.add(output, arc.output); + } else if (arc.output() != NO_OUTPUT) { + return fst.outputs.add(output, arc.output()); } else { return output; } @@ -1228,10 +1228,10 @@ public class Dictionary { if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) { break; } else { - output = fst.outputs.add(output, arc.output); + output = fst.outputs.add(output, arc.output()); } if (arc.isFinal()) { - longestOutput = fst.outputs.add(output, arc.nextFinalOutput); + longestOutput = fst.outputs.add(output, arc.nextFinalOutput()); longestMatch = j; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java index a7b4e8b2092..888d89eb19c 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java +++ 
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java @@ -311,15 +311,15 @@ final class Stemmer { int ch = word[i-1]; if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) { break; - } else if (arc.output != NO_OUTPUT) { - output = fst.outputs.add(output, arc.output); + } else if (arc.output() != NO_OUTPUT) { + output = fst.outputs.add(output, arc.output()); } } IntsRef prefixes = null; if (!arc.isFinal()) { continue; } else { - prefixes = fst.outputs.add(output, arc.nextFinalOutput); + prefixes = fst.outputs.add(output, arc.nextFinalOutput()); } for (int j = 0; j < prefixes.length; j++) { @@ -395,15 +395,15 @@ final class Stemmer { int ch = word[i]; if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) { break; - } else if (arc.output != NO_OUTPUT) { - output = fst.outputs.add(output, arc.output); + } else if (arc.output() != NO_OUTPUT) { + output = fst.outputs.add(output, arc.output()); } } IntsRef suffixes = null; if (!arc.isFinal()) { continue; } else { - suffixes = fst.outputs.add(output, arc.nextFinalOutput); + suffixes = fst.outputs.add(output, arc.nextFinalOutput()); } for (int j = 0; j < suffixes.length; j++) { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java index 32423e96e93..078865fc344 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java @@ -132,11 +132,11 @@ public final class StemmerOverrideFilter extends TokenFilter { if (fst.findTargetArc(ignoreCase ? 
Character.toLowerCase(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == null) { return null; } - pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); + pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output()); bufUpto += Character.charCount(codePoint); } if (scratchArc.isFinal()) { - matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput); + matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput()); } return matchOutput; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java index a51edb5dba0..6894353fa12 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java @@ -330,7 +330,7 @@ public final class SynonymFilter extends TokenFilter { BytesRef pendingOutput = fst.outputs.getNoOutput(); fst.getFirstArc(scratchArc); - assert scratchArc.output == fst.outputs.getNoOutput(); + assert scratchArc.output() == fst.outputs.getNoOutput(); int tokenCount = 0; @@ -399,7 +399,7 @@ public final class SynonymFilter extends TokenFilter { } // Accum the output - pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); + pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output()); //System.out.println(" char=" + buffer[bufUpto] + " output=" + pendingOutput + " arc.output=" + scratchArc.output); bufUpto += Character.charCount(codePoint); } @@ -407,7 +407,7 @@ public final class SynonymFilter extends TokenFilter { // OK, entire token matched; now see if this is a final // state: if (scratchArc.isFinal()) { - matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput); + matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput()); matchInputLength = tokenCount; matchEndOffset = inputEndOffset; 
//System.out.println(" found matchLength=" + matchInputLength + " output=" + matchOutput); @@ -423,7 +423,7 @@ public final class SynonymFilter extends TokenFilter { } else { // More matching is possible -- accum the output (if // any) of the WORD_SEP arc: - pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); + pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output()); if (nextRead == nextWrite) { capture(); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java index e59e61bf723..c6cbea85323 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java @@ -290,7 +290,7 @@ public final class SynonymGraphFilter extends TokenFilter { BytesRef pendingOutput = fst.outputs.getNoOutput(); fst.getFirstArc(scratchArc); - assert scratchArc.output == fst.outputs.getNoOutput(); + assert scratchArc.output() == fst.outputs.getNoOutput(); // How many tokens in the current match int matchLength = 0; @@ -360,7 +360,7 @@ public final class SynonymGraphFilter extends TokenFilter { } // Accum the output - pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); + pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output()); bufUpto += Character.charCount(codePoint); } @@ -369,7 +369,7 @@ public final class SynonymGraphFilter extends TokenFilter { // OK, entire token matched; now see if this is a final // state in the FST (a match): if (scratchArc.isFinal()) { - matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput); + matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput()); matchInputLength = matchLength; matchEndOffset = inputEndOffset; //System.out.println(" ** match"); @@ -385,7 +385,7 @@ public final class SynonymGraphFilter 
extends TokenFilter { } else { // More matching is possible -- accum the output (if // any) of the WORD_SEP arc: - pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); + pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output()); doFinalCapture = true; if (liveToken) { capture(); diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java index ea57d1c28c8..96d104bde1c 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java @@ -772,12 +772,12 @@ public final class JapaneseTokenizer extends Tokenizer { if (userFST.findTargetArc(ch, arc, arc, posAhead == posData.pos, userFSTReader) == null) { break; } - output += arc.output.intValue(); + output += arc.output().intValue(); if (arc.isFinal()) { if (VERBOSE) { System.out.println(" USER word " + new String(buffer.get(pos, posAhead - pos + 1)) + " toPos=" + (posAhead + 1)); } - add(userDictionary, posData, posAhead+1, output + arc.nextFinalOutput.intValue(), Type.USER, false); + add(userDictionary, posData, posAhead+1, output + arc.nextFinalOutput().intValue(), Type.USER, false); anyMatches = true; } } @@ -803,7 +803,7 @@ public final class JapaneseTokenizer extends Tokenizer { break; } - output += arc.output.intValue(); + output += arc.output().intValue(); // Optimization: for known words that are too-long // (compound), we should pre-compute the 2nd @@ -812,7 +812,7 @@ public final class JapaneseTokenizer extends Tokenizer { // match is found. 
if (arc.isFinal()) { - dictionary.lookupWordIds(output + arc.nextFinalOutput.intValue(), wordIdRef); + dictionary.lookupWordIds(output + arc.nextFinalOutput().intValue(), wordIdRef); if (VERBOSE) { System.out.println(" KNOWN word " + new String(buffer.get(pos, posAhead - pos + 1)) + " toPos=" + (posAhead + 1) + " " + wordIdRef.length + " wordIDs"); } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java index bf2ef57f3d6..eaa5badd17a 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java @@ -162,9 +162,9 @@ public final class UserDictionary implements Dictionary { if (fst.findTargetArc(ch, arc, arc, i == 0, fstReader) == null) { break; // continue to next position } - output += arc.output.intValue(); + output += arc.output().intValue(); if (arc.isFinal()) { - final int finalOutput = output + arc.nextFinalOutput.intValue(); + final int finalOutput = output + arc.nextFinalOutput().intValue(); result.put(startOffset-off, segmentations[finalOutput]); found = true; } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java index 000c743842c..300740095cc 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java @@ -681,11 +681,11 @@ public final class KoreanTokenizer extends Tokenizer { if (userFST.findTargetArc(ch, arc, arc, posAhead == pos, userFSTReader) == null) { break; } - output += arc.output.intValue(); + output += arc.output().intValue(); if (arc.isFinal()) { maxPosAhead = posAhead; outputMaxPosAhead = output; - arcFinalOutMaxPosAhead = 
arc.nextFinalOutput.intValue(); + arcFinalOutMaxPosAhead = arc.nextFinalOutput().intValue(); anyMatches = true; } } @@ -720,7 +720,7 @@ public final class KoreanTokenizer extends Tokenizer { break; } - output += arc.output.intValue(); + output += arc.output().intValue(); // Optimization: for known words that are too-long // (compound), we should pre-compute the 2nd @@ -729,7 +729,7 @@ public final class KoreanTokenizer extends Tokenizer { // match is found. if (arc.isFinal()) { - dictionary.lookupWordIds(output + arc.nextFinalOutput.intValue(), wordIdRef); + dictionary.lookupWordIds(output + arc.nextFinalOutput().intValue(), wordIdRef); if (VERBOSE) { System.out.println(" KNOWN word " + new String(buffer.get(pos, posAhead - pos + 1)) + " toPos=" + (posAhead + 1) + " " + wordIdRef.length + " wordIDs"); } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java index fb601be6d65..e04d133335e 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java @@ -221,9 +221,9 @@ public final class UserDictionary implements Dictionary { if (fst.findTargetArc(ch, arc, arc, i == 0, fstReader) == null) { break; // continue to next position } - output += arc.output.intValue(); + output += arc.output().intValue(); if (arc.isFinal()) { - final int finalOutput = output + arc.nextFinalOutput.intValue(); + final int finalOutput = output + arc.nextFinalOutput().intValue(); result.add(finalOutput); } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java index 9434ca8928d..fc7984d5d55 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java +++ 
b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java @@ -95,7 +95,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum { f.prefix = 0; f.setState(0); f.arc = arc; - f.outputPrefix = arc.output; + f.outputPrefix = arc.output(); f.load(fr.rootCode); // for assert: @@ -168,14 +168,14 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum { // passed to findTargetArc arc = fr.index.findTargetArc(target, arc, getArc(1+idx), fstReader); assert arc != null; - output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); + output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output()); idx++; } f.arc = arc; f.outputPrefix = output; assert arc.isFinal(); - f.load(OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput)); + f.load(OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput())); return f; } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java index e03d7e672d1..7bfaab55ffa 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java @@ -271,7 +271,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { arc = arcs[0]; assert arc.isFinal(); - output = arc.output; + output = arc.output(); targetUpto = 0; OrdsSegmentTermsEnumFrame lastFrame = stack[0]; @@ -294,9 +294,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { break; } arc = arcs[1+targetUpto]; - assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); - if (arc.output != OrdsBlockTreeTermsWriter.NO_OUTPUT) { - output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); + assert 
arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); + if (arc.output() != OrdsBlockTreeTermsWriter.NO_OUTPUT) { + output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output()); } if (arc.isFinal()) { lastFrame = stack[1+lastFrame.ord]; @@ -374,19 +374,19 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { // Empty string prefix must have an output (block) in the index! assert arc.isFinal(); - assert arc.output != null; + assert arc.output() != null; // if (DEBUG) { // System.out.println(" no seek state; push root frame"); // } - output = arc.output; + output = arc.output(); currentFrame = staticFrame; //term.length = 0; targetUpto = 0; - currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0); + currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0); } positioned = true; @@ -443,9 +443,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { arc = nextArc; term.setByteAt(targetUpto, (byte) targetLabel); // Aggregate output as we go: - assert arc.output != null; - if (arc.output != OrdsBlockTreeTermsWriter.NO_OUTPUT) { - output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); + assert arc.output() != null; + if (arc.output() != OrdsBlockTreeTermsWriter.NO_OUTPUT) { + output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output()); } // if (DEBUG) { @@ -455,7 +455,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { if (arc.isFinal()) { //if (DEBUG) System.out.println(" arc is final!"); - currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto); + currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto); //if (DEBUG) System.out.println(" curFrame.ord=" + 
currentFrame.ord + " hasTerms=" + currentFrame.hasTerms); } } @@ -529,7 +529,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { arc = arcs[0]; assert arc.isFinal(); - output = arc.output; + output = arc.output(); targetUpto = 0; OrdsSegmentTermsEnumFrame lastFrame = stack[0]; @@ -552,14 +552,14 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { break; } arc = arcs[1+targetUpto]; - assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); + assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); // TODO: we could save the outputs in local // byte[][] instead of making new objs ever // seek; but, often the FST doesn't have any // shared bytes (but this could change if we // reverse vLong byte order) - if (arc.output != OrdsBlockTreeTermsWriter.NO_OUTPUT) { - output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); + if (arc.output() != OrdsBlockTreeTermsWriter.NO_OUTPUT) { + output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output()); } if (arc.isFinal()) { lastFrame = stack[1+lastFrame.ord]; @@ -632,19 +632,19 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { // Empty string prefix must have an output (block) in the index! 
assert arc.isFinal(); - assert arc.output != null; + assert arc.output() != null; //if (DEBUG) { //System.out.println(" no seek state; push root frame"); //} - output = arc.output; + output = arc.output(); currentFrame = staticFrame; //term.length = 0; targetUpto = 0; - currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0); + currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0); } positioned = true; @@ -701,9 +701,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { term.setByteAt(targetUpto, (byte) targetLabel); arc = nextArc; // Aggregate output as we go: - assert arc.output != null; - if (arc.output != OrdsBlockTreeTermsWriter.NO_OUTPUT) { - output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); + assert arc.output() != null; + if (arc.output() != OrdsBlockTreeTermsWriter.NO_OUTPUT) { + output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output()); } //if (DEBUG) { @@ -713,7 +713,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { if (arc.isFinal()) { //if (DEBUG) System.out.println(" arc is final!"); - currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto); + currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto); //if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms); } } @@ -766,8 +766,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { } if (fr.index != null) { assert !isSeekFrame || f.arc != null: "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc; - if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix-1)&0xFF)) { - out.println(" broken seek state: arc.label=" + (char) f.arc.label + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF)); + if (f.prefix > 0 && isSeekFrame && f.arc.label() != 
(term.byteAt(f.prefix-1)&0xFF)) { + out.println(" broken seek state: arc.label=" + (char) f.arc.label() + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF)); throw new RuntimeException("seek state is broken"); } Output output = Util.get(fr.index, prefix); @@ -1052,7 +1052,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { final IntsRefBuilder result = new IntsRefBuilder(); fr.index.getFirstArc(arc); - Output output = arc.output; + Output output = arc.output(); int upto = 0; int bestUpto = 0; @@ -1069,7 +1069,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { while (true) { // System.out.println(" loop: output=" + output.startOrd + "-" + (Long.MAX_VALUE-output.endOrd) + " upto=" + upto + " arc=" + arc + " final?=" + arc.isFinal()); if (arc.isFinal()) { - final Output finalOutput = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput); + final Output finalOutput = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()); // System.out.println(" isFinal: " + finalOutput.startOrd + "-" + (Long.MAX_VALUE-finalOutput.endOrd)); if (targetOrd >= finalOutput.startOrd && targetOrd <= Long.MAX_VALUE-finalOutput.endOrd) { // Only one range should match across all arc leaving this node @@ -1082,19 +1082,19 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { if (FST.targetHasArcs(arc)) { // System.out.println(" targetHasArcs"); result.grow(1+upto); - fr.index.readFirstRealTargetArc(arc.target, arc, fstReader); + fr.index.readFirstRealTargetArc(arc.target(), arc, fstReader); - if (arc.bytesPerArc != 0 && arc.arcIdx > Integer.MIN_VALUE) { + if (arc.bytesPerArc() != 0 && arc.arcIdx() > Integer.MIN_VALUE) { // System.out.println(" array arcs"); int low = 0; - int high = arc.numArcs-1; + int high = arc.numArcs() -1; int mid = 0; //System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + " output=" + output); boolean found = false; while (low <= high) { mid = (low + high) 
>>> 1; - fstReader.setPosition(arc.posArcsStart); - fstReader.skipBytes(arc.bytesPerArc*mid); + fstReader.setPosition(arc.posArcsStart()); + fstReader.skipBytes(arc.bytesPerArc() *mid); final byte flags = fstReader.readByte(); fr.index.readLabel(fstReader); final Output minArcOutput; @@ -1116,8 +1116,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { } if (found) { - // Keep recursing - arc.arcIdx = mid-1; + // Keep recursing + arc.arcIdx(mid - 1); } else { result.setLength(bestUpto); InputOutput io = new InputOutput(); @@ -1130,8 +1130,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { fr.index.readNextRealArc(arc, fstReader); // Recurse on this arc: - result.setIntAt(upto++, arc.label); - output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); + result.setIntAt(upto++, arc.label()); + output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output()); } else { // System.out.println(" non-array arc"); @@ -1141,14 +1141,14 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { // This is the min output we'd hit if we follow // this arc: - final Output minArcOutput = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); + final Output minArcOutput = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output()); long endOrd = Long.MAX_VALUE - minArcOutput.endOrd; // System.out.println(" endOrd=" + endOrd + " targetOrd=" + targetOrd); if (targetOrd >= minArcOutput.startOrd && targetOrd <= endOrd) { // Recurse on this arc: output = minArcOutput; - result.setIntAt(upto++, arc.label); + result.setIntAt(upto++, arc.label()); break; } else if (targetOrd < endOrd || arc.isLast()) { result.setLength(bestUpto); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java index 12110d9a811..d653c1209dd 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java 
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java @@ -621,8 +621,8 @@ public class FSTOrdTermsReader extends FieldsProducer { @Override void decodeStats() throws IOException { final FST.Arc arc = topFrame().arc; - assert arc.nextFinalOutput == fstOutputs.getNoOutput(); - ord = arc.output; + assert arc.nextFinalOutput() == fstOutputs.getNoOutput(); + ord = arc.output(); super.decodeStats(); } @@ -675,7 +675,7 @@ public class FSTOrdTermsReader extends FieldsProducer { frame = newFrame(); label = target.bytes[upto] & 0xff; frame = loadCeilFrame(label, topFrame(), frame); - if (frame == null || frame.arc.label != label) { + if (frame == null || frame.arc.label() != label) { break; } assert isValid(frame); // target must be fetched from automaton @@ -703,15 +703,15 @@ public class FSTOrdTermsReader extends FieldsProducer { } /** Virtual frame, never pop */ - Frame loadVirtualFrame(Frame frame) throws IOException { - frame.arc.output = fstOutputs.getNoOutput(); - frame.arc.nextFinalOutput = fstOutputs.getNoOutput(); + Frame loadVirtualFrame(Frame frame) { + frame.arc.output(fstOutputs.getNoOutput()); + frame.arc.nextFinalOutput(fstOutputs.getNoOutput()); frame.state = -1; return frame; } /** Load frame for start arc(node) on fst */ - Frame loadFirstFrame(Frame frame) throws IOException { + Frame loadFirstFrame(Frame frame) { frame.arc = fst.getFirstArc(frame.arc); frame.state = 0; return frame; @@ -722,8 +722,8 @@ public class FSTOrdTermsReader extends FieldsProducer { if (!canGrow(top)) { return null; } - frame.arc = fst.readFirstRealTargetArc(top.arc.target, frame.arc, fstReader); - frame.state = fsa.step(top.state, frame.arc.label); + frame.arc = fst.readFirstRealTargetArc(top.arc.target(), frame.arc, fstReader); + frame.state = fsa.step(top.state, frame.arc.label()); //if (TEST) System.out.println(" loadExpand frame="+frame); if (frame.state == -1) { return loadNextFrame(top, frame); @@ -738,7 +738,7 @@ public class FSTOrdTermsReader 
extends FieldsProducer { } while (!frame.arc.isLast()) { frame.arc = fst.readNextRealArc(frame.arc, fstReader); - frame.state = fsa.step(top.state, frame.arc.label); + frame.state = fsa.step(top.state, frame.arc.label()); if (frame.state != -1) { break; } @@ -758,7 +758,7 @@ public class FSTOrdTermsReader extends FieldsProducer { if (arc == null) { return null; } - frame.state = fsa.step(top.state, arc.label); + frame.state = fsa.step(top.state, arc.label()); //if (TEST) System.out.println(" loadCeil frame="+frame); if (frame.state == -1) { return loadNextFrame(top, frame); @@ -781,8 +781,8 @@ public class FSTOrdTermsReader extends FieldsProducer { void pushFrame(Frame frame) { final FST.Arc arc = frame.arc; - arc.output = fstOutputs.add(topFrame().arc.output, arc.output); - term = grow(arc.label); + arc.output(fstOutputs.add(topFrame().arc.output(), arc.output())); + term = grow(arc.label()); level++; assert frame == stack[level]; } @@ -836,7 +836,7 @@ public class FSTOrdTermsReader extends FieldsProducer { queue.add(startArc); while (!queue.isEmpty()) { final FST.Arc arc = queue.remove(0); - final long node = arc.target; + final long node = arc.target(); //System.out.println(arc); if (FST.targetHasArcs(arc) && !seen.get((int) node)) { seen.set((int) node); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java index 43528ced1f0..41a992fafea 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java @@ -415,7 +415,7 @@ public class FSTTermsReader extends FieldsProducer { /* True when there is pending term when calling next() */ boolean pending; - /* stack to record how current term is constructed, + /* stack to record how current term is constructed, * used to accumulate metadata or rewind term: * level == term.length + 1, * == 0 when term is null 
*/ @@ -501,19 +501,19 @@ public class FSTTermsReader extends FieldsProducer { } /** Lazily accumulate meta data, when we got a accepted term */ - void loadMetaData() throws IOException { + void loadMetaData() { FST.Arc last, next; last = stack[metaUpto].fstArc; while (metaUpto != level) { metaUpto++; next = stack[metaUpto].fstArc; - next.output = fstOutputs.add(next.output, last.output); + next.output(fstOutputs.add(next.output(), last.output())); last = next; } if (last.isFinal()) { - meta = fstOutputs.add(last.output, last.nextFinalOutput); + meta = fstOutputs.add(last.output(), last.nextFinalOutput()); } else { - meta = last.output; + meta = last.output(); } state.docFreq = meta.docFreq; state.totalTermFreq = meta.totalTermFreq; @@ -575,7 +575,7 @@ public class FSTTermsReader extends FieldsProducer { frame = newFrame(); label = target.bytes[upto] & 0xff; frame = loadCeilFrame(label, topFrame(), frame); - if (frame == null || frame.fstArc.label != label) { + if (frame == null || frame.fstArc.label() != label) { break; } assert isValid(frame); // target must be fetched from automaton @@ -603,9 +603,9 @@ public class FSTTermsReader extends FieldsProducer { } /** Virtual frame, never pop */ - Frame loadVirtualFrame(Frame frame) throws IOException { - frame.fstArc.output = fstOutputs.getNoOutput(); - frame.fstArc.nextFinalOutput = fstOutputs.getNoOutput(); + Frame loadVirtualFrame(Frame frame) { + frame.fstArc.output(fstOutputs.getNoOutput()); + frame.fstArc.nextFinalOutput(fstOutputs.getNoOutput()); frame.fsaState = -1; return frame; } @@ -622,8 +622,8 @@ public class FSTTermsReader extends FieldsProducer { if (!canGrow(top)) { return null; } - frame.fstArc = fst.readFirstRealTargetArc(top.fstArc.target, frame.fstArc, fstReader); - frame.fsaState = fsa.step(top.fsaState, frame.fstArc.label); + frame.fstArc = fst.readFirstRealTargetArc(top.fstArc.target(), frame.fstArc, fstReader); + frame.fsaState = fsa.step(top.fsaState, frame.fstArc.label()); //if (TEST) 
System.out.println(" loadExpand frame="+frame); if (frame.fsaState == -1) { return loadNextFrame(top, frame); @@ -638,7 +638,7 @@ public class FSTTermsReader extends FieldsProducer { } while (!frame.fstArc.isLast()) { frame.fstArc = fst.readNextRealArc(frame.fstArc, fstReader); - frame.fsaState = fsa.step(top.fsaState, frame.fstArc.label); + frame.fsaState = fsa.step(top.fsaState, frame.fstArc.label()); if (frame.fsaState != -1) { break; } @@ -658,7 +658,7 @@ public class FSTTermsReader extends FieldsProducer { if (arc == null) { return null; } - frame.fsaState = fsa.step(top.fsaState, arc.label); + frame.fsaState = fsa.step(top.fsaState, arc.label()); //if (TEST) System.out.println(" loadCeil frame="+frame); if (frame.fsaState == -1) { return loadNextFrame(top, frame); @@ -680,7 +680,7 @@ public class FSTTermsReader extends FieldsProducer { } void pushFrame(Frame frame) { - term = grow(frame.fstArc.label); + term = grow(frame.fstArc.label()); level++; //if (TEST) System.out.println(" term=" + term + " level=" + level); } @@ -737,7 +737,7 @@ public class FSTTermsReader extends FieldsProducer { queue.add(startArc); while (!queue.isEmpty()) { final FST.Arc arc = queue.remove(0); - final long node = arc.target; + final long node = arc.target(); //System.out.println(arc); if (FST.targetHasArcs(arc) && !seen.get((int) node)) { seen.set((int) node); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java index 848bb0b7528..76bfdf23dde 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java @@ -111,7 +111,7 @@ final class IntersectTermsEnum extends BaseTermsEnum { f.prefix = 0; f.setState(0); f.arc = arc; - f.outputPrefix = arc.output; + f.outputPrefix = arc.output(); f.load(fr.rootCode); // for assert: @@ -186,14 +186,14 @@ final 
class IntersectTermsEnum extends BaseTermsEnum { // passed to findTargetArc arc = fr.index.findTargetArc(target, arc, getArc(1+idx), fstReader); assert arc != null; - output = fstOutputs.add(output, arc.output); + output = fstOutputs.add(output, arc.output()); idx++; } f.arc = arc; f.outputPrefix = output; assert arc.isFinal(); - f.load(fstOutputs.add(output, arc.nextFinalOutput)); + f.load(fstOutputs.add(output, arc.nextFinalOutput())); return f; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java index c9d0ddf6419..92888d060c9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java @@ -351,7 +351,7 @@ final class SegmentTermsEnum extends BaseTermsEnum { arc = arcs[0]; assert arc.isFinal(); - output = arc.output; + output = arc.output(); targetUpto = 0; SegmentTermsEnumFrame lastFrame = stack[0]; @@ -374,9 +374,9 @@ final class SegmentTermsEnum extends BaseTermsEnum { break; } arc = arcs[1+targetUpto]; - assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); - if (arc.output != BlockTreeTermsReader.NO_OUTPUT) { - output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output); + assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); + if (arc.output() != BlockTreeTermsReader.NO_OUTPUT) { + output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output()); } if (arc.isFinal()) { lastFrame = stack[1+lastFrame.ord]; @@ -454,19 +454,19 @@ final class SegmentTermsEnum extends BaseTermsEnum { // Empty string prefix must have an output (block) in the index! 
assert arc.isFinal(); - assert arc.output != null; + assert arc.output() != null; // if (DEBUG) { // System.out.println(" no seek state; push root frame"); // } - output = arc.output; + output = arc.output(); currentFrame = staticFrame; //term.length = 0; targetUpto = 0; - currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0); + currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0); } // if (DEBUG) { @@ -521,9 +521,9 @@ final class SegmentTermsEnum extends BaseTermsEnum { arc = nextArc; term.setByteAt(targetUpto, (byte) targetLabel); // Aggregate output as we go: - assert arc.output != null; - if (arc.output != BlockTreeTermsReader.NO_OUTPUT) { - output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output); + assert arc.output() != null; + if (arc.output() != BlockTreeTermsReader.NO_OUTPUT) { + output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output()); } // if (DEBUG) { @@ -533,7 +533,7 @@ final class SegmentTermsEnum extends BaseTermsEnum { if (arc.isFinal()) { //if (DEBUG) System.out.println(" arc is final!"); - currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto); + currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto); //if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms); } } @@ -608,7 +608,7 @@ final class SegmentTermsEnum extends BaseTermsEnum { arc = arcs[0]; assert arc.isFinal(); - output = arc.output; + output = arc.output(); targetUpto = 0; SegmentTermsEnumFrame lastFrame = stack[0]; @@ -631,14 +631,14 @@ final class SegmentTermsEnum extends BaseTermsEnum { break; } arc = arcs[1+targetUpto]; - assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); + assert 
arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); // TODO: we could save the outputs in local // byte[][] instead of making new objs ever // seek; but, often the FST doesn't have any // shared bytes (but this could change if we // reverse vLong byte order) - if (arc.output != BlockTreeTermsReader.NO_OUTPUT) { - output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output); + if (arc.output() != BlockTreeTermsReader.NO_OUTPUT) { + output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output()); } if (arc.isFinal()) { lastFrame = stack[1+lastFrame.ord]; @@ -711,19 +711,19 @@ final class SegmentTermsEnum extends BaseTermsEnum { // Empty string prefix must have an output (block) in the index! assert arc.isFinal(); - assert arc.output != null; + assert arc.output() != null; //if (DEBUG) { //System.out.println(" no seek state; push root frame"); //} - output = arc.output; + output = arc.output(); currentFrame = staticFrame; //term.length = 0; targetUpto = 0; - currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0); + currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0); } //if (DEBUG) { @@ -779,9 +779,9 @@ final class SegmentTermsEnum extends BaseTermsEnum { term.setByteAt(targetUpto, (byte) targetLabel); arc = nextArc; // Aggregate output as we go: - assert arc.output != null; - if (arc.output != BlockTreeTermsReader.NO_OUTPUT) { - output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output); + assert arc.output() != null; + if (arc.output() != BlockTreeTermsReader.NO_OUTPUT) { + output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output()); } //if (DEBUG) { @@ -791,7 +791,7 @@ final class SegmentTermsEnum extends BaseTermsEnum { if (arc.isFinal()) { //if (DEBUG) System.out.println(" arc is final!"); - currentFrame = pushFrame(arc, 
BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto); + currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto); //if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms); } } @@ -844,8 +844,8 @@ final class SegmentTermsEnum extends BaseTermsEnum { } if (fr.index != null) { assert !isSeekFrame || f.arc != null: "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc; - if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix-1)&0xFF)) { - out.println(" broken seek state: arc.label=" + (char) f.arc.label + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF)); + if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix-1)&0xFF)) { + out.println(" broken seek state: arc.label=" + (char) f.arc.label() + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF)); throw new RuntimeException("seek state is broken"); } BytesRef output = Util.get(fr.index, prefix); diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java index f308f1aae65..f9d0cd51605 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java @@ -62,24 +62,24 @@ import org.apache.lucene.util.RamUsageEstimator; */ public final class FST implements Accountable { + /** Specifies allowed range of each int input label for + * this FST. */ + public enum INPUT_TYPE {BYTE1, BYTE2, BYTE4} + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FST.class); private static final long ARC_SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Arc.class); - /** Specifies allowed range of each int input label for - * this FST. 
*/ - public static enum INPUT_TYPE {BYTE1, BYTE2, BYTE4}; - - static final int BIT_FINAL_ARC = 1 << 0; + private static final int BIT_FINAL_ARC = 1 << 0; static final int BIT_LAST_ARC = 1 << 1; static final int BIT_TARGET_NEXT = 1 << 2; // TODO: we can free up a bit if we can nuke this: - static final int BIT_STOP_NODE = 1 << 3; + private static final int BIT_STOP_NODE = 1 << 3; /** This flag is set if the arc has an output. */ public static final int BIT_ARC_HAS_OUTPUT = 1 << 4; - static final int BIT_ARC_HAS_FINAL_OUTPUT = 1 << 5; + private static final int BIT_ARC_HAS_FINAL_OUTPUT = 1 << 5; // We use this as a marker (because this one flag is // illegal by itself ...): @@ -119,10 +119,13 @@ public final class FST implements Accountable { // non-final node w/ no arcs: private static final long NON_FINAL_END_NODE = 0; + /* Used for memory accounting */ + private int cachedArcsBytesUsed; + /** If arc has this label then that arc is final/accepted */ public static final int END_LABEL = -1; - public final INPUT_TYPE inputType; + final INPUT_TYPE inputType; // if non-null, this FST accepts the empty string and // produces this output @@ -139,62 +142,51 @@ public final class FST implements Accountable { public final Outputs outputs; - private Arc cachedRootArcs[]; + private Arc[] cachedRootArcs; /** Represents a single arc. 
*/ public static final class Arc { - public int label; - public T output; - /** To node (ord or address) */ - public long target; + private int label; - byte flags; - public T nextFinalOutput; + private T output; + + private long target; + + private byte flags; + + private T nextFinalOutput; // address (into the byte[]), or ord/address if label == END_LABEL - long nextArc; + private long nextArc; - /** Where the first arc in the array starts; only valid if - * bytesPerArc != 0 */ - public long posArcsStart; - - /** Non-zero if this arc is part of an array, which means all - * arcs for the node are encoded with a fixed number of bytes so - * that we can random access by index. We do when there are enough - * arcs leaving one node. It wastes some bytes but gives faster - * lookups. */ - public int bytesPerArc; + private long posArcsStart; - /** Where we are in the array; only valid if bytesPerArc != 0, and the array has no holes. - * arcIdx = Integer.MIN_VALUE indicates that the arc is part of a direct array, addressed by - * label. - */ - public int arcIdx; + private int bytesPerArc; - /** How many arc, if bytesPerArc == 0. Otherwise, the size of the arc array. If the array is - * direct, this may include holes. 
Otherwise it is also how many arcs are in the array */ - public int numArcs; + private int arcIdx; + + private int numArcs; /** Returns this */ public Arc copyFrom(Arc other) { - label = other.label; - target = other.target; - flags = other.flags; - output = other.output; - nextFinalOutput = other.nextFinalOutput; - nextArc = other.nextArc; - bytesPerArc = other.bytesPerArc; - if (bytesPerArc != 0) { - posArcsStart = other.posArcsStart; - arcIdx = other.arcIdx; - numArcs = other.numArcs; + label = other.label(); + target = other.target(); + flags = other.flags(); + output = other.output(); + nextFinalOutput = other.nextFinalOutput(); + nextArc = other.nextArc(); + bytesPerArc = other.bytesPerArc(); + if (bytesPerArc() != 0) { + posArcsStart = other.posArcsStart(); + arcIdx = other.arcIdx(); + numArcs = other.numArcs(); } return this; } boolean flag(int flag) { - return FST.flag(flags, flag); + return FST.flag(flags(), flag); } public boolean isLast() { @@ -208,8 +200,8 @@ public final class FST implements Accountable { @Override public String toString() { StringBuilder b = new StringBuilder(); - b.append(" target=").append(target); - b.append(" label=0x").append(Integer.toHexString(label)); + b.append(" target=").append(target()); + b.append(" label=0x").append(Integer.toHexString(label())); if (flag(BIT_FINAL_ARC)) { b.append(" final"); } @@ -223,40 +215,121 @@ public final class FST implements Accountable { b.append(" stop"); } if (flag(BIT_ARC_HAS_OUTPUT)) { - b.append(" output=").append(output); + b.append(" output=").append(output()); } if (flag(BIT_ARC_HAS_FINAL_OUTPUT)) { - b.append(" nextFinalOutput=").append(nextFinalOutput); + b.append(" nextFinalOutput=").append(nextFinalOutput()); } - if (bytesPerArc != 0) { - b.append(" arcArray(idx=").append(arcIdx).append(" of ").append(numArcs).append(")"); + if (bytesPerArc() != 0) { + b.append(" arcArray(idx=").append(arcIdx()).append(" of ").append(numArcs()).append(")"); } return b.toString(); } - }; + + public 
int label() { + return label; + } + + public void label(int label) { + this.label = label; + } + + public T output() { + return output; + } + + public void output(T output) { + this.output = output; + } + + /** To node (ord or address) */ + public long target() { + return target; + } + + public byte flags() { + return flags; + } + + public void flags(byte flags) { + this.flags = flags; + } + + public T nextFinalOutput() { + return nextFinalOutput; + } + + public void nextFinalOutput(T output) { + nextFinalOutput = output; + } + + long nextArc() { + return nextArc; + } + + /** + * Set the position of the next arc to read + * @param nextArc the position to set + */ + public void nextArc(long nextArc) { + this.nextArc = nextArc; + } + + /** Where the first arc in the array starts; only valid if + * bytesPerArc != 0 */ + public long posArcsStart() { + return posArcsStart; + } + + /** Non-zero if this arc is part of an array, which means all + * arcs for the node are encoded with a fixed number of bytes so + * that we can random access by index. We do this when there are enough + * arcs leaving one node. It wastes some bytes but gives faster + * lookups. */ + public int bytesPerArc() { + return bytesPerArc; + } + + /** Where we are in the array; only valid if bytesPerArc != 0, and the array has no holes. + * arcIdx = Integer.MIN_VALUE indicates that the arc is part of a direct array, addressed by + * label. + */ + public int arcIdx() { + return arcIdx; + } + + /** + * Set the arcIdx + * @param idx the value to set + */ + public void arcIdx(int idx) { + arcIdx = idx; + } + + /** How many arcs, if bytesPerArc == 0. Otherwise, the size of the arc array. If the array is + * direct, this may include holes.
Otherwise it is also how many arcs are in the array */ + public int numArcs() { + return numArcs; + } + } private static boolean flag(int flags, int bit) { return (flags & bit) != 0; } - private final int version; - - // make a new empty FST, for building; Builder invokes - // this ctor + // make a new empty FST, for building; Builder invokes this FST(INPUT_TYPE inputType, Outputs outputs, int bytesPageBits) { this.inputType = inputType; this.outputs = outputs; - version = VERSION_CURRENT; fstStore = null; bytes = new BytesStore(bytesPageBits); // pad: ensure no node gets address 0 which is reserved to mean // the stop state w/ no arcs bytes.writeByte((byte) 0); - emptyOutput = null; } - public static final int DEFAULT_MAX_BLOCK_BITS = Constants.JRE_IS_64BIT ? 30 : 28; + private static final int DEFAULT_MAX_BLOCK_BITS = Constants.JRE_IS_64BIT ? 30 : 28; /** Load a previously saved FST. */ public FST(DataInput in, Outputs outputs) throws IOException { @@ -270,9 +343,9 @@ public final class FST implements Accountable { this.fstStore = fstStore; this.outputs = outputs; - // NOTE: only reads most recent format; we don't have - // back-compat promise for FSTs (they are experimental): - version = CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_START, VERSION_CURRENT); + // NOTE: only reads formats VERSION_START up to VERSION_CURRENT; we don't have + // back-compat promise for FSTs (they are experimental), but we are sometimes able to offer it + CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_START, VERSION_CURRENT); if (in.readByte() == 1) { // accepts empty string // 1 KB blocks: @@ -313,10 +386,6 @@ public final class FST implements Accountable { cacheRootArcs(); } - public INPUT_TYPE getInputType() { - return inputType; - } - private long ramBytesUsed(Arc[] arcs) { long size = 0; if (arcs != null) { @@ -324,11 +393,11 @@ public final class FST implements Accountable { for (Arc arc : arcs) { if (arc != null) { size += ARC_SHALLOW_RAM_BYTES_USED; - if 
(arc.output != null && arc.output != outputs.getNoOutput()) { - size += outputs.ramBytesUsed(arc.output); + if (arc.output() != null && arc.output() != outputs.getNoOutput()) { + size += outputs.ramBytesUsed(arc.output()); } - if (arc.nextFinalOutput != null && arc.nextFinalOutput != outputs.getNoOutput()) { - size += outputs.ramBytesUsed(arc.nextFinalOutput); + if (arc.nextFinalOutput() != null && arc.nextFinalOutput() != outputs.getNoOutput()) { + size += outputs.ramBytesUsed(arc.nextFinalOutput()); } } } @@ -336,8 +405,6 @@ public final class FST implements Accountable { return size; } - private int cachedArcsBytesUsed; - @Override public long ramBytesUsed() { long size = BASE_RAM_BYTES_USED; @@ -380,12 +447,12 @@ public final class FST implements Accountable { if (targetHasArcs(arc)) { final BytesReader in = getBytesReader(); Arc[] arcs = (Arc[]) new Arc[0x80]; - readFirstRealTargetArc(arc.target, arc, in); + readFirstRealTargetArc(arc.target(), arc, in); int count = 0; while(true) { - assert arc.label != END_LABEL; - if (arc.label < arcs.length) { - arcs[arc.label] = new Arc().copyFrom(arc); + assert arc.label() != END_LABEL; + if (arc.label() < arcs.length) { + arcs[arc.label()] = new Arc().copyFrom(arc); } else { break; } @@ -410,7 +477,7 @@ public final class FST implements Accountable { return emptyOutput; } - void setEmptyOutput(T v) throws IOException { + void setEmptyOutput(T v) { if (emptyOutput != null) { emptyOutput = outputs.merge(emptyOutput, v); } else { @@ -433,18 +500,19 @@ public final class FST implements Accountable { ByteBuffersDataOutput ros = new ByteBuffersDataOutput(); outputs.writeFinalOutput(emptyOutput, ros); byte[] emptyOutputBytes = ros.toArrayCopy(); + int emptyLen = emptyOutputBytes.length; // reverse - final int stopAt = emptyOutputBytes.length/2; + final int stopAt = emptyLen / 2; int upto = 0; while (upto < stopAt) { final byte b = emptyOutputBytes[upto]; - emptyOutputBytes[upto] = 
emptyOutputBytes[emptyOutputBytes.length-upto-1]; - emptyOutputBytes[emptyOutputBytes.length-upto-1] = b; + emptyOutputBytes[upto] = emptyOutputBytes[emptyLen - upto - 1]; + emptyOutputBytes[emptyLen - upto - 1] = b; upto++; } - out.writeVInt(emptyOutputBytes.length); - out.writeBytes(emptyOutputBytes, 0, emptyOutputBytes.length); + out.writeVInt(emptyLen); + out.writeBytes(emptyOutputBytes, 0, emptyLen); } else { out.writeByte((byte) 0); } @@ -517,7 +585,7 @@ public final class FST implements Accountable { /** returns true if the node at this address has any * outgoing arcs */ public static boolean targetHasArcs(Arc arc) { - return arc.target > 0; + return arc.target() > 0; } // serializes new node by appending its bytes to the end @@ -652,7 +720,7 @@ public final class FST implements Accountable { //System.out.println("write int @pos=" + (fixedArrayStart-4) + " numArcs=" + nodeIn.numArcs); // create the header // TODO: clean this up: or just rewind+reuse and deal with it - byte header[] = new byte[MAX_HEADER_SIZE]; + byte[] header = new byte[MAX_HEADER_SIZE]; ByteArrayDataOutput bad = new ByteArrayDataOutput(header); // write a "false" first arc: if (writeDirectly) { @@ -742,8 +810,7 @@ public final class FST implements Accountable { } } - /** Fills virtual 'start' arc, ie, an empty incoming arc to - * the FST's start node */ + /** Fills virtual 'start' arc, ie, an empty incoming arc to the FST's start node */ public Arc getFirstArc(Arc arc) { T NO_OUTPUT = outputs.getNoOutput(); @@ -771,18 +838,18 @@ public final class FST implements Accountable { * * @return Returns the second argument * (arc). 
*/ - public Arc readLastTargetArc(Arc follow, Arc arc, BytesReader in) throws IOException { + Arc readLastTargetArc(Arc follow, Arc arc, BytesReader in) throws IOException { //System.out.println("readLast"); if (!targetHasArcs(follow)) { //System.out.println(" end node"); assert follow.isFinal(); arc.label = END_LABEL; arc.target = FINAL_END_NODE; - arc.output = follow.nextFinalOutput; + arc.output = follow.nextFinalOutput(); arc.flags = BIT_LAST_ARC; return arc; } else { - in.setPosition(follow.target); + in.setPosition(follow.target()); final byte b = in.readByte(); if (b == ARCS_AS_ARRAY_PACKED || b == ARCS_AS_ARRAY_WITH_GAPS) { // array: jump straight to end @@ -792,9 +859,9 @@ public final class FST implements Accountable { arc.posArcsStart = in.getPosition(); if (b == ARCS_AS_ARRAY_WITH_GAPS) { arc.arcIdx = Integer.MIN_VALUE; - arc.nextArc = arc.posArcsStart - (arc.numArcs - 1) * arc.bytesPerArc; + arc.nextArc = arc.posArcsStart() - (arc.numArcs() - 1) * arc.bytesPerArc(); } else { - arc.arcIdx = arc.numArcs - 2; + arc.arcIdx = arc.numArcs() - 2; } } else { arc.flags = b; @@ -844,25 +911,24 @@ public final class FST implements Accountable { if (follow.isFinal()) { // Insert "fake" final first arc: arc.label = END_LABEL; - arc.output = follow.nextFinalOutput; + arc.output = follow.nextFinalOutput(); arc.flags = BIT_FINAL_ARC; - if (follow.target <= 0) { + if (follow.target() <= 0) { arc.flags |= BIT_LAST_ARC; } else { // NOTE: nextArc is a node (not an address!) 
in this case: - arc.nextArc = follow.target; + arc.nextArc = follow.target(); } arc.target = FINAL_END_NODE; //System.out.println(" insert isFinal; nextArc=" + follow.target + " isLast=" + arc.isLast() + " output=" + outputs.outputToString(arc.output)); return arc; } else { - return readFirstRealTargetArc(follow.target, arc, in); + return readFirstRealTargetArc(follow.target(), arc, in); } } - public Arc readFirstRealTargetArc(long node, Arc arc, final BytesReader in) throws IOException { - final long address = node; - in.setPosition(address); + public Arc readFirstRealTargetArc(long nodeAddress, Arc arc, final BytesReader in) throws IOException { + in.setPosition(nodeAddress); //System.out.println(" flags=" + arc.flags); byte flags = in.readByte(); @@ -880,7 +946,7 @@ public final class FST implements Accountable { //System.out.println(" bytesPer=" + arc.bytesPerArc + " numArcs=" + arc.numArcs + " arcsStart=" + pos); } else { //arc.flags = b; - arc.nextArc = address; + arc.nextArc = nodeAddress; arc.bytesPerArc = 0; } @@ -897,7 +963,7 @@ public final class FST implements Accountable { if (!targetHasArcs(follow)) { return false; } else { - in.setPosition(follow.target); + in.setPosition(follow.target()); byte flags = in.readByte(); return flags == ARCS_AS_ARRAY_PACKED || flags == ARCS_AS_ARRAY_WITH_GAPS; } @@ -905,12 +971,12 @@ public final class FST implements Accountable { /** In-place read; returns the arc. 
*/ public Arc readNextArc(Arc arc, BytesReader in) throws IOException { - if (arc.label == END_LABEL) { + if (arc.label() == END_LABEL) { // This was a fake inserted "final" arc - if (arc.nextArc <= 0) { + if (arc.nextArc() <= 0) { throw new IllegalArgumentException("cannot readNextArc when arc.isLast()=true"); } - return readFirstRealTargetArc(arc.nextArc, arc, in); + return readFirstRealTargetArc(arc.nextArc(), arc, in); } else { return readNextRealArc(arc, in); } @@ -918,14 +984,14 @@ public final class FST implements Accountable { /** Peeks at next arc's label; does not alter arc. Do * not call this if arc.isLast()! */ - public int readNextArcLabel(Arc arc, BytesReader in) throws IOException { + int readNextArcLabel(Arc arc, BytesReader in) throws IOException { assert !arc.isLast(); - if (arc.label == END_LABEL) { + if (arc.label() == END_LABEL) { //System.out.println(" nextArc fake " + //arc.nextArc); - long pos = arc.nextArc; + long pos = arc.nextArc(); in.setPosition(pos); final byte flags = in.readByte(); @@ -941,19 +1007,19 @@ public final class FST implements Accountable { // skip flags in.readByte(); } else { - if (arc.bytesPerArc != 0) { + if (arc.bytesPerArc() != 0) { //System.out.println(" nextArc real array"); // arcs are in an array - if (arc.arcIdx >= 0) { - in.setPosition(arc.posArcsStart); + if (arc.arcIdx() >= 0) { + in.setPosition(arc.posArcsStart()); // point at next arc, -1 to skip flags - in.skipBytes((1 + arc.arcIdx) * arc.bytesPerArc + 1); + in.skipBytes((1 + arc.arcIdx()) * arc.bytesPerArc() + 1); } else { - in.setPosition(arc.nextArc); + in.setPosition(arc.nextArc()); byte flags = in.readByte(); // skip missing arcs while (flag(flags, BIT_MISSING_ARC)) { - in.skipBytes(arc.bytesPerArc - 1); + in.skipBytes(arc.bytesPerArc() - 1); flags = in.readByte(); } } @@ -961,7 +1027,7 @@ public final class FST implements Accountable { // arcs are packed //System.out.println(" nextArc real packed"); // -1 to skip flags - in.setPosition(arc.nextArc - 
1); + in.setPosition(arc.nextArc() - 1); } } return readLabel(in); @@ -975,29 +1041,30 @@ public final class FST implements Accountable { // assert !flag(arc.flags, BIT_LAST_ARC); // this is a continuing arc in a fixed array - if (arc.bytesPerArc != 0) { + if (arc.bytesPerArc() != 0) { // arcs are in an array - if (arc.arcIdx > Integer.MIN_VALUE) { + if (arc.arcIdx() > Integer.MIN_VALUE) { arc.arcIdx++; assert arc.arcIdx < arc.numArcs; - in.setPosition(arc.posArcsStart - arc.arcIdx * arc.bytesPerArc); + in.setPosition(arc.posArcsStart() - arc.arcIdx() * arc.bytesPerArc()); arc.flags = in.readByte(); } else { - assert arc.nextArc <= arc.posArcsStart && arc.nextArc > arc.posArcsStart - arc.numArcs * arc.bytesPerArc; - in.setPosition(arc.nextArc); + assert arc.nextArc() <= arc.posArcsStart() && arc.nextArc() > arc.posArcsStart() - arc.numArcs() * arc.bytesPerArc(); + in.setPosition(arc.nextArc()); arc.flags = in.readByte(); - while (flag(arc.flags, BIT_MISSING_ARC)) { + while (flag(arc.flags(), BIT_MISSING_ARC)) { // skip empty arcs - arc.nextArc -= arc.bytesPerArc; - in.skipBytes(arc.bytesPerArc - 1); + arc.nextArc = arc.nextArc() - arc.bytesPerArc(); + in.skipBytes(arc.bytesPerArc() - 1); arc.flags = in.readByte(); } } } else { // arcs are packed - in.setPosition(arc.nextArc); + in.setPosition(arc.nextArc()); arc.flags = in.readByte(); } + arc.label = readLabel(in); if (arc.flag(BIT_ARC_HAS_OUTPUT)) { @@ -1018,31 +1085,31 @@ public final class FST implements Accountable { } else { arc.target = NON_FINAL_END_NODE; } - if (arc.bytesPerArc == 0) { + if (arc.bytesPerArc() == 0) { arc.nextArc = in.getPosition(); } else { - arc.nextArc -= arc.bytesPerArc; + arc.nextArc -= arc.bytesPerArc(); } } else if (arc.flag(BIT_TARGET_NEXT)) { arc.nextArc = in.getPosition(); // TODO: would be nice to make this lazy -- maybe // caller doesn't need the target and is scanning arcs... 
if (!arc.flag(BIT_LAST_ARC)) { - if (arc.bytesPerArc == 0) { + if (arc.bytesPerArc() == 0) { // must scan seekToNextNode(in); } else { - in.setPosition(arc.posArcsStart); - in.skipBytes(arc.bytesPerArc * arc.numArcs); + in.setPosition(arc.posArcsStart()); + in.skipBytes(arc.bytesPerArc() * arc.numArcs()); } } arc.target = in.getPosition(); } else { arc.target = readUnpackedNodeTarget(in); - if (arc.bytesPerArc > 0 && arc.arcIdx == Integer.MIN_VALUE) { + if (arc.bytesPerArc() > 0 && arc.arcIdx() == Integer.MIN_VALUE) { // nextArc was pointing to *this* arc when we entered; advance to the next // if it is a missing arc, we will skip it later - arc.nextArc -= arc.bytesPerArc; + arc.nextArc = arc.nextArc() - arc.bytesPerArc(); } else { // in list and fixed table encodings, the next arc always follows this one arc.nextArc = in.getPosition(); @@ -1065,19 +1132,16 @@ public final class FST implements Accountable { assert cachedArc == null; } else { assert cachedArc != null; - assert cachedArc.arcIdx == result.arcIdx; - assert cachedArc.bytesPerArc == result.bytesPerArc; - assert cachedArc.flags == result.flags; - assert cachedArc.label == result.label; - if (cachedArc.bytesPerArc == 0 || cachedArc.arcIdx == Integer.MIN_VALUE) { - // in the sparse array case, this value is not valid, so don't assert it - assert cachedArc.nextArc == result.nextArc; - } - assert cachedArc.nextFinalOutput.equals(result.nextFinalOutput); - assert cachedArc.numArcs == result.numArcs; - assert cachedArc.output.equals(result.output); - assert cachedArc.posArcsStart == result.posArcsStart; - assert cachedArc.target == result.target; + assert cachedArc.arcIdx() == result.arcIdx(); + assert cachedArc.bytesPerArc() == result.bytesPerArc(); + assert cachedArc.flags() == result.flags(); + assert cachedArc.label() == result.label(); + assert (cachedArc.bytesPerArc() != 0 && cachedArc.arcIdx() != Integer.MIN_VALUE) || cachedArc.nextArc() == result.nextArc(); + assert 
cachedArc.nextFinalOutput().equals(result.nextFinalOutput()); + assert cachedArc.numArcs() == result.numArcs(); + assert cachedArc.output().equals(result.output()); + assert cachedArc.posArcsStart() == result.posArcsStart(); + assert cachedArc.target() == result.target(); } return true; @@ -1098,14 +1162,14 @@ public final class FST implements Accountable { if (labelToMatch == END_LABEL) { if (follow.isFinal()) { - if (follow.target <= 0) { + if (follow.target() <= 0) { arc.flags = BIT_LAST_ARC; } else { arc.flags = 0; // NOTE: nextArc is a node (not an address!) in this case: - arc.nextArc = follow.target; + arc.nextArc = follow.target(); } - arc.output = follow.nextFinalOutput; + arc.output = follow.nextFinalOutput(); arc.label = END_LABEL; return arc; } else { @@ -1114,7 +1178,7 @@ public final class FST implements Accountable { } // Short-circuit if this arc is in the root arc cache: - if (useRootArcCache && cachedRootArcs != null && follow.target == startNode && labelToMatch < cachedRootArcs.length) { + if (useRootArcCache && cachedRootArcs != null && follow.target() == startNode && labelToMatch < cachedRootArcs.length) { final Arc result = cachedRootArcs[labelToMatch]; // LUCENE-5152: detect tricky cases where caller @@ -1133,7 +1197,7 @@ public final class FST implements Accountable { return null; } - in.setPosition(follow.target); + in.setPosition(follow.target()); // System.out.println("fta label=" + (char) labelToMatch); @@ -1148,12 +1212,12 @@ public final class FST implements Accountable { int firstLabel = readLabel(in); int arcPos = labelToMatch - firstLabel; if (arcPos == 0) { - arc.nextArc = arc.posArcsStart; + arc.nextArc = arc.posArcsStart(); } else if (arcPos > 0) { - if (arcPos >= arc.numArcs) { + if (arcPos >= arc.numArcs()) { return null; } - in.setPosition(arc.posArcsStart - arc.bytesPerArc * arcPos); + in.setPosition(arc.posArcsStart() - arc.bytesPerArc() * arcPos); flags = in.readByte(); if (flag(flags, BIT_MISSING_ARC)) { return null; @@ 
-1172,12 +1236,12 @@ public final class FST implements Accountable { // Array is sparse; do binary search: int low = 0; - int high = arc.numArcs - 1; + int high = arc.numArcs() - 1; while (low <= high) { //System.out.println(" cycle"); int mid = (low + high) >>> 1; // +1 to skip over flags - in.setPosition(arc.posArcsStart - (arc.bytesPerArc * mid + 1)); + in.setPosition(arc.posArcsStart() - (arc.bytesPerArc() * mid + 1)); int midLabel = readLabel(in); final int cmp = midLabel - labelToMatch; if (cmp < 0) { @@ -1194,17 +1258,17 @@ public final class FST implements Accountable { } // Linear scan - readFirstRealTargetArc(follow.target, arc, in); + readFirstRealTargetArc(follow.target(), arc, in); while(true) { //System.out.println(" non-bs cycle"); // TODO: we should fix this code to not have to create // object for the output of every arc we scan... only // for the matching arc, if found - if (arc.label == labelToMatch) { + if (arc.label() == labelToMatch) { //System.out.println(" found!"); return arc; - } else if (arc.label > labelToMatch) { + } else if (arc.label() > labelToMatch) { return null; } else if (arc.isLast()) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java b/lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java index c7819a118bf..1c41d31f400 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java @@ -39,12 +39,12 @@ abstract class FSTEnum { protected final FST.Arc scratchArc = new FST.Arc<>(); protected int upto; - protected int targetLength; + int targetLength; /** doFloor controls the behavior of advance: if it's true * doFloor is true, advance positions to the biggest * term before target. 
*/ - protected FSTEnum(FST fst) { + FSTEnum(FST fst) { this.fst = fst; fstReader = fst.getBytesReader(); NO_OUTPUT = fst.outputs.getNoOutput(); @@ -60,7 +60,7 @@ abstract class FSTEnum { /** Rewinds enum state to match the shared prefix between * current term and target term */ - protected final void rewindPrefix() throws IOException { + private void rewindPrefix() throws IOException { if (upto == 0) { //System.out.println(" init"); upto = 1; @@ -138,10 +138,10 @@ abstract class FSTEnum { while(arc != null) { int targetLabel = getTargetLabel(); //System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") vs targetLabel=" + targetLabel); - if (arc.bytesPerArc != 0 && arc.label != -1) { + if (arc.bytesPerArc() != 0 && arc.label() != -1) { // Arcs are in an array final FST.BytesReader in = fst.getBytesReader(); - if (arc.arcIdx == Integer.MIN_VALUE) { + if (arc.arcIdx() == Integer.MIN_VALUE) { arc = doSeekCeilArrayWithGaps(arc, targetLabel, in); } else { arc = doSeekCeilArrayPacked(arc, targetLabel, in); @@ -155,13 +155,13 @@ abstract class FSTEnum { private FST.Arc doSeekCeilArrayWithGaps(final FST.Arc arc, final int targetLabel, final FST.BytesReader in) throws IOException { // The array is addressed directly by label and may contain holes. 
- in.setPosition(arc.posArcsStart); + in.setPosition(arc.posArcsStart()); in.skipBytes(1); int firstLabel = fst.readLabel(in); int arcOffset = targetLabel - firstLabel; - if (arcOffset >= arc.numArcs) { + if (arcOffset >= arc.numArcs()) { // target is beyond the last arc - arc.nextArc = arc.posArcsStart - (arc.numArcs - 1) * arc.bytesPerArc; + arc.nextArc(arc.posArcsStart() - (arc.numArcs() - 1) * arc.bytesPerArc()); fst.readNextRealArc(arc, in); assert arc.isLast(); // Dead end (target is after the last arc); @@ -183,23 +183,23 @@ abstract class FSTEnum { } else { // TODO: if firstLabel == targetLabel if (arcOffset >= 0) { - arc.nextArc = arc.posArcsStart - (arc.bytesPerArc * arcOffset); + arc.nextArc(arc.posArcsStart() - (arc.bytesPerArc() * arcOffset)); } else { - arc.nextArc = arc.posArcsStart; + arc.nextArc(arc.posArcsStart()); } fst.readNextRealArc(arc, in); - if (arc.label == targetLabel) { + if (arc.label() == targetLabel) { // found -- copy pasta from below - output[upto] = fst.outputs.add(output[upto-1], arc.output); + output[upto] = fst.outputs.add(output[upto-1], arc.output()); if (targetLabel == FST.END_LABEL) { return null; } - setCurrentLabel(arc.label); + setCurrentLabel(arc.label()); incr(); return fst.readFirstTargetArc(arc, getArc(upto), fstReader); } // not found, return the next highest - assert arc.label > targetLabel; + assert arc.label() > targetLabel; pushFirst(); return null; } @@ -208,15 +208,15 @@ abstract class FSTEnum { private FST.Arc doSeekCeilArrayPacked(final FST.Arc arc, final int targetLabel, final FST.BytesReader in) throws IOException { // The array is packed -- use binary search to find the target. 
- int low = arc.arcIdx; - int high = arc.numArcs-1; + int low = arc.arcIdx(); + int high = arc.numArcs() -1; int mid = 0; //System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel); boolean found = false; while (low <= high) { mid = (low + high) >>> 1; - in.setPosition(arc.posArcsStart); - in.skipBytes(arc.bytesPerArc * mid + 1); + in.setPosition(arc.posArcsStart()); + in.skipBytes(arc.bytesPerArc() * mid + 1); final int midLabel = fst.readLabel(in); final int cmp = midLabel - targetLabel; //System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp); @@ -234,20 +234,20 @@ abstract class FSTEnum { // the outer else clause): if (found) { // Match - arc.arcIdx = mid-1; + arc.arcIdx(mid - 1); fst.readNextRealArc(arc, in); - assert arc.arcIdx == mid; - assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid; - output[upto] = fst.outputs.add(output[upto-1], arc.output); + assert arc.arcIdx() == mid; + assert arc.label() == targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel + " mid=" + mid; + output[upto] = fst.outputs.add(output[upto-1], arc.output()); if (targetLabel == FST.END_LABEL) { return null; } - setCurrentLabel(arc.label); + setCurrentLabel(arc.label()); incr(); return fst.readFirstTargetArc(arc, getArc(upto), fstReader); - } else if (low == arc.numArcs) { + } else if (low == arc.numArcs()) { // Dead end - arc.arcIdx = arc.numArcs-2; + arc.arcIdx(arc.numArcs() - 2); fst.readNextRealArc(arc, in); assert arc.isLast(); // Dead end (target is after the last arc); @@ -267,9 +267,9 @@ abstract class FSTEnum { upto--; } } else { - arc.arcIdx = (low > high ? 
low : high)-1; + arc.arcIdx(low - 1); fst.readNextRealArc(arc, in); - assert arc.label > targetLabel; + assert arc.label() > targetLabel; pushFirst(); return null; } @@ -277,16 +277,16 @@ abstract class FSTEnum { private FST.Arc doSeekCeilList(final FST.Arc arc, final int targetLabel) throws IOException { // Arcs are not array'd -- must do linear scan: - if (arc.label == targetLabel) { + if (arc.label() == targetLabel) { // recurse - output[upto] = fst.outputs.add(output[upto-1], arc.output); + output[upto] = fst.outputs.add(output[upto-1], arc.output()); if (targetLabel == FST.END_LABEL) { return null; } - setCurrentLabel(arc.label); + setCurrentLabel(arc.label()); incr(); return fst.readFirstTargetArc(arc, getArc(upto), fstReader); - } else if (arc.label > targetLabel) { + } else if (arc.label() > targetLabel) { pushFirst(); return null; } else if (arc.isLast()) { @@ -340,10 +340,10 @@ abstract class FSTEnum { //System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast() + " bba=" + arc.bytesPerArc); int targetLabel = getTargetLabel(); - if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) { + if (arc.bytesPerArc() != 0 && arc.label() != FST.END_LABEL) { // Arcs are in an array final FST.BytesReader in = fst.getBytesReader(); - if (arc.arcIdx == Integer.MIN_VALUE) { + if (arc.arcIdx() == Integer.MIN_VALUE) { arc = doSeekFloorArrayWithGaps(arc, targetLabel, in); } else { arc = doSeekFloorArrayPacked(arc, targetLabel, in); @@ -356,7 +356,7 @@ abstract class FSTEnum { private FST.Arc doSeekFloorArrayWithGaps(FST.Arc arc, int targetLabel, final FST.BytesReader in) throws IOException { // The array is addressed directly by label and may contain holes. 
- in.setPosition(arc.posArcsStart); + in.setPosition(arc.posArcsStart()); in.skipBytes(1); int firstLabel = fst.readLabel(in); int targetOffset = targetLabel - firstLabel; @@ -368,7 +368,7 @@ abstract class FSTEnum { // First, walk backwards until we find a first arc // that's before our target label: fst.readFirstTargetArc(getArc(upto-1), arc, fstReader); - if (arc.label < targetLabel) { + if (arc.label() < targetLabel) { // Then, scan forwards to the arc just before // the targetLabel: while(!arc.isLast() && fst.readNextArcLabel(arc, in) < targetLabel) { @@ -385,39 +385,38 @@ abstract class FSTEnum { arc = getArc(upto); } } else { - if (targetOffset >= arc.numArcs) { - arc.nextArc = arc.posArcsStart - arc.bytesPerArc * (arc.numArcs - 1); + if (targetOffset >= arc.numArcs()) { + arc.nextArc(arc.posArcsStart() - arc.bytesPerArc() * (arc.numArcs() - 1)); fst.readNextRealArc(arc, in); assert arc.isLast(); - assert arc.label < targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel; + assert arc.label() < targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel; pushLast(); return null; } - arc.nextArc = arc.posArcsStart - arc.bytesPerArc * targetOffset; + arc.nextArc(arc.posArcsStart() - arc.bytesPerArc() * targetOffset); fst.readNextRealArc(arc, in); - if (arc.label == targetLabel) { + if (arc.label() == targetLabel) { // found -- copy pasta from below - output[upto] = fst.outputs.add(output[upto-1], arc.output); + output[upto] = fst.outputs.add(output[upto-1], arc.output()); if (targetLabel == FST.END_LABEL) { return null; } - setCurrentLabel(arc.label); + setCurrentLabel(arc.label()); incr(); return fst.readFirstTargetArc(arc, getArc(upto), fstReader); } // Scan backwards to find a floor arc that is not missing - for (long arcOffset = arc.posArcsStart - targetOffset * arc.bytesPerArc; arcOffset <= arc.posArcsStart; arcOffset += arc.bytesPerArc) { + for (long arcOffset = arc.posArcsStart() - targetOffset * arc.bytesPerArc(); 
arcOffset <= arc.posArcsStart(); arcOffset += arc.bytesPerArc()) { // TODO: we can do better here by skipping missing arcs - arc.nextArc = arcOffset; - //System.out.println(" hasFloor arcIdx=" + (arc.arcIdx+1)); + arc.nextArc(arcOffset); fst.readNextRealArc(arc, in); - if (arc.label < targetLabel) { + if (arc.label() < targetLabel) { assert arc.isLast() || fst.readNextArcLabel(arc, in) > targetLabel; pushLast(); return null; } } - assert false: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel; + assert false: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel; return arc; // unreachable } } @@ -425,15 +424,15 @@ abstract class FSTEnum { private FST.Arc doSeekFloorArrayPacked(FST.Arc arc, int targetLabel, final FST.BytesReader in) throws IOException { // Arcs are fixed array -- use binary search to find the target. - int low = arc.arcIdx; - int high = arc.numArcs-1; + int low = arc.arcIdx(); + int high = arc.numArcs() -1; int mid = 0; //System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel); boolean found = false; while (low <= high) { mid = (low + high) >>> 1; - in.setPosition(arc.posArcsStart); - in.skipBytes(arc.bytesPerArc*mid+1); + in.setPosition(arc.posArcsStart()); + in.skipBytes(arc.bytesPerArc() * mid + 1); final int midLabel = fst.readLabel(in); final int cmp = midLabel - targetLabel; //System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp); @@ -452,15 +451,15 @@ abstract class FSTEnum { if (found) { // Match -- recurse //System.out.println(" match! 
arcIdx=" + mid); - arc.arcIdx = mid-1; + arc.arcIdx(mid - 1); fst.readNextRealArc(arc, in); - assert arc.arcIdx == mid; - assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid; - output[upto] = fst.outputs.add(output[upto-1], arc.output); + assert arc.arcIdx() == mid; + assert arc.label() == targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel + " mid=" + mid; + output[upto] = fst.outputs.add(output[upto-1], arc.output()); if (targetLabel == FST.END_LABEL) { return null; } - setCurrentLabel(arc.label); + setCurrentLabel(arc.label()); incr(); return fst.readFirstTargetArc(arc, getArc(upto), fstReader); } else if (high == -1) { @@ -474,7 +473,7 @@ abstract class FSTEnum { // First, walk backwards until we find a first arc // that's before our target label: fst.readFirstTargetArc(getArc(upto-1), arc, fstReader); - if (arc.label < targetLabel) { + if (arc.label() < targetLabel) { // Then, scan forwards to the arc just before // the targetLabel: while(!arc.isLast() && fst.readNextArcLabel(arc, in) < targetLabel) { @@ -492,27 +491,26 @@ abstract class FSTEnum { } } else { // There is a floor arc: - arc.arcIdx = (low > high ? 
high : low)-1; - //System.out.println(" hasFloor arcIdx=" + (arc.arcIdx+1)); + arc.arcIdx(high - 1); fst.readNextRealArc(arc, in); assert arc.isLast() || fst.readNextArcLabel(arc, in) > targetLabel; - assert arc.label < targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel; + assert arc.label() < targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel; pushLast(); return null; } } private FST.Arc doSeekFloorList(FST.Arc arc, int targetLabel) throws IOException { - if (arc.label == targetLabel) { + if (arc.label() == targetLabel) { // Match -- recurse - output[upto] = fst.outputs.add(output[upto-1], arc.output); + output[upto] = fst.outputs.add(output[upto-1], arc.output()); if (targetLabel == FST.END_LABEL) { return null; } - setCurrentLabel(arc.label); + setCurrentLabel(arc.label()); incr(); return fst.readFirstTargetArc(arc, getArc(upto), fstReader); - } else if (arc.label > targetLabel) { + } else if (arc.label() > targetLabel) { // TODO: if each arc could somehow read the arc just // before, we can save this re-scan. The ceil case // doesn't need this because it reads the next arc @@ -521,7 +519,7 @@ abstract class FSTEnum { // First, walk backwards until we find a first arc // that's before our target label: fst.readFirstTargetArc(getArc(upto-1), arc, fstReader); - if (arc.label < targetLabel) { + if (arc.label() < targetLabel) { // Then, scan forwards to the arc just before // the targetLabel: while(!arc.isLast() && fst.readNextArcLabel(arc, fstReader) < targetLabel) { @@ -553,7 +551,7 @@ abstract class FSTEnum { } /** Seeks to exactly target term. */ - protected boolean doSeekExact() throws IOException { + boolean doSeekExact() throws IOException { // TODO: possibly caller could/should provide common // prefix length? 
ie this work may be redundant if @@ -584,7 +582,7 @@ abstract class FSTEnum { return false; } // Match -- recurse: - output[upto] = fst.outputs.add(output[upto-1], nextArc.output); + output[upto] = fst.outputs.add(output[upto-1], nextArc.output()); if (targetLabel == FST.END_LABEL) { //System.out.println(" return found; upto=" + upto + " output=" + output[upto] + " nextArc=" + nextArc.isLast()); return true; @@ -621,13 +619,13 @@ abstract class FSTEnum { assert arc != null; while (true) { - output[upto] = fst.outputs.add(output[upto-1], arc.output); - if (arc.label == FST.END_LABEL) { + output[upto] = fst.outputs.add(output[upto-1], arc.output()); + if (arc.label() == FST.END_LABEL) { // Final node break; } //System.out.println(" pushFirst label=" + (char) arc.label + " upto=" + upto + " output=" + fst.outputs.outputToString(output[upto])); - setCurrentLabel(arc.label); + setCurrentLabel(arc.label()); incr(); final FST.Arc nextArc = getArc(upto); @@ -644,9 +642,9 @@ abstract class FSTEnum { assert arc != null; while (true) { - setCurrentLabel(arc.label); - output[upto] = fst.outputs.add(output[upto-1], arc.output); - if (arc.label == FST.END_LABEL) { + setCurrentLabel(arc.label()); + output[upto] = fst.outputs.add(output[upto-1], arc.output()); + if (arc.label() == FST.END_LABEL) { // Final node break; } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java b/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java index c7023c13751..0de8c97b136 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java @@ -41,15 +41,15 @@ final class NodeHash { private boolean nodesEqual(Builder.UnCompiledNode node, long address) throws IOException { fst.readFirstRealTargetArc(address, scratchArc, in); - if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) { + if (scratchArc.bytesPerArc() != 0 && node.numArcs != scratchArc.numArcs()) { return false; } 
for(int arcUpto=0;arcUpto arc = node.arcs[arcUpto]; - if (arc.label != scratchArc.label || - !arc.output.equals(scratchArc.output) || - ((Builder.CompiledNode) arc.target).node != scratchArc.target || - !arc.nextFinalOutput.equals(scratchArc.nextFinalOutput) || + if (arc.label != scratchArc.label() || + !arc.output.equals(scratchArc.output()) || + ((Builder.CompiledNode) arc.target).node != scratchArc.target() || + !arc.nextFinalOutput.equals(scratchArc.nextFinalOutput()) || arc.isFinal != scratchArc.isFinal()) { return false; } @@ -98,10 +98,10 @@ final class NodeHash { fst.readFirstRealTargetArc(node, scratchArc, in); while(true) { // System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal() + " pos=" + in.getPosition()); - h = PRIME * h + scratchArc.label; - h = PRIME * h + (int) (scratchArc.target^(scratchArc.target>>32)); - h = PRIME * h + scratchArc.output.hashCode(); - h = PRIME * h + scratchArc.nextFinalOutput.hashCode(); + h = PRIME * h + scratchArc.label(); + h = PRIME * h + (int) (scratchArc.target() ^(scratchArc.target() >>32)); + h = PRIME * h + scratchArc.output().hashCode(); + h = PRIME * h + scratchArc.nextFinalOutput().hashCode(); if (scratchArc.isFinal()) { h += 17; } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java index d435bcfdd77..0b2a0eea503 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java @@ -55,11 +55,11 @@ public final class Util { if (fst.findTargetArc(input.ints[input.offset + i], arc, arc, fstReader) == null) { return null; } - output = fst.outputs.add(output, arc.output); + output = fst.outputs.add(output, arc.output()); } if (arc.isFinal()) { - return fst.outputs.add(output, arc.nextFinalOutput); + return 
fst.outputs.add(output, arc.nextFinalOutput()); } else { return null; } @@ -75,7 +75,7 @@ public final class Util { final BytesReader fstReader = fst.getBytesReader(); // TODO: would be nice not to alloc this on every lookup - final FST.Arc arc = fst.getFirstArc(new FST.Arc()); + final FST.Arc arc = fst.getFirstArc(new FST.Arc<>()); // Accumulate output as we go T output = fst.outputs.getNoOutput(); @@ -83,11 +83,11 @@ public final class Util { if (fst.findTargetArc(input.bytes[i+input.offset] & 0xFF, arc, arc, fstReader) == null) { return null; } - output = fst.outputs.add(output, arc.output); + output = fst.outputs.add(output, arc.output()); } if (arc.isFinal()) { - return fst.outputs.add(output, arc.nextFinalOutput); + return fst.outputs.add(output, arc.nextFinalOutput()); } else { return null; } @@ -125,7 +125,7 @@ public final class Util { */ @Deprecated public static IntsRef getByOutput(FST fst, long targetOutput, BytesReader in, Arc arc, Arc scratchArc, IntsRefBuilder result) throws IOException { - long output = arc.output; + long output = arc.output(); int upto = 0; //System.out.println("reverseLookup output=" + targetOutput); @@ -133,7 +133,7 @@ public final class Util { while(true) { //System.out.println("loop: output=" + output + " upto=" + upto + " arc=" + arc); if (arc.isFinal()) { - final long finalOutput = output + arc.nextFinalOutput; + final long finalOutput = output + arc.nextFinalOutput(); //System.out.println(" isFinal finalOutput=" + finalOutput); if (finalOutput == targetOutput) { result.setLength(upto); @@ -149,19 +149,19 @@ public final class Util { //System.out.println(" targetHasArcs"); result.grow(1+upto); - fst.readFirstRealTargetArc(arc.target, arc, in); + fst.readFirstRealTargetArc(arc.target(), arc, in); - if (arc.bytesPerArc != 0 && arc.arcIdx > Integer.MIN_VALUE) { + if (arc.bytesPerArc() != 0 && arc.arcIdx() > Integer.MIN_VALUE) { int low = 0; - int high = arc.numArcs-1; + int high = arc.numArcs() -1; int mid = 0; 
//System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + " output=" + output); boolean exact = false; while (low <= high) { mid = (low + high) >>> 1; - in.setPosition(arc.posArcsStart); - in.skipBytes(arc.bytesPerArc*mid); + in.setPosition(arc.posArcsStart()); + in.skipBytes(arc.bytesPerArc() *mid); final byte flags = in.readByte(); fst.readLabel(in); final long minArcOutput; @@ -182,17 +182,19 @@ public final class Util { } } + int idx; if (high == -1) { return null; } else if (exact) { - arc.arcIdx = mid-1; + idx = mid; } else { - arc.arcIdx = low-2; + idx = low - 1; } + arc.arcIdx(idx - 1); fst.readNextRealArc(arc, in); - result.setIntAt(upto++, arc.label); - output += arc.output; + result.setIntAt(upto++, arc.label()); + output += arc.output(); } else { @@ -203,13 +205,13 @@ public final class Util { // This is the min output we'd hit if we follow // this arc: - final long minArcOutput = output + arc.output; + final long minArcOutput = output + arc.output(); if (minArcOutput == targetOutput) { // Recurse on this arc: //System.out.println(" match! 
break"); output = minArcOutput; - result.setIntAt(upto++, arc.label); + result.setIntAt(upto++, arc.label()); break; } else if (minArcOutput > targetOutput) { if (prevArc == null) { @@ -218,8 +220,8 @@ public final class Util { } else { // Recurse on previous arc: arc.copyFrom(prevArc); - result.setIntAt(upto++, arc.label); - output += arc.output; + result.setIntAt(upto++, arc.label()); + output += arc.output(); //System.out.println(" recurse prev label=" + (char) arc.label + " output=" + output); break; } @@ -227,7 +229,7 @@ public final class Util { // Recurse on this arc: output = minArcOutput; //System.out.println(" recurse last label=" + (char) arc.label + " output=" + output); - result.setIntAt(upto++, arc.label); + result.setIntAt(upto++, arc.label()); break; } else { // Read next arc in this node: @@ -261,12 +263,7 @@ public final class Util { // Custom int payload for consumers; the NRT suggester uses this to record if this path has already enumerated a surface form public int payload; - /** Sole constructor */ - public FSTPath(T output, FST.Arc arc, IntsRefBuilder input) { - this(output, arc, input, 0, null, -1); - } - - public FSTPath(T output, FST.Arc arc, IntsRefBuilder input, float boost, CharSequence context, int payload) { + FSTPath(T output, FST.Arc arc, IntsRefBuilder input, float boost, CharSequence context, int payload) { this.arc = new FST.Arc().copyFrom(arc); this.output = output; this.input = input; @@ -275,7 +272,7 @@ public final class Util { this.payload = payload; } - public FSTPath newPath(T output, IntsRefBuilder input) { + FSTPath newPath(T output, IntsRefBuilder input) { return new FSTPath<>(output, this.arc, input, this.boost, this.context, this.payload); } @@ -289,7 +286,8 @@ public final class Util { * tie breaks by path.input. 
*/ private static class TieBreakByInputComparator implements Comparator> { private final Comparator comparator; - public TieBreakByInputComparator(Comparator comparator) { + + TieBreakByInputComparator(Comparator comparator) { this.comparator = comparator; } @@ -318,7 +316,7 @@ public final class Util { private final Comparator comparator; private final Comparator> pathComparator; - TreeSet> queue = null; + TreeSet> queue; /** * Creates an unbounded TopNSearcher @@ -347,7 +345,7 @@ public final class Util { assert queue != null; - T output = fst.outputs.add(path.output, path.arc.output); + T output = fst.outputs.add(path.output, path.arc.output()); if (queue.size() == maxQueueDepth) { FSTPath bottom = queue.last(); @@ -357,7 +355,7 @@ public final class Util { return; } else if (comp == 0) { // Tie break by alpha sort on the input: - path.input.append(path.arc.label); + path.input.append(path.arc.label()); final int cmp = bottom.input.get().compareTo(path.input.get()); path.input.setLength(path.input.length() - 1); @@ -370,15 +368,14 @@ public final class Util { } } // Competes - } else { - // Queue isn't full yet, so any path we hit competes: } + // else ... Queue isn't full yet, so any path we hit competes: // copy over the current input to the new input // and add the arc.label to the end IntsRefBuilder newInput = new IntsRefBuilder(); newInput.copyInts(path.input.get()); - newInput.append(path.arc.label); + newInput.append(path.arc.label()); FSTPath newPath = path.newPath(output, newInput); if (acceptPartialPath(newPath)) { @@ -408,7 +405,7 @@ public final class Util { // Bootstrap: find the min starting arc while (true) { - if (allowEmptyString || path.arc.label != FST.END_LABEL) { + if (allowEmptyString || path.arc.label() != FST.END_LABEL) { addIfCompetitive(path); } if (path.arc.isLast()) { @@ -457,7 +454,7 @@ public final class Util { continue; } - if (path.arc.label == FST.END_LABEL) { + if (path.arc.label() == FST.END_LABEL) { // Empty string! 
path.input.setLength(path.input.length() - 1); results.add(new Result<>(path.input.get(), path.output)); @@ -485,7 +482,7 @@ public final class Util { while(true) { // tricky: instead of comparing output == 0, we must // express it via the comparator compare(output, 0) == 0 - if (comparator.compare(NO_OUTPUT, path.arc.output) == 0) { + if (comparator.compare(NO_OUTPUT, path.arc.output()) == 0) { if (queue == null) { foundZero = true; break; @@ -514,9 +511,9 @@ public final class Util { path.arc.copyFrom(scratchArc); } - if (path.arc.label == FST.END_LABEL) { + if (path.arc.label() == FST.END_LABEL) { // Add final output: - path.output = fst.outputs.add(path.output, path.arc.output); + path.output = fst.outputs.add(path.output, path.arc.output()); if (acceptResult(path)) { results.add(new Result<>(path.input.get(), path.output)); } else { @@ -524,8 +521,8 @@ public final class Util { } break; } else { - path.input.append(path.arc.label); - path.output = fst.outputs.add(path.output, path.arc.output); + path.input.append(path.arc.label()); + path.output = fst.outputs.add(path.output, path.arc.output()); if (acceptPartialPath(path) == false) { break; } @@ -641,7 +638,7 @@ public final class Util { // This is the start arc in the automaton (from the epsilon state to the first state // with outgoing transitions. - final FST.Arc startArc = fst.getFirstArc(new FST.Arc()); + final FST.Arc startArc = fst.getFirstArc(new FST.Arc<>()); // A queue of transitions to consider for the next level. final List> thisLevelQueue = new ArrayList<>(); @@ -656,7 +653,7 @@ public final class Util { // A bitset of already seen states (target offset). final BitSet seen = new BitSet(); - seen.set((int) startArc.target); + seen.set((int) startArc.target()); // Shape for states. final String stateShape = "circle"; @@ -689,16 +686,16 @@ public final class Util { final T finalOutput; if (startArc.isFinal()) { isFinal = true; - finalOutput = startArc.nextFinalOutput == NO_OUTPUT ? 
null : startArc.nextFinalOutput; + finalOutput = startArc.nextFinalOutput() == NO_OUTPUT ? null : startArc.nextFinalOutput(); } else { isFinal = false; finalOutput = null; } - emitDotState(out, Long.toString(startArc.target), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput)); + emitDotState(out, Long.toString(startArc.target()), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput)); } - out.write(" initial -> " + startArc.target + "\n"); + out.write(" initial -> " + startArc.target() + "\n"); int level = 0; @@ -717,9 +714,9 @@ public final class Util { // scan all target arcs //System.out.println(" readFirstTarget..."); - final long node = arc.target; + final long node = arc.target(); - fst.readFirstRealTargetArc(arc.target, arc, r); + fst.readFirstRealTargetArc(arc.target(), arc, r); //System.out.println(" firstTarget: " + arc); @@ -727,7 +724,7 @@ public final class Util { //System.out.println(" cycle arc=" + arc); // Emit the unseen state and add it to the queue for the next level. 
- if (arc.target >= 0 && !seen.get((int) arc.target)) { + if (arc.target() >= 0 && !seen.get((int) arc.target())) { /* boolean isFinal = false; @@ -748,35 +745,35 @@ public final class Util { } final String finalOutput; - if (arc.nextFinalOutput != null && arc.nextFinalOutput != NO_OUTPUT) { - finalOutput = fst.outputs.outputToString(arc.nextFinalOutput); + if (arc.nextFinalOutput() != null && arc.nextFinalOutput() != NO_OUTPUT) { + finalOutput = fst.outputs.outputToString(arc.nextFinalOutput()); } else { finalOutput = ""; } - emitDotState(out, Long.toString(arc.target), stateShape, stateColor, finalOutput); + emitDotState(out, Long.toString(arc.target()), stateShape, stateColor, finalOutput); // To see the node address, use this instead: //emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, String.valueOf(arc.target)); - seen.set((int) arc.target); + seen.set((int) arc.target()); nextLevelQueue.add(new FST.Arc().copyFrom(arc)); - sameLevelStates.add((int) arc.target); + sameLevelStates.add((int) arc.target()); } String outs; - if (arc.output != NO_OUTPUT) { - outs = "/" + fst.outputs.outputToString(arc.output); + if (arc.output() != NO_OUTPUT) { + outs = "/" + fst.outputs.outputToString(arc.output()); } else { outs = ""; } - if (!FST.targetHasArcs(arc) && arc.isFinal() && arc.nextFinalOutput != NO_OUTPUT) { + if (!FST.targetHasArcs(arc) && arc.isFinal() && arc.nextFinalOutput() != NO_OUTPUT) { // Tricky special case: sometimes, due to // pruning, the builder can [sillily] produce // an FST with an arc into the final end state // (-1) but also with a next final output; in // this case we pull that output up onto this // arc - outs = outs + "/[" + fst.outputs.outputToString(arc.nextFinalOutput) + "]"; + outs = outs + "/[" + fst.outputs.outputToString(arc.nextFinalOutput()) + "]"; } final String arcColor; @@ -786,8 +783,8 @@ public final class Util { arcColor = "black"; } - assert arc.label != FST.END_LABEL; - out.write(" " + node + " -> " + 
arc.target + " [label=\"" + printableLabel(arc.label) + outs + "\"" + (arc.isFinal() ? " style=\"bold\"" : "" ) + " color=\"" + arcColor + "\"]\n"); + assert arc.label() != FST.END_LABEL; + out.write(" " + node + " -> " + arc.target() + " [label=\"" + printableLabel(arc.label()) + outs + "\"" + (arc.isFinal() ? " style=\"bold\"" : "" ) + " color=\"" + arcColor + "\"]\n"); // Break the loop if we're on the last arc of this state. if (arc.isLast()) { @@ -935,55 +932,52 @@ public final class Util { * @param arc the arc to read into in place * @param in the fst's {@link BytesReader} */ - public static Arc readCeilArc(int label, FST fst, Arc follow, - Arc arc, BytesReader in) throws IOException { - // TODO maybe this is a useful in the FST class - we could simplify some other code like FSTEnum? + public static Arc readCeilArc(int label, FST fst, Arc follow, Arc arc, BytesReader in) throws IOException { if (label == FST.END_LABEL) { if (follow.isFinal()) { - if (follow.target <= 0) { - arc.flags = FST.BIT_LAST_ARC; + if (follow.target() <= 0) { + arc.flags((byte) FST.BIT_LAST_ARC); } else { - arc.flags = 0; + arc.flags((byte) 0); // NOTE: nextArc is a node (not an address!) 
in this case: - arc.nextArc = follow.target; + arc.nextArc(follow.target()); } - arc.output = follow.nextFinalOutput; - arc.label = FST.END_LABEL; + arc.output(follow.nextFinalOutput()); + arc.label(FST.END_LABEL); return arc; } else { return null; } } - if (!FST.targetHasArcs(follow)) { return null; } fst.readFirstTargetArc(follow, arc, in); - if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) { - if (arc.arcIdx == Integer.MIN_VALUE) { + if (arc.bytesPerArc() != 0 && arc.label() != FST.END_LABEL) { + if (arc.arcIdx() == Integer.MIN_VALUE) { // Arcs are in an array-with-gaps - int offset = label - arc.label; - if (offset >= arc.numArcs) { + int offset = label - arc.label(); + if (offset >= arc.numArcs()) { return null; } else if (offset < 0) { return arc; } else { - arc.nextArc = arc.posArcsStart - offset * arc.bytesPerArc; + arc.nextArc(arc.posArcsStart() - offset * arc.bytesPerArc()); return fst.readNextRealArc(arc, in); } } // Arcs are packed array -- use binary search to find // the target. - int low = arc.arcIdx; - int high = arc.numArcs - 1; + int low = arc.arcIdx(); int mid = 0; + int high = arc.numArcs() - 1; // System.out.println("do arc array low=" + low + " high=" + high + // " targetLabel=" + targetLabel); while (low <= high) { mid = (low + high) >>> 1; - in.setPosition(arc.posArcsStart); - in.skipBytes(arc.bytesPerArc * mid + 1); + in.setPosition(arc.posArcsStart()); + in.skipBytes(arc.bytesPerArc() * mid + 1); final int midLabel = fst.readLabel(in); final int cmp = midLabel - label; // System.out.println(" cycle low=" + low + " high=" + high + " mid=" + @@ -993,28 +987,27 @@ public final class Util { } else if (cmp > 0) { high = mid - 1; } else { - arc.arcIdx = mid-1; + arc.arcIdx(mid - 1); return fst.readNextRealArc(arc, in); } } - if (low == arc.numArcs) { + if (low == arc.numArcs()) { // DEAD END! return null; } - - arc.arcIdx = (low > high ? 
high : low); - return fst.readNextRealArc(arc, in); + arc.arcIdx(high + 1); + return fst.readNextRealArc(arc, in ); } // Linear scan - fst.readFirstRealTargetArc(follow.target, arc, in); + fst.readFirstRealTargetArc(follow.target(), arc, in); while (true) { // System.out.println(" non-bs cycle"); // TODO: we should fix this code to not have to create // object for the output of every arc we scan... only // for the matching arc, if found - if (arc.label >= label) { + if (arc.label() >= label) { // System.out.println(" found!"); return arc; } else if (arc.isLast()) { @@ -1024,4 +1017,5 @@ public final class Util { } } } + } diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java index d882fae394d..8f598f63fb0 100644 --- a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java +++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java @@ -1201,19 +1201,19 @@ public class TestFSTs extends LuceneTestCase { private void checkStopNodes(FST fst, PositiveIntOutputs outputs) throws Exception { final Long nothing = outputs.getNoOutput(); FST.Arc startArc = fst.getFirstArc(new FST.Arc()); - assertEquals(nothing, startArc.output); - assertEquals(nothing, startArc.nextFinalOutput); + assertEquals(nothing, startArc.output()); + assertEquals(nothing, startArc.nextFinalOutput()); FST.Arc arc = fst.readFirstTargetArc(startArc, new FST.Arc(), fst.getBytesReader()); - assertEquals('a', arc.label); - assertEquals(17, arc.nextFinalOutput.longValue()); + assertEquals('a', arc.label()); + assertEquals(17, arc.nextFinalOutput().longValue()); assertTrue(arc.isFinal()); arc = fst.readNextArc(arc, fst.getBytesReader()); - assertEquals('b', arc.label); + assertEquals('b', arc.label()); assertFalse(arc.isFinal()); - assertEquals(42, arc.output.longValue()); + assertEquals(42, arc.output().longValue()); } static final Comparator minLongComparator = new Comparator () { @@ -1404,7 +1404,7 @@ public 
class TestFSTs extends LuceneTestCase { if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) { fail(); } - prefixOutput += arc.output; + prefixOutput += arc.output(); } final int topN = TestUtil.nextInt(random, 1, 10); @@ -1526,7 +1526,7 @@ public class TestFSTs extends LuceneTestCase { if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) { fail(); } - prefixOutput = outputs.add(prefixOutput, arc.output); + prefixOutput = outputs.add(prefixOutput, arc.output()); } final int topN = TestUtil.nextInt(random, 1, 10); @@ -1623,10 +1623,10 @@ public class TestFSTs extends LuceneTestCase { FST.BytesReader reader = fst.getBytesReader(); arc = fst.findTargetArc((int) 'm', arc, arc, reader); assertNotNull(arc); - assertEquals(new BytesRef("m"), arc.output); + assertEquals(new BytesRef("m"), arc.output()); // NOTE: illegal: - arc.output.length = 0; + arc.output().length = 0; fst.getFirstArc(arc); try { diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestFstDirect.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestFstDirect.java index 5ec44416768..8763d0416d7 100644 --- a/lucene/core/src/test/org/apache/lucene/util/fst/TestFstDirect.java +++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestFstDirect.java @@ -79,9 +79,9 @@ public class TestFstDirect extends LuceneTestCase { BytesRefFSTEnum fstEnum = new BytesRefFSTEnum<>(fst); int sparseArrayArcCount = 0, directArrayArcCount = 0, listArcCount = 0; while(fstEnum.next() != null) { - if (fstEnum.arcs[fstEnum.upto].bytesPerArc == 0) { + if (fstEnum.arcs[fstEnum.upto].bytesPerArc() == 0) { listArcCount ++; - } else if (fstEnum.arcs[fstEnum.upto].arcIdx == Integer.MIN_VALUE) { + } else if (fstEnum.arcs[fstEnum.upto].arcIdx() == Integer.MIN_VALUE) { directArrayArcCount ++; } else { sparseArrayArcCount ++; diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java 
b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java index 7f43b3f89f0..14b742cbe9a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java @@ -277,7 +277,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { arc = arcs[0]; assert arc.isFinal(); - output = arc.output; + output = arc.output(); targetUpto = 0; IDVersionSegmentTermsEnumFrame lastFrame = stack[0]; @@ -303,9 +303,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { //if (arc.label != (target.bytes[target.offset + targetUpto] & 0xFF)) { //System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF)); //} - assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); - if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) { - output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); + assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); + if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) { + output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output()); } if (arc.isFinal()) { lastFrame = stack[1+lastFrame.ord]; @@ -404,19 +404,19 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { // Empty string prefix must have an output (block) in the index! 
assert arc.isFinal(); - assert arc.output != null; + assert arc.output() != null; // if (DEBUG) { // System.out.println(" no seek state; push root frame"); // } - output = arc.output; + output = arc.output(); currentFrame = staticFrame; //term.length = 0; targetUpto = 0; - currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0); + currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0); } // if (DEBUG) { @@ -517,9 +517,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { termExists = false; } // Aggregate output as we go: - assert arc.output != null; - if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) { - output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); + assert arc.output() != null; + if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) { + output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output()); } // if (DEBUG) { @@ -529,7 +529,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { if (arc.isFinal()) { // if (DEBUG) System.out.println(" arc is final!"); - currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto); + currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto); // if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms); } } @@ -619,7 +619,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { arc = arcs[0]; assert arc.isFinal(); - output = arc.output; + output = arc.output(); targetUpto = 0; IDVersionSegmentTermsEnumFrame lastFrame = stack[0]; @@ -642,14 +642,14 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { break; } arc = arcs[1+targetUpto]; - assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label 
+ " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); + assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); // TODO: we could save the outputs in local // byte[][] instead of making new objs ever // seek; but, often the FST doesn't have any // shared bytes (but this could change if we // reverse vLong byte order) - if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) { - output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); + if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) { + output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output()); } if (arc.isFinal()) { lastFrame = stack[1+lastFrame.ord]; @@ -722,19 +722,19 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { // Empty string prefix must have an output (block) in the index! assert arc.isFinal(); - assert arc.output != null; + assert arc.output() != null; //if (DEBUG) { //System.out.println(" no seek state; push root frame"); //} - output = arc.output; + output = arc.output(); currentFrame = staticFrame; //term.length = 0; targetUpto = 0; - currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0); + currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0); } //if (DEBUG) { @@ -789,9 +789,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { term.setByteAt(targetUpto, (byte) targetLabel); arc = nextArc; // Aggregate output as we go: - assert arc.output != null; - if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) { - output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); + assert arc.output() != null; + if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) { + output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output()); } //if 
(DEBUG) { @@ -801,7 +801,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { if (arc.isFinal()) { //if (DEBUG) System.out.println(" arc is final!"); - currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto); + currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto); //if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms); } } @@ -854,8 +854,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { } if (fr.index != null) { assert !isSeekFrame || f.arc != null: "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc; - if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix-1)&0xFF)) { - out.println(" broken seek state: arc.label=" + (char) f.arc.label + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF)); + if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix-1)&0xFF)) { + out.println(" broken seek state: arc.label=" + (char) f.arc.label() + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF)); throw new RuntimeException("seek state is broken"); } Pair output = Util.get(fr.index, prefix); diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java index cc11fe124a3..6ca21e7ee3c 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java @@ -727,7 +727,7 @@ public class AnalyzingSuggester extends Lookup implements Accountable { if (fst.findTargetArc(END_BYTE, path.fstNode, scratchArc, bytesReader) != null) { // This node has END_BYTE arc leaving, meaning it's an // "exact" match: - searcher.addStartPaths(scratchArc, fst.outputs.add(path.output, 
scratchArc.output), false, path.input); + searcher.addStartPaths(scratchArc, fst.outputs.add(path.output, scratchArc.output()), false, path.input); } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java index 61f2fe12af1..97ef9a62ea1 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java @@ -107,7 +107,7 @@ public class FSTUtil { newInput.append(t.min); queue.add(new Path<>(t.dest, new FST.Arc() .copyFrom(nextArc), fst.outputs - .add(path.output, nextArc.output), newInput)); + .add(path.output, nextArc.output()), newInput)); } } else { // TODO: if this transition's TO state is accepting, and @@ -119,21 +119,21 @@ public class FSTUtil { // done in AnalyzingSuggester). FST.Arc nextArc = Util.readCeilArc(min, fst, path.fstNode, scratchArc, fstReader); - while (nextArc != null && nextArc.label <= max) { - assert nextArc.label <= max; - assert nextArc.label >= min : nextArc.label + " " + while (nextArc != null && nextArc.label() <= max) { + assert nextArc.label() <= max; + assert nextArc.label() >= min : nextArc.label() + " " + min; final IntsRefBuilder newInput = new IntsRefBuilder(); newInput.copyInts(currentInput.get()); - newInput.append(nextArc.label); + newInput.append(nextArc.label()); queue.add(new Path<>(t.dest, new FST.Arc() .copyFrom(nextArc), fst.outputs - .add(path.output, nextArc.output), newInput)); - final int label = nextArc.label; // used in assert + .add(path.output, nextArc.output()), newInput)); + final int label = nextArc.label(); // used in assert nextArc = nextArc.isLast() ? 
null : fst.readNextRealArc(nextArc, fstReader); - assert nextArc == null || label < nextArc.label : "last: " + label - + " next: " + nextArc.label; + assert nextArc == null || label < nextArc.label() : "last: " + label + + " next: " + nextArc.label(); } } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java index a16db8a47e7..10e9bb40593 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java @@ -599,7 +599,7 @@ public class FreeTextSuggester extends Lookup implements Accountable { @Override protected void addIfCompetitive(Util.FSTPath path) { - if (path.arc.label != separator) { + if (path.arc.label() != separator) { //System.out.println(" keep path: " + Util.toBytesRef(path.input, new BytesRef()).utf8ToString() + "; " + path + "; arc=" + path.arc); super.addIfCompetitive(path); } else { @@ -718,7 +718,7 @@ public class FreeTextSuggester extends Lookup implements Accountable { if (fst.findTargetArc(bytes[pos++] & 0xff, arc, arc, bytesReader) == null) { return null; } else { - output = fst.outputs.add(output, arc.output); + output = fst.outputs.add(output, arc.output()); } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java index 6a9e494c3d4..3256ead930e 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java @@ -159,7 +159,7 @@ public final class NRTSuggester implements Accountable { // We are removing dups if (path.payload == -1) { // This path didn't yet see the complete surface form; let's see if it just did with the arc 
output we just added: - BytesRef arcOutput = path.arc.output.output2; + BytesRef arcOutput = path.arc.output().output2; BytesRef output = path.output.output2; for(int i=0;i= num) return true; } else { diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java index d6c1a97e4fa..cb87492cd09 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java @@ -186,7 +186,7 @@ public class WFSTCompletionLookup extends Lookup implements Accountable { CharsRefBuilder spare = new CharsRefBuilder(); if (exactFirst && arc.isFinal()) { spare.copyUTF8Bytes(scratch.get()); - results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + arc.nextFinalOutput))); + results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + arc.nextFinalOutput()))); if (--num == 0) { return results; // that was quick } @@ -227,7 +227,7 @@ public class WFSTCompletionLookup extends Lookup implements Accountable { if (fst.findTargetArc(bytes[pos++] & 0xff, arc, arc, bytesReader) == null) { return null; } else { - output += arc.output.longValue(); + output += arc.output().longValue(); } } @@ -250,7 +250,7 @@ public class WFSTCompletionLookup extends Lookup implements Accountable { if (result == null || !arc.isFinal()) { return null; } else { - return Integer.valueOf(decodeWeight(result + arc.nextFinalOutput)); + return Integer.valueOf(decodeWeight(result + arc.nextFinalOutput())); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java b/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java index 438a6dcd863..590841d8e78 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java +++ 
b/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java @@ -220,7 +220,7 @@ public class FSTTester { return null; } } - output = fst.outputs.add(output, arc.output); + output = fst.outputs.add(output, arc.output()); } if (prefixLength != null) { @@ -253,14 +253,14 @@ public class FSTTester { arcs.clear(); // accumulate output - output = fst.outputs.add(output, arc.output); + output = fst.outputs.add(output, arc.output()); // append label - if (arc.label == FST.END_LABEL) { + if (arc.label() == FST.END_LABEL) { break; } - in.append(arc.label); + in.append(arc.label()); } return output;