From 86e7b083816f942bd2c3e0124c4008819c734003 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 20 May 2014 19:07:47 +0000 Subject: [PATCH] LUCENE-5670: add skip/FinalOutput to FST Outputs git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1596369 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 +++ .../lucene/codecs/memory/FSTTermOutputs.java | 27 +++++++++++++++++++ .../lucene/util/fst/ByteSequenceOutputs.java | 8 ++++++ .../apache/lucene/util/fst/BytesStore.java | 4 +-- .../lucene/util/fst/CharSequenceOutputs.java | 8 ++++++ .../java/org/apache/lucene/util/fst/FST.java | 11 +++----- .../lucene/util/fst/ForwardBytesReader.java | 2 +- .../lucene/util/fst/IntSequenceOutputs.java | 11 ++++++++ .../org/apache/lucene/util/fst/Outputs.java | 13 +++++++++ .../apache/lucene/util/fst/PairOutputs.java | 6 +++++ .../lucene/util/fst/ReverseBytesReader.java | 2 +- .../apache/lucene/util/fst/ListOfOutputs.java | 13 +++++++++ 12 files changed, 97 insertions(+), 11 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 5e24b98edec..629aed1c55c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -182,6 +182,9 @@ Optimizations to 8 (for int/float) and 16 (for long/double), for faster indexing time and smaller indices. (Robert Muir, Uwe Schindler, Mike McCandless) +* LUCENE-5670: Add skip/FinalOutput to FST Outputs. (Christian + Ziech via Mike McCandless). + Bug fixes * LUCENE-5673: MMapDirectory: Work around a "bug" in the JDK that throws diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java index 3acbde276d6..619ccddc7d4 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java @@ -299,6 +299,33 @@ class FSTTermOutputs extends Outputs { } return new TermData(longs, bytes, docFreq, totalTermFreq); } + + + @Override + public void skipOutput(DataInput in) throws IOException { + int bits = in.readByte() & 0xff; + int bit0 = bits & 1; + int bit1 = bits & 2; + int bit2 = bits & 4; + int bytesSize = (bits >>> 3); + if (bit1 > 0 && bytesSize == 0) { // determine extra length + bytesSize = in.readVInt(); + } + if (bit0 > 0) { // not all-zero case + for (int pos = 0; pos < longsSize; pos++) { + in.readVLong(); + } + } + if (bit1 > 0) { // bytes exists + in.skipBytes(bytesSize); + } + if (bit2 > 0) { // stats exist + int code = in.readVInt(); + if (hasPos && (code & 1) == 0) { + in.readVLong(); + } + } + } @Override public TermData getNoOutput() { diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java index 0f8ade45797..27427f0bda3 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java @@ -128,6 +128,14 @@ public final class ByteSequenceOutputs extends Outputs { } } + @Override + public void skipOutput(DataInput in) throws IOException { + final int len = in.readVInt(); + if (len != 0) { + in.skipBytes(len); + } + } + @Override public BytesRef getNoOutput() { return NO_OUTPUT; diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java b/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java index c7a0c898781..a64700fc8c6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java @@ -362,7 +362,7 @@ class BytesStore extends DataOutput { } @Override - public void skipBytes(int count) { + public void skipBytes(long count) { setPosition(getPosition() + count); } @@ -430,7 +430,7 @@ class BytesStore extends DataOutput { } @Override - public void skipBytes(int count) { + public void skipBytes(long count) { setPosition(getPosition() - count); } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java index c4bed386926..3435c1dbeae 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java @@ -132,6 +132,14 @@ public final class CharSequenceOutputs extends Outputs { return output; } } + + @Override + public void skipOutput(DataInput in) throws IOException { + final int len = in.readVInt(); + for(int idx=0;idx { // skip this arc: readLabel(in); if (arc.flag(BIT_ARC_HAS_OUTPUT)) { - outputs.read(in); + outputs.skipOutput(in); } if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) { - outputs.readFinalOutput(in); + outputs.skipFinalOutput(in); } if (arc.flag(BIT_STOP_NODE)) { } else if (arc.flag(BIT_TARGET_NEXT)) { @@ -1252,11 +1252,11 @@ public final class FST { readLabel(in); if (flag(flags, BIT_ARC_HAS_OUTPUT)) { - outputs.read(in); + outputs.skipOutput(in); } if (flag(flags, BIT_ARC_HAS_FINAL_OUTPUT)) { - outputs.readFinalOutput(in); + outputs.skipFinalOutput(in); } if (!flag(flags, BIT_STOP_NODE) && !flag(flags, BIT_TARGET_NEXT)) { @@ -1330,9 +1330,6 @@ public final class FST { /** Returns true if this reader uses reversed bytes * under-the-hood. */ public abstract boolean reversed(); - - /** Skips bytes. */ - public abstract void skipBytes(int count); } private static class ArcAndState { diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/ForwardBytesReader.java b/lucene/core/src/java/org/apache/lucene/util/fst/ForwardBytesReader.java index 1a9417f7a9c..2365a02a4fc 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/ForwardBytesReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/ForwardBytesReader.java @@ -41,7 +41,7 @@ final class ForwardBytesReader extends FST.BytesReader { } @Override - public void skipBytes(int count) { + public void skipBytes(long count) { pos += count; } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java index 919fceaaf10..82482183097 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java @@ -131,6 +131,17 @@ public final class IntSequenceOutputs extends Outputs { return output; } } + + @Override + public void skipOutput(DataInput in) throws IOException { + final int len = in.readVInt(); + if (len == 0) { + return; + } + for(int idx=0;idx { * #write(Object, DataOutput)}. */ public abstract T read(DataInput in) throws IOException; + /** Skip the output; defaults to just calling {@link #read} + * and discarding the result. */ + public void skipOutput(DataInput in) throws IOException { + read(in); + } + /** Decode an output value previously written with {@link * #writeFinalOutput(Object, DataOutput)}. By default this * just calls {@link #read(DataInput)}. */ public T readFinalOutput(DataInput in) throws IOException { return read(in); } + + /** Skip the output previously written with {@link #writeFinalOutput}; + * defaults to just calling {@link #readFinalOutput} and discarding + * the result. */ + public void skipFinalOutput(DataInput in) throws IOException { + skipOutput(in); + } /** NOTE: this output is compared with == so you must * ensure that all methods return the single object if diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/PairOutputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/PairOutputs.java index b9d5da6e093..ca5fe763717 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/PairOutputs.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/PairOutputs.java @@ -148,6 +148,12 @@ public class PairOutputs extends Outputs> { B output2 = outputs2.read(in); return newPair(output1, output2); } + + @Override + public void skipOutput(DataInput in) throws IOException { + outputs1.skipOutput(in); + outputs2.skipOutput(in); + } @Override public Pair getNoOutput() { diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/ReverseBytesReader.java b/lucene/core/src/java/org/apache/lucene/util/fst/ReverseBytesReader.java index f50ddd8ad92..59d76f0d1c6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/ReverseBytesReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/ReverseBytesReader.java @@ -39,7 +39,7 @@ final class ReverseBytesReader extends FST.BytesReader { } @Override - public void skipBytes(int count) { + public void skipBytes(long count) { pos -= count; } diff --git a/lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java b/lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java index b4f41c30912..88fc5432d49 100644 --- a/lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java +++ b/lucene/misc/src/java/org/apache/lucene/util/fst/ListOfOutputs.java @@ -122,6 +122,11 @@ public final class ListOfOutputs extends Outputs { public Object read(DataInput in) throws IOException { return outputs.read(in); } + + @Override + public void skipOutput(DataInput in) throws IOException { + outputs.skipOutput(in); + } @Override public Object readFinalOutput(DataInput in) throws IOException { @@ -136,6 +141,14 @@ public final class ListOfOutputs extends Outputs { return outputList; } } + + @Override + public void skipFinalOutput(DataInput in) throws IOException { + int count = in.readVInt(); + for(int i=0;i