From afee9af13f2627c1e9fb3498d826d7a227c60ea7 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 26 Sep 2014 02:22:18 +0000 Subject: [PATCH] LUCENE-5969: take bitvector out back and shoot it git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5969@1627701 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/lucene40/BitVector.java | 0 .../lucene40/Lucene40LiveDocsFormat.java | 0 .../org/apache/lucene/codecs/CodecUtil.java | 79 +++++++++++- .../lucene/codecs/lucene40/package.html | 25 ---- .../lucene/codecs/lucene50/Lucene50Codec.java | 3 +- .../lucene50/Lucene50FieldInfosFormat.java | 4 +- .../lucene50/Lucene50FieldInfosReader.java | 9 +- .../lucene50/Lucene50FieldInfosWriter.java | 3 +- .../lucene50/Lucene50LiveDocsFormat.java | 115 ++++++++++++++++++ .../org/apache/lucene/util/FixedBitSet.java | 2 +- .../lucene/index/TestIndexFileDeleter.java | 2 +- 11 files changed, 197 insertions(+), 45 deletions(-) rename lucene/{core => backward-codecs}/src/java/org/apache/lucene/codecs/lucene40/BitVector.java (100%) rename lucene/{core => backward-codecs}/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java (100%) delete mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/BitVector.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/BitVector.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java similarity index 100% rename from 
lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java index 40e9214c015..d46beed81f3 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java @@ -80,8 +80,7 @@ public final class CodecUtil { * @throws IOException If there is an I/O error writing to the underlying medium. * @throws IllegalArgumentException If the codec name is not simple ASCII, or is more than 127 characters in length */ - public static void writeHeader(DataOutput out, String codec, int version) - throws IOException { + public static void writeHeader(DataOutput out, String codec, int version) throws IOException { BytesRef bytes = new BytesRef(codec); if (bytes.length != codec.length() || bytes.length >= 128) { throw new IllegalArgumentException("codec must be simple ASCII, less than 128 characters in length [got " + codec + "]"); @@ -90,6 +89,38 @@ public final class CodecUtil { out.writeString(codec); out.writeInt(version); } + + /** + * Writes a codec header for a per-segment file, which records a string to + * identify the file, a version number, and the unique ID of the segment. + * This header can be parsed and validated with + * {@link #checkSegmentHeader(DataInput, String, int, int, String) checkSegmentHeader()}. + *

+ * CodecSegmentHeader --> CodecHeader,SegmentID + *

+ *

+ * Note that the length of a segment header depends upon both the + * name of the codec and the segment ID, so it cannot be computed + * from {@link #headerLength(String)} alone. + * + * @param out Output stream + * @param codec String to identify this file. It should be simple ASCII, + * less than 128 characters in length. + * @param version Version number + * @param segmentID Unique identifier for the segment + * @throws IOException If there is an I/O error writing to the underlying medium. + * @throws IllegalArgumentException If the codec name is not simple ASCII, or is more than 127 characters in length + */ + // nocommit: fix javadocs, add segmentLength() + public static void writeSegmentHeader(DataOutput out, String codec, int version, String segmentID) throws IOException { + writeHeader(out, codec, version); + // nocommit: improve encoding of this ID + out.writeString(segmentID); + } /** * Computes the length of a codec header. @@ -129,9 +160,7 @@ public final class CodecUtil { * @throws IOException If there is an I/O error reading from the underlying medium. * @see #writeHeader(DataOutput, String, int) */ - public static int checkHeader(DataInput in, String codec, int minVersion, int maxVersion) - throws IOException { - + public static int checkHeader(DataInput in, String codec, int minVersion, int maxVersion) throws IOException { // Safety to guard against reading a bogus string: final int actualHeader = in.readInt(); if (actualHeader != CODEC_MAGIC) { @@ -161,6 +190,46 @@ public final class CodecUtil { return actualVersion; } + /** + * Reads and validates a header previously written with + * {@link #writeSegmentHeader(DataOutput, String, int, String)}. + *

+ * When reading a file, supply the expected codec, + * expected version range (minVersion to maxVersion), + * and segment ID. + * + * @param in Input stream, positioned at the point where the + * header was previously written. Typically this is located + * at the beginning of the file. + * @param codec The expected codec name. + * @param minVersion The minimum supported expected version number. + * @param maxVersion The maximum supported expected version number. + * @param segmentID The expected ID of the segment this file belongs to. + * @return The actual version found, when a valid header is found + * that matches codec, with an actual version + * where minVersion <= actual <= maxVersion, + * and a matching segmentID. + * Otherwise an exception is thrown. + * @throws CorruptIndexException If the first four bytes are not + * {@link #CODEC_MAGIC}, or if the actual codec found is + * not codec, or if the segmentID + * does not match. + * @throws IndexFormatTooOldException If the actual version is less + * than minVersion. + * @throws IndexFormatTooNewException If the actual version is greater + * than maxVersion. + * @throws IOException If there is an I/O error reading from the underlying medium. + * @see #writeSegmentHeader(DataOutput, String, int, String) + */ + public static int checkSegmentHeader(DataInput in, String codec, int minVersion, int maxVersion, String segmentID) throws IOException { + int version = checkHeader(in, codec, minVersion, maxVersion); + String id = in.readString(); + if (!id.equals(segmentID)) { + throw new CorruptIndexException("file mismatch, expected segment id=" + segmentID + ", got=" + id, in); + } + return version; + } + /** * Writes a codec footer, which records both a checksum * algorithm ID and a checksum. 
This footer can diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html deleted file mode 100644 index 7959cc0f464..00000000000 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - -Lucene 4.0 file format. - - diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java index fdef7d68888..f2c78a917fb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java @@ -27,7 +27,6 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; import org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat; import org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat; import org.apache.lucene.codecs.lucene49.Lucene49NormsFormat; @@ -49,7 +48,7 @@ public class Lucene50Codec extends Codec { private final TermVectorsFormat vectorsFormat = new Lucene42TermVectorsFormat(); private final FieldInfosFormat fieldInfosFormat = new Lucene50FieldInfosFormat(); private final SegmentInfoFormat segmentInfosFormat = new Lucene50SegmentInfoFormat(); - private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat(); + private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java index 2d9f5ec0a82..18fb60984e0 100755 --- 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java @@ -36,8 +36,7 @@ import org.apache.lucene.store.DataOutput; * FieldBits,DocValuesBits,DocValuesGen,Attributes> FieldsCount,Footer

*

Data types: *