LUCENE-5969: take bitvector out back and shoot it

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5969@1627701 13f79535-47bb-0310-9956-ffa450edef68
2014-09-26 02:22:18 +00:00 · 2014-09-26 02:22:18 +00:00 · afee9af13f
parent 82a41114a7
commit afee9af13f
11 changed files with 197 additions and 45 deletions
--- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/BitVector.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/BitVector.java
--- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java
--- a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
@ -80,8 +80,7 @@ public final class CodecUtil {
   * @throws IOException If there is an I/O error writing to the underlying medium.
   * @throws IllegalArgumentException If the codec name is not simple ASCII, or is more than 127 characters in length
   */
-  public static void writeHeader(DataOutput out, String codec, int version)
+  public static void writeHeader(DataOutput out, String codec, int version) throws IOException {
    throws IOException {
    BytesRef bytes = new BytesRef(codec);
    if (bytes.length != codec.length() || bytes.length >= 128) {
      throw new IllegalArgumentException("codec must be simple ASCII, less than 128 characters in length [got " + codec + "]");
@ -91,6 +90,38 @@ public final class CodecUtil {
    out.writeInt(version);
  }
  /**
   * Writes a codec header for a per-segment file, which records a string to
   * identify the file, a version number, and the unique ID of the segment.
   * This header can be parsed and validated with 
   * {@link #checkSegmentHeader(DataInput, String, int, int, String) checkSegmentHeader()}.
   * <p>
   * CodecSegmentHeader --&gt; CodecHeader,SegmentID
   * <ul>
   *    <li>CodecHeader --&gt; {@link #writeHeader}
   *    <li>SegmentID   --&gt; {@link DataOutput#writeString String}.
   *        Unique identifier for the segment.
   * </ul>
   * <p>
   * Note that, unlike a plain codec header, the length of a segment header
   * depends on the segment ID as well as on the name of the codec:
   * {@link #headerLength(String)} only accounts for the CodecHeader part.
   * 
   * @param out Output stream
   * @param codec String to identify this file. It should be simple ASCII, 
   *              less than 128 characters in length.
   * @param version Version number
   * @param segmentID Unique identifier for the segment; written as a String
   *                  directly after the codec header
   * @throws IOException If there is an I/O error writing to the underlying medium.
   * @throws IllegalArgumentException If the codec name is not simple ASCII, or is more than 127 characters in length
   */
  // nocommit: add segmentLength()
  public static void writeSegmentHeader(DataOutput out, String codec, int version, String segmentID) throws IOException {
    // The regular codec header (codec name + version) always comes first.
    writeHeader(out, codec, version);
    // nocommit: improve encoding of this ID
    out.writeString(segmentID);
  }
  /**
   * Computes the length of a codec header.
   * 
@ -129,9 +160,7 @@ public final class CodecUtil {
   * @throws IOException If there is an I/O error reading from the underlying medium.
   * @see #writeHeader(DataOutput, String, int)
   */
-  public static int checkHeader(DataInput in, String codec, int minVersion, int maxVersion)
+  public static int checkHeader(DataInput in, String codec, int minVersion, int maxVersion) throws IOException {
    throws IOException {
    // Safety to guard against reading a bogus string:
    final int actualHeader = in.readInt();
    if (actualHeader != CODEC_MAGIC) {
@ -161,6 +190,46 @@ public final class CodecUtil {
    return actualVersion;
  }
  /**
   * Reads and validates a segment header previously written with 
   * {@link #writeSegmentHeader(DataOutput, String, int, String)}.
   * <p>
   * The codec header is validated first with
   * {@link #checkHeader(DataInput, String, int, int)}; the segment ID
   * stored right after it is then read and compared against the expected
   * <code>segmentID</code>.
   * 
   * @param in Input stream, positioned at the point where the
   *        header was previously written. Typically this is located
   *        at the beginning of the file.
   * @param codec The expected codec name.
   * @param minVersion The minimum supported expected version number.
   * @param maxVersion The maximum supported expected version number.
   * @param segmentID The expected ID of the segment this file belongs to.
   * @return The actual version found, when a valid header is found 
   *         that matches <code>codec</code>, with an actual version 
   *         where <code>minVersion &lt;= actual &lt;= maxVersion</code>, 
   *         and a matching <code>segmentID</code>.
   *         Otherwise an exception is thrown.
   * @throws CorruptIndexException If the first four bytes are not
   *         {@link #CODEC_MAGIC}, or if the actual codec found is
   *         not <code>codec</code>, or if the <code>segmentID</code>
   *         does not match.
   * @throws IndexFormatTooOldException If the actual version is less 
   *         than <code>minVersion</code>.
   * @throws IndexFormatTooNewException If the actual version is greater 
   *         than <code>maxVersion</code>.
   * @throws IOException If there is an I/O error reading from the underlying medium.
   * @see #writeSegmentHeader(DataOutput, String, int, String)
   */
  public static int checkSegmentHeader(DataInput in, String codec, int minVersion, int maxVersion, String segmentID) throws IOException {
    // Magic, codec name and version range are all verified by checkHeader.
    final int actualVersion = checkHeader(in, codec, minVersion, maxVersion);
    // The segment ID immediately follows the codec header.
    final String actualID = in.readString();
    if (actualID.equals(segmentID)) {
      return actualVersion;
    }
    throw new CorruptIndexException("file mismatch, expected segment id=" + segmentID + ", got=" + actualID, in);
  }
  /**
   * Writes a codec footer, which records both a checksum
   * algorithm ID and a checksum. This footer can
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html
@ -1,25 +0,0 @@
 <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 <!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
 <html>
 <head>
   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 </head>
 <body>
 Lucene 4.0 file format.
 </body>
 </html>
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java
@ -27,7 +27,6 @@ import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.SegmentInfoFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
 import org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat;
 import org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat;
 import org.apache.lucene.codecs.lucene49.Lucene49NormsFormat;
@ -49,7 +48,7 @@ public class Lucene50Codec extends Codec {
  private final TermVectorsFormat vectorsFormat = new Lucene42TermVectorsFormat();
  private final FieldInfosFormat fieldInfosFormat = new Lucene50FieldInfosFormat();
  private final SegmentInfoFormat segmentInfosFormat = new Lucene50SegmentInfoFormat();
-  private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat();
+  private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
  private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
    @Override
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java
@ -36,8 +36,7 @@ import org.apache.lucene.store.DataOutput;
 * FieldBits,DocValuesBits,DocValuesGen,Attributes&gt; <sup>FieldsCount</sup>,Footer</p>
 * <p>Data types:
 * <ul>
- *   <li>Header --&gt; {@link CodecUtil#checkHeader CodecHeader}</li>
+ *   <li>Header --&gt; {@link CodecUtil#checkSegmentHeader SegmentHeader}</li>
 *   <li>SegmentID --&gt; {@link DataOutput#writeString String}</li>
 *   <li>FieldsCount --&gt; {@link DataOutput#writeVInt VInt}</li>
 *   <li>FieldName --&gt; {@link DataOutput#writeString String}</li>
 *   <li>FieldBits, DocValuesBits --&gt; {@link DataOutput#writeByte Byte}</li>
@ -49,7 +48,6 @@ import org.apache.lucene.store.DataOutput;
 * </p>
 * Field Descriptions:
 * <ul>
 *   <li>SegmentID: {@link SegmentInfo#getId()} this file belongs to</li>
 *   <li>FieldsCount: the number of fields in this file.</li>
 *   <li>FieldName: name of the field as a UTF-8 String.</li>
 *   <li>FieldNumber: the field's number. Note that unlike previous versions of
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosReader.java
@ -54,13 +54,10 @@ final class Lucene50FieldInfosReader extends FieldInfosReader {
      Throwable priorE = null;
      FieldInfo infos[] = null;
      try {
-        CodecUtil.checkHeader(input, Lucene50FieldInfosFormat.CODEC_NAME, 
+        CodecUtil.checkSegmentHeader(input, Lucene50FieldInfosFormat.CODEC_NAME, 
                                     Lucene50FieldInfosFormat.FORMAT_START, 
-                                     Lucene50FieldInfosFormat.FORMAT_CURRENT);
+                                     Lucene50FieldInfosFormat.FORMAT_CURRENT,
-        String id = input.readString();
+                                     segmentInfo.getId());
        if (!id.equals(segmentInfo.getId())) {
          throw new CorruptIndexException("file mismatch, expected segment id=" + segmentInfo.getId() + ", got=" + id, input);
        }
        final int size = input.readVInt(); //read in the size
        infos = new FieldInfo[size];
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosWriter.java
@ -47,8 +47,7 @@ final class Lucene50FieldInfosWriter extends FieldInfosWriter {
  public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene50FieldInfosFormat.EXTENSION);
    try (IndexOutput output = directory.createOutput(fileName, context)) {
-      CodecUtil.writeHeader(output, Lucene50FieldInfosFormat.CODEC_NAME, Lucene50FieldInfosFormat.FORMAT_CURRENT);
+      CodecUtil.writeSegmentHeader(output, Lucene50FieldInfosFormat.CODEC_NAME, Lucene50FieldInfosFormat.FORMAT_CURRENT, segmentInfo.getId());
      output.writeString(segmentInfo.getId());
      output.writeVInt(infos.size());
      for (FieldInfo fi : infos) {
        fi.checkConsistency();
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java
@ -0,0 +1,115 @@
 package org.apache.lucene.codecs.lucene50;
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.Collection;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.LiveDocsFormat;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.MutableBits;
 /**
 * Lucene 5.0 live docs format.
 * <p>The .liv file is optional, and only exists when a segment contains
 * deletions.</p>
 * <p>Although per-segment, this file is maintained exterior to compound segment
 * files.</p>
 * <p>Deletions (.liv) --&gt; SegmentHeader,Bits,Footer</p>
 * <ul>
 *   <li>SegmentHeader --&gt; {@link CodecUtil#writeSegmentHeader SegmentHeader}</li>
 *   <li>Bits --&gt; &lt;{@link DataOutput#writeLong Int64}&gt; <sup>LongCount</sup></li>
 *   <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 * <p>When reading, LongCount is {@link FixedBitSet#bits2words} of the
 * segment's document count.</p>
 */
 public class Lucene50LiveDocsFormat extends LiveDocsFormat {

  /** extension of live docs */
  private static final String EXTENSION = "liv";

  /** codec of live docs */
  private static final String CODEC_NAME = "Lucene50LiveDocs";

  /** supported version range */
  private static final int VERSION_START = 0;
  private static final int VERSION_CURRENT = VERSION_START;

  /** Creates a bit set of the given size with every bit in [0, size) set. */
  @Override
  public MutableBits newLiveDocs(int size) throws IOException {
    final FixedBitSet allLive = new FixedBitSet(size);
    allLive.set(0, size);
    return allLive;
  }

  /**
   * Returns a clone of <code>existing</code>, which is cast unconditionally
   * to {@link FixedBitSet}.
   */
  @Override
  public MutableBits newLiveDocs(Bits existing) throws IOException {
    return ((FixedBitSet) existing).clone();
  }

  /**
   * Reads the live docs file of the segment's current deletion generation:
   * checks the segment header, reads the raw words, and hands any failure to
   * {@link CodecUtil#checkFooter} before the input is closed.
   */
  @Override
  public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException {
    final String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, info.getDelGen());
    final int docCount = info.info.getDocCount();
    try (ChecksumIndexInput in = dir.openChecksumInput(fileName, context)) {
      Throwable failure = null;
      try {
        CodecUtil.checkSegmentHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT, info.info.getId());
        final long[] words = new long[FixedBitSet.bits2words(docCount)];
        int w = 0;
        while (w < words.length) {
          words[w] = in.readLong();
          w++;
        }
        return new FixedBitSet(words, docCount);
      } catch (Throwable t) {
        // Remember the failure; it is passed to checkFooter below.
        failure = t;
      } finally {
        // Runs on both the success path (failure == null) and the error path.
        CodecUtil.checkFooter(in, failure);
      }
    }
    // Only reachable if checkFooter returned normally after a failure was
    // captured -- presumably it always rethrows in that case (TODO confirm).
    throw new AssertionError();
  }

  /**
   * Writes the backing words of <code>bits</code> (cast unconditionally to
   * {@link FixedBitSet}) to the file of the segment's next deletion
   * generation, framed by a segment header and a footer.
   * <code>newDelCount</code> is not used by this implementation.
   */
  @Override
  public void writeLiveDocs(MutableBits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) throws IOException {
    final String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, info.getNextDelGen());
    final long[] words = ((FixedBitSet) bits).getBits();
    try (IndexOutput out = dir.createOutput(fileName, context)) {
      CodecUtil.writeSegmentHeader(out, CODEC_NAME, VERSION_CURRENT, info.info.getId());
      for (long word : words) {
        out.writeLong(word);
      }
      CodecUtil.writeFooter(out);
    }
  }

  /** Adds the current live docs file name to <code>files</code> when the segment has deletions. */
  @Override
  public void files(SegmentCommitInfo info, Collection<String> files) throws IOException {
    if (!info.hasDeletions()) {
      return;
    }
    files.add(IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, info.getDelGen()));
  }
 }
--- a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
@ -31,7 +31,7 @@ import org.apache.lucene.search.DocIdSetIterator;
 * 
 * @lucene.internal
 */
-public final class FixedBitSet extends DocIdSet implements Bits {
+public final class FixedBitSet extends DocIdSet implements MutableBits {
  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class);
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexFileDeleter.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexFileDeleter.java
@ -96,7 +96,7 @@ public class TestIndexFileDeleter extends LuceneTestCase {
    */
    // TODO: fix this test better
-    String ext = Codec.getDefault().getName().equals("SimpleText") ? ".liv" : ".del";
+    String ext = ".liv";
    // Create a bogus separate del file for a
    // segment that already has a separate del file: