From 5280476fcbfd706c0c9b44b7e29b8e6d6ea51b07 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 19 Jan 2012 17:15:45 +0000 Subject: [PATCH 01/33] create branch for LUCENE-3661 git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233476 13f79535-47bb-0310-9956-ffa450edef68 From de495a3c0daef9b55f8d3288ffe3e8f6d7500342 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 19 Jan 2012 17:16:25 +0000 Subject: [PATCH 02/33] LUCENE-3661: dump my current state git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233479 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/lucene/codecs/Codec.java | 3 ++ .../apache/lucene/codecs/LiveDocsFormat.java | 34 +++++++++++++++ .../codecs/appending/AppendingCodec.java | 8 ++++ .../lucene/codecs/lucene3x/Lucene3xCodec.java | 10 +++++ .../{util => codecs/lucene40}/BitVector.java | 6 ++- .../lucene/codecs/lucene40/Lucene40Codec.java | 8 ++++ .../lucene40/Lucene40LiveDocsFormat.java | 42 +++++++++++++++++++ .../codecs/simpletext/SimpleTextCodec.java | 9 ++++ .../index/DocumentsWriterPerThread.java | 2 +- .../index/FreqProxTermsWriterPerField.java | 2 +- .../org/apache/lucene/index/IndexWriter.java | 2 +- .../org/apache/lucene/index/MergePolicy.java | 2 +- .../apache/lucene/index/SegmentReader.java | 8 ++-- .../lucene/index/SegmentWriteState.java | 2 +- .../org/apache/lucene/util/MutableBits.java | 22 ++++++++++ .../lucene40}/TestBitVector.java | 4 +- .../apache/lucene/index/TestAddIndexes.java | 7 ++++ .../lucene/index/TestIndexReaderReopen.java | 1 - 18 files changed, 160 insertions(+), 12 deletions(-) create mode 100644 lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java rename lucene/src/java/org/apache/lucene/{util => codecs/lucene40}/BitVector.java (98%) create mode 100644 lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java create mode 100644 lucene/src/java/org/apache/lucene/util/MutableBits.java rename 
lucene/src/test/org/apache/lucene/{util => codecs/lucene40}/TestBitVector.java (98%) diff --git a/lucene/src/java/org/apache/lucene/codecs/Codec.java b/lucene/src/java/org/apache/lucene/codecs/Codec.java index 6d5514a4406..5c134465510 100644 --- a/lucene/src/java/org/apache/lucene/codecs/Codec.java +++ b/lucene/src/java/org/apache/lucene/codecs/Codec.java @@ -75,6 +75,9 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { /** Encodes/decodes document normalization values */ public abstract NormsFormat normsFormat(); + /** Encodes/decodes live docs */ + public abstract LiveDocsFormat liveDocsFormat(); + /** looks up a codec by name */ public static Codec forName(String name) { return loader.lookup(name); diff --git a/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java new file mode 100644 index 00000000000..e3e5e467a1d --- /dev/null +++ b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java @@ -0,0 +1,34 @@ +package org.apache.lucene.codecs; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.MutableBits; + +public abstract class LiveDocsFormat { + public abstract MutableBits newLiveDocs(int size) throws IOException; + public abstract Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException; + public abstract void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException; + public abstract void files(Directory dir, SegmentInfo info, Set files) throws IOException; +} diff --git a/lucene/src/java/org/apache/lucene/codecs/appending/AppendingCodec.java b/lucene/src/java/org/apache/lucene/codecs/appending/AppendingCodec.java index d5fd74101db..f3385946765 100644 --- a/lucene/src/java/org/apache/lucene/codecs/appending/AppendingCodec.java +++ b/lucene/src/java/org/apache/lucene/codecs/appending/AppendingCodec.java @@ -20,6 +20,7 @@ package org.apache.lucene.codecs.appending; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfosFormat; @@ -28,6 +29,7 @@ import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.lucene40.Lucene40Codec; import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat; import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat; +import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; import org.apache.lucene.codecs.lucene40.Lucene40NormsFormat; import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat; @@ -50,6 +52,7 
@@ public class AppendingCodec extends Codec { private final TermVectorsFormat vectors = new Lucene40TermVectorsFormat(); private final DocValuesFormat docValues = new Lucene40DocValuesFormat(); private final NormsFormat norms = new Lucene40NormsFormat(); + private final LiveDocsFormat liveDocs = new Lucene40LiveDocsFormat(); @Override public PostingsFormat postingsFormat() { @@ -85,4 +88,9 @@ public class AppendingCodec extends Codec { public NormsFormat normsFormat() { return norms; } + + @Override + public LiveDocsFormat liveDocsFormat() { + return liveDocs; + } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java index 7d196be1f04..beaf19481ca 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java @@ -23,6 +23,7 @@ import java.util.Set; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PerDocConsumer; import org.apache.lucene.codecs.PerDocProducer; @@ -30,6 +31,7 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfosFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; +import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; @@ -57,6 +59,9 @@ public class Lucene3xCodec extends Codec { private final NormsFormat normsFormat = new Lucene3xNormsFormat(); + // TODO: this should really be a different impl + private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat(); + // 3.x 
doesn't support docvalues private final DocValuesFormat docValuesFormat = new DocValuesFormat() { @Override @@ -107,4 +112,9 @@ public class Lucene3xCodec extends Codec { public NormsFormat normsFormat() { return normsFormat; } + + @Override + public LiveDocsFormat liveDocsFormat() { + return liveDocsFormat; + } } diff --git a/lucene/src/java/org/apache/lucene/util/BitVector.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/BitVector.java similarity index 98% rename from lucene/src/java/org/apache/lucene/util/BitVector.java rename to lucene/src/java/org/apache/lucene/codecs/lucene40/BitVector.java index 39f749d3abe..7b4aff06a0e 100644 --- a/lucene/src/java/org/apache/lucene/util/BitVector.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/BitVector.java @@ -1,4 +1,4 @@ -package org.apache.lucene.util; +package org.apache.lucene.codecs.lucene40; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -25,6 +25,8 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.CodecUtil; +import org.apache.lucene.util.MutableBits; /** Optimized implementation of a vector of bits. 
This is more-or-less like * java.util.BitSet, but also includes the following: @@ -37,7 +39,7 @@ import org.apache.lucene.store.IndexOutput; * * @lucene.internal */ -public final class BitVector implements Cloneable, Bits { +public final class BitVector implements Cloneable, MutableBits { private byte[] bits; private int size; diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java index b0e9626f3ea..8b2279d9692 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java @@ -20,6 +20,7 @@ package org.apache.lucene.codecs.lucene40; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfosFormat; @@ -42,6 +43,8 @@ public class Lucene40Codec extends Codec { private final DocValuesFormat docValuesFormat = new Lucene40DocValuesFormat(); private final SegmentInfosFormat infosFormat = new Lucene40SegmentInfosFormat(); private final NormsFormat normsFormat = new Lucene40NormsFormat(); + private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat(); + private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { @Override public PostingsFormat getPostingsFormatForField(String field) { @@ -87,6 +90,11 @@ public class Lucene40Codec extends Codec { public NormsFormat normsFormat() { return normsFormat; } + + @Override + public LiveDocsFormat liveDocsFormat() { + return liveDocsFormat; + } /** Returns the postings format that should be used for writing * new segments of field. 
diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java new file mode 100644 index 00000000000..768d3d7d7b3 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java @@ -0,0 +1,42 @@ +package org.apache.lucene.codecs.lucene40; + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.MutableBits; + +public class Lucene40LiveDocsFormat extends LiveDocsFormat { + + @Override + public MutableBits newLiveDocs(int size) throws IOException { + BitVector bitVector = new BitVector(size); + bitVector.invertAll(); + return bitVector; + } + + @Override + public Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException { + // nocommit: compute filename here + return new BitVector(dir, info.getDelFileName(), context); + } + + @Override + public void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException { + // nocommit: compute filename here + // nocommit: this api is ugly... 
+ ((BitVector)bits).write(dir, info.getDelFileName(), context); + } + + @Override + public void files(Directory dir, SegmentInfo info, Set files) throws IOException { + // nocommit: compute filename here + if (info.hasDeletions()) { + files.add(info.getDelFileName()); + } + } +} diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java index e6a08671755..b407595562f 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java @@ -20,12 +20,14 @@ package org.apache.lucene.codecs.simpletext; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfosFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat; +import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; /** * plain text index format. 
@@ -43,6 +45,8 @@ public final class SimpleTextCodec extends Codec { private final DocValuesFormat docValues = new Lucene40DocValuesFormat(); // TODO: need a plain-text impl (using the above) private final NormsFormat normsFormat = new SimpleTextNormsFormat(); + // TODO: need a plain-text impl + private final LiveDocsFormat liveDocs = new Lucene40LiveDocsFormat(); public SimpleTextCodec() { super("SimpleText"); @@ -82,4 +86,9 @@ public final class SimpleTextCodec extends Codec { public NormsFormat normsFormat() { return normsFormat; } + + @Override + public LiveDocsFormat liveDocsFormat() { + return liveDocs; + } } diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index f279de07c21..f4b5b9c0e9e 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -25,12 +25,12 @@ import java.text.NumberFormat; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene40.BitVector; import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice; import org.apache.lucene.search.similarities.SimilarityProvider; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; -import org.apache.lucene.util.BitVector; import org.apache.lucene.util.Counter; import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java index 30a8028cb73..802d11f35bc 100644 --- a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java @@ -27,8 +27,8 @@ 
import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.PostingsConsumer; import org.apache.lucene.codecs.TermStats; import org.apache.lucene.codecs.TermsConsumer; +import org.apache.lucene.codecs.lucene40.BitVector; import org.apache.lucene.index.FieldInfo.IndexOptions; -import org.apache.lucene.util.BitVector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.RamUsageEstimator; diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 51d7ff868b7..6d6ec3166dc 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -34,6 +34,7 @@ import java.util.regex.Pattern; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene40.BitVector; import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; import org.apache.lucene.index.FieldInfos.FieldNumberBiMap; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -48,7 +49,6 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.MergeInfo; -import org.apache.lucene.util.BitVector; import org.apache.lucene.util.Constants; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; diff --git a/lucene/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/src/java/org/apache/lucene/index/MergePolicy.java index 728a67233f2..58d7f05ea79 100644 --- a/lucene/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/src/java/org/apache/lucene/index/MergePolicy.java @@ -22,9 +22,9 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import org.apache.lucene.codecs.lucene40.BitVector; import org.apache.lucene.store.Directory; import 
org.apache.lucene.store.MergeInfo; -import org.apache.lucene.util.BitVector; import org.apache.lucene.util.SetOnce.AlreadySetException; import org.apache.lucene.util.SetOnce; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index 7bda25bbba8..0962e56c1f2 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -23,9 +23,9 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; +import org.apache.lucene.codecs.lucene40.BitVector; import org.apache.lucene.search.FieldCache; // javadocs import org.apache.lucene.store.IOContext; -import org.apache.lucene.util.BitVector; import org.apache.lucene.util.Bits; /** @@ -36,7 +36,7 @@ public final class SegmentReader extends IndexReader { private final SegmentInfo si; private final ReaderContext readerContext = new AtomicReaderContext(this); - private final BitVector liveDocs; + private final Bits liveDocs; // Normally set to si.docCount - si.delDocCount, unless we // were created as an NRT reader from IW, in which case IW @@ -56,7 +56,7 @@ public final class SegmentReader extends IndexReader { try { if (si.hasDeletions()) { // NOTE: the bitvector is stored using the regular directory, not cfs - liveDocs = new BitVector(directory(), si.getDelFileName(), new IOContext(IOContext.READ, true)); + liveDocs = si.getCodec().liveDocsFormat().readLiveDocs(directory(), si, new IOContext(IOContext.READ, true)); } else { assert si.getDelCount() == 0; liveDocs = null; @@ -124,7 +124,9 @@ public final class SegmentReader extends IndexReader { return liveDocs; } + // nocommit private boolean checkLiveCounts(boolean isNRT) throws IOException { + BitVector liveDocs = (BitVector) this.liveDocs; if (liveDocs != null) { if (liveDocs.size() != 
si.docCount) { throw new CorruptIndexException("document count mismatch: deleted docs count " + liveDocs.size() + " vs segment doc count " + si.docCount + " segment=" + si.name); diff --git a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java index 5898ca51667..358124ccb93 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java @@ -18,9 +18,9 @@ package org.apache.lucene.index; */ import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene40.BitVector; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.util.BitVector; import org.apache.lucene.util.InfoStream; /** diff --git a/lucene/src/java/org/apache/lucene/util/MutableBits.java b/lucene/src/java/org/apache/lucene/util/MutableBits.java new file mode 100644 index 00000000000..6f5cb96fe44 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/util/MutableBits.java @@ -0,0 +1,22 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public interface MutableBits extends Bits { + +} diff --git a/lucene/src/test/org/apache/lucene/util/TestBitVector.java b/lucene/src/test/org/apache/lucene/codecs/lucene40/TestBitVector.java similarity index 98% rename from lucene/src/test/org/apache/lucene/util/TestBitVector.java rename to lucene/src/test/org/apache/lucene/codecs/lucene40/TestBitVector.java index d45f58cb3b0..8a66bb6a83b 100644 --- a/lucene/src/test/org/apache/lucene/util/TestBitVector.java +++ b/lucene/src/test/org/apache/lucene/codecs/lucene40/TestBitVector.java @@ -1,4 +1,4 @@ -package org.apache.lucene.util; +package org.apache.lucene.codecs.lucene40; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -22,6 +22,8 @@ import java.io.IOException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; /** * TestBitVector tests the BitVector, obviously. 
diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java index 9758b773157..ebf57e245a8 100755 --- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -27,6 +27,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfosFormat; @@ -35,6 +36,7 @@ import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.lucene40.Lucene40Codec; import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat; import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat; +import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; import org.apache.lucene.codecs.lucene40.Lucene40NormsFormat; import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfosFormat; import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; @@ -1156,6 +1158,11 @@ public class TestAddIndexes extends LuceneTestCase { public NormsFormat normsFormat() { return new Lucene40NormsFormat(); } + + @Override + public LiveDocsFormat liveDocsFormat() { + return new Lucene40LiveDocsFormat(); + } } /* diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java index e54bb97e80d..9075ed8997f 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java @@ -42,7 +42,6 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.store.AlreadyClosedException; import 
org.apache.lucene.store.Directory; -import org.apache.lucene.util.BitVector; import org.apache.lucene.util.Bits; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; From da3dbb0e0c3903ae408b7dff68454fcab7aae800 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 19 Jan 2012 17:41:14 +0000 Subject: [PATCH 03/33] move some bitvector -> mutablebits git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233498 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/DocumentsWriterPerThread.java | 9 ++++----- .../apache/lucene/index/FreqProxTermsWriterPerField.java | 6 +++--- lucene/src/java/org/apache/lucene/index/IndexWriter.java | 5 ++++- .../java/org/apache/lucene/index/SegmentWriteState.java | 4 ++-- lucene/src/java/org/apache/lucene/util/MutableBits.java | 4 +++- 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index f4b5b9c0e9e..dd413da75f4 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -25,7 +25,6 @@ import java.text.NumberFormat; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.lucene40.BitVector; import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice; import org.apache.lucene.search.similarities.SimilarityProvider; import org.apache.lucene.store.Directory; @@ -36,6 +35,7 @@ import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.MutableBits; public class DocumentsWriterPerThread { @@ -114,10 +114,10 @@ public class DocumentsWriterPerThread { static class 
FlushedSegment { final SegmentInfo segmentInfo; final BufferedDeletes segmentDeletes; - final BitVector liveDocs; + final MutableBits liveDocs; private FlushedSegment(SegmentInfo segmentInfo, - BufferedDeletes segmentDeletes, BitVector liveDocs) { + BufferedDeletes segmentDeletes, MutableBits liveDocs) { this.segmentInfo = segmentInfo; this.segmentDeletes = segmentDeletes; this.liveDocs = liveDocs; @@ -448,8 +448,7 @@ public class DocumentsWriterPerThread { // happens when an exception is hit processing that // doc, eg if analyzer has some problem w/ the text): if (pendingDeletes.docIDs.size() > 0) { - flushState.liveDocs = new BitVector(numDocsInRAM); - flushState.liveDocs.invertAll(); + flushState.liveDocs = codec.liveDocsFormat().newLiveDocs(numDocsInRAM); for(int delDocID : pendingDeletes.docIDs) { flushState.liveDocs.clear(delDocID); } diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java index 802d11f35bc..ad3502d409d 100644 --- a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java @@ -27,7 +27,6 @@ import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.PostingsConsumer; import org.apache.lucene.codecs.TermStats; import org.apache.lucene.codecs.TermsConsumer; -import org.apache.lucene.codecs.lucene40.BitVector; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; @@ -461,9 +460,10 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem // Mark it deleted. TODO: we could also skip // writing its postings; this would be // deterministic (just for this Term's docs). 
+ + // nocommit: totally wrong to do this reach-around here, and this way if (state.liveDocs == null) { - state.liveDocs = new BitVector(state.numDocs); - state.liveDocs.invertAll(); + state.liveDocs = docState.docWriter.codec.liveDocsFormat().newLiveDocs(state.numDocs); } state.liveDocs.clear(docID); } diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 6d6ec3166dc..43a686f9b38 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -2267,7 +2267,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // shortly-to-be-opened SegmentReader and let it // carry the changes; there's no reason to use // filesystem as intermediary here. - flushedSegment.liveDocs.write(directory, delFileName, context); + + SegmentInfo info = flushedSegment.segmentInfo; + Codec codec = info.getCodec(); + codec.liveDocsFormat().writeLiveDocs(flushedSegment.liveDocs, directory, info, context); success2 = true; } finally { if (!success2) { diff --git a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java index 358124ccb93..04211a5b6f0 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java @@ -18,10 +18,10 @@ package org.apache.lucene.index; */ import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.lucene40.BitVector; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.MutableBits; /** * @lucene.experimental @@ -41,7 +41,7 @@ public class SegmentWriteState { public final BufferedDeletes segDeletes; // Lazily created: - public BitVector liveDocs; + public MutableBits liveDocs; public final Codec codec; public final String segmentSuffix; 
diff --git a/lucene/src/java/org/apache/lucene/util/MutableBits.java b/lucene/src/java/org/apache/lucene/util/MutableBits.java index 6f5cb96fe44..6dd3efad21a 100644 --- a/lucene/src/java/org/apache/lucene/util/MutableBits.java +++ b/lucene/src/java/org/apache/lucene/util/MutableBits.java @@ -18,5 +18,7 @@ package org.apache.lucene.util; */ public interface MutableBits extends Bits { - + public void clear(int bit); + // nocommit: remove this from this interface somehow? (used by DWPT infostream at least) + public int count(); } From e2a4b862603fa874a0d0b112e756f3fab7440156 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 19 Jan 2012 18:10:51 +0000 Subject: [PATCH 04/33] LUCENE-3661: bitvector->mutablebits in indexwriter git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233507 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/lucene40/BitVector.java | 2 +- .../org/apache/lucene/index/IndexWriter.java | 29 ++++++++++--------- .../org/apache/lucene/index/MergePolicy.java | 4 +-- .../apache/lucene/index/SegmentReader.java | 7 +++-- .../org/apache/lucene/util/MutableBits.java | 7 ++++- 5 files changed, 28 insertions(+), 21 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/BitVector.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/BitVector.java index 7b4aff06a0e..e26422500f9 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/BitVector.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/BitVector.java @@ -68,7 +68,7 @@ public final class BitVector implements Cloneable, MutableBits { } @Override - public Object clone() { + public BitVector clone() { byte[] copyBits = new byte[bits.length]; System.arraycopy(bits, 0, copyBits, 0, bits.length); BitVector clone = new BitVector(copyBits, size); diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 43a686f9b38..36e9a123bd5 100644 --- 
a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -34,7 +34,6 @@ import java.util.regex.Pattern; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.lucene40.BitVector; import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; import org.apache.lucene.index.FieldInfos.FieldNumberBiMap; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -52,6 +51,7 @@ import org.apache.lucene.store.MergeInfo; import org.apache.lucene.util.Constants; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.MutableBits; import org.apache.lucene.util.ThreadInterruptedException; import org.apache.lucene.util.TwoPhaseCommit; @@ -416,7 +416,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // docs, and it's copy-on-write (cloned whenever we need // to change it but it's been shared to an external NRT // reader). 
- public BitVector liveDocs; + public MutableBits liveDocs; // How many further deletions we've done against // liveDocs vs when we loaded it or last wrote it: @@ -486,7 +486,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { if (reader == null) { reader = new SegmentReader(info, config.getReaderTermsIndexDivisor(), context); if (liveDocs == null) { - liveDocs = (BitVector) reader.getLiveDocs(); + // nocommit: nuke cast + liveDocs = (MutableBits) reader.getLiveDocs(); } //System.out.println("ADD seg=" + rld.info + " isMerge=" + isMerge + " " + readerMap.size() + " in pool"); } @@ -513,7 +514,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } else { mergeReader = new SegmentReader(info, -1, context); if (liveDocs == null) { - liveDocs = (BitVector) mergeReader.getLiveDocs(); + liveDocs = (MutableBits) mergeReader.getLiveDocs(); } } } @@ -567,7 +568,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } } - public synchronized void initWritableLiveDocs() { + public synchronized void initWritableLiveDocs() throws IOException { assert Thread.holdsLock(IndexWriter.this); //System.out.println("initWritableLivedocs seg=" + info + " liveDocs=" + liveDocs + " shared=" + shared); if (shared) { @@ -577,10 +578,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // change it: if (liveDocs == null) { //System.out.println("create BV seg=" + info); - liveDocs = new BitVector(info.docCount); - liveDocs.setAll(); + liveDocs = info.getCodec().liveDocsFormat().newLiveDocs(info.docCount); } else { - liveDocs = (BitVector) liveDocs.clone(); + liveDocs = liveDocs.clone(); } shared = false; } else { @@ -588,7 +588,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } } - public synchronized BitVector getReadOnlyLiveDocs() { + // nocommit: if this is read-only live docs, why doesn't it return Bits?! 
+ public synchronized MutableBits getReadOnlyLiveDocs() { //System.out.println("getROLiveDocs seg=" + info); assert Thread.holdsLock(IndexWriter.this); shared = true; @@ -618,7 +619,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { final String delFileName = info.getDelFileName(); boolean success = false; try { - liveDocs.write(dir, delFileName, IOContext.DEFAULT); + info.getCodec().liveDocsFormat().writeLiveDocs(liveDocs, dir, info, IOContext.DEFAULT); success = true; } finally { if (!success) { @@ -3035,8 +3036,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { SegmentInfo info = sourceSegments.get(i); minGen = Math.min(info.getBufferedDeletesGen(), minGen); final int docCount = info.docCount; - final BitVector prevLiveDocs = merge.readerLiveDocs.get(i); - final BitVector currentLiveDocs; + final MutableBits prevLiveDocs = merge.readerLiveDocs.get(i); + final MutableBits currentLiveDocs; ReadersAndLiveDocs rld = readerPool.get(info, false); // We enrolled in mergeInit: assert rld != null; @@ -3576,7 +3577,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } merge.readers = new ArrayList(); - merge.readerLiveDocs = new ArrayList(); + merge.readerLiveDocs = new ArrayList(); // This is try/finally to make sure merger's readers are // closed: @@ -3595,7 +3596,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { assert reader != null; // Carefully pull the most recent live docs: - final BitVector liveDocs; + final MutableBits liveDocs; synchronized(this) { // Must sync to ensure BufferedDeletesStream // cannot change liveDocs/pendingDeleteCount while diff --git a/lucene/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/src/java/org/apache/lucene/index/MergePolicy.java index 58d7f05ea79..40b56d56449 100644 --- a/lucene/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/src/java/org/apache/lucene/index/MergePolicy.java @@ -22,9 +22,9 @@ import java.util.ArrayList; import 
java.util.List; import java.util.Map; -import org.apache.lucene.codecs.lucene40.BitVector; import org.apache.lucene.store.Directory; import org.apache.lucene.store.MergeInfo; +import org.apache.lucene.util.MutableBits; import org.apache.lucene.util.SetOnce.AlreadySetException; import org.apache.lucene.util.SetOnce; @@ -74,7 +74,7 @@ public abstract class MergePolicy implements java.io.Closeable { int maxNumSegments = -1; // used by IndexWriter public long estimatedMergeBytes; // used by IndexWriter List readers; // used by IndexWriter - List readerLiveDocs; // used by IndexWriter + List readerLiveDocs; // used by IndexWriter public final List segments; public final int totalDocCount; boolean aborted; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index 0962e56c1f2..e58280f69d3 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -23,10 +23,11 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; -import org.apache.lucene.codecs.lucene40.BitVector; +import org.apache.lucene.codecs.lucene40.BitVector; // nocommit: move asserts/checks to codec import org.apache.lucene.search.FieldCache; // javadocs import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.MutableBits; /** * @lucene.experimental @@ -92,7 +93,7 @@ public final class SegmentReader extends IndexReader { assert si.hasDeletions(); // ... 
but load our own deleted docs: - liveDocs = new BitVector(si.dir, si.getDelFileName(), context); + liveDocs = si.getCodec().liveDocsFormat().readLiveDocs(si.dir, si, context); numDocs = si.docCount - si.getDelCount(); assert checkLiveCounts(false); @@ -105,7 +106,7 @@ public final class SegmentReader extends IndexReader { // SegmentReader and using the provided in-memory // liveDocs. Used by IndexWriter to provide a new NRT // reader: - SegmentReader(SegmentReader parent, BitVector liveDocs, int numDocs) throws IOException { + SegmentReader(SegmentReader parent, MutableBits liveDocs, int numDocs) throws IOException { this.si = parent.si; parent.core.incRef(); this.core = parent.core; diff --git a/lucene/src/java/org/apache/lucene/util/MutableBits.java b/lucene/src/java/org/apache/lucene/util/MutableBits.java index 6dd3efad21a..5a4c253af44 100644 --- a/lucene/src/java/org/apache/lucene/util/MutableBits.java +++ b/lucene/src/java/org/apache/lucene/util/MutableBits.java @@ -17,8 +17,13 @@ package org.apache.lucene.util; * limitations under the License. */ -public interface MutableBits extends Bits { +public interface MutableBits extends Bits,Cloneable { public void clear(int bit); // nocommit: remove this from this interface somehow? (used by DWPT infostream at least) public int count(); + + // nocommit: are these truly necessary? 
+ public boolean getAndSet(int bit); + public boolean getAndClear(int bit); + public MutableBits clone(); } From 413823ceaf5f0473e9a11c400de35915a284b954 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 19 Jan 2012 18:16:13 +0000 Subject: [PATCH 05/33] LUCENE-3661: bitvector->mutablebits in some strings/javadocs git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233508 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/lucene/index/BufferedDeletesStream.java | 2 +- lucene/src/java/org/apache/lucene/index/IndexWriter.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java index 47bd8ea5598..88e531fd0eb 100644 --- a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java +++ b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java @@ -147,7 +147,7 @@ class BufferedDeletesStream { }; /** Resolves the buffered deleted Term/Query/docIDs, into - * actual deleted docIDs in the liveDocs BitVector for + * actual deleted docIDs in the liveDocs MutableBits for * each SegmentReader. 
*/ public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, List infos) throws IOException { final long t0 = System.currentTimeMillis(); diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 36e9a123bd5..c94c47862fe 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -633,7 +633,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } } assert (info.docCount - liveDocs.count()) == info.getDelCount() + pendingDeleteCount: - "delete count mismatch during commit: seg=" + info + " info.delCount=" + info.getDelCount() + " vs BitVector=" + (info.docCount-liveDocs.count() + " pendingDelCount=" + pendingDeleteCount); + "delete count mismatch during commit: seg=" + info + " info.delCount=" + info.getDelCount() + " vs MutableBits=" + (info.docCount-liveDocs.count() + " pendingDelCount=" + pendingDeleteCount); info.setDelCount(info.getDelCount() + pendingDeleteCount); pendingDeleteCount = 0; return true; @@ -2206,7 +2206,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { /** * Prepares the {@link SegmentInfo} for the new flushed segment and persists - * the deleted documents {@link BitVector}. Use + * the deleted documents {@link MutableBits}. Use * {@link #publishFlushedSegment(SegmentInfo, FrozenBufferedDeletes)} to * publish the returned {@link SegmentInfo} together with its segment private * delete packet. 
From cfdd1f1c2d25b99a418221292f797b5d8813984b Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 19 Jan 2012 18:52:27 +0000 Subject: [PATCH 06/33] LUCENE-3661: remove bitvector from segmentreader git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233530 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/SegmentReader.java | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index e58280f69d3..2b1577a0fc1 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -23,7 +23,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; -import org.apache.lucene.codecs.lucene40.BitVector; // nocommit: move asserts/checks to codec import org.apache.lucene.search.FieldCache; // javadocs import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Bits; @@ -125,24 +124,21 @@ public final class SegmentReader extends IndexReader { return liveDocs; } - // nocommit private boolean checkLiveCounts(boolean isNRT) throws IOException { - BitVector liveDocs = (BitVector) this.liveDocs; + MutableBits liveDocs = (MutableBits) this.liveDocs; if (liveDocs != null) { - if (liveDocs.size() != si.docCount) { - throw new CorruptIndexException("document count mismatch: deleted docs count " + liveDocs.size() + " vs segment doc count " + si.docCount + " segment=" + si.name); + if (liveDocs.length() != si.docCount) { + throw new CorruptIndexException("document count mismatch: deleted docs count " + liveDocs.length() + " vs segment doc count " + si.docCount + " segment=" + si.name); } - final int recomputedCount = liveDocs.getRecomputedCount(); - // Verify BitVector is self 
consistent: - assert liveDocs.count() == recomputedCount : "live count=" + liveDocs.count() + " vs recomputed count=" + recomputedCount; + final int count = liveDocs.count(); // Verify our docCount matches: - assert numDocs == recomputedCount : - "delete count mismatch: numDocs=" + numDocs + " vs BitVector=" + (si.docCount-recomputedCount); + assert numDocs == count : + "delete count mismatch: numDocs=" + numDocs + " vs BitVector=" + (si.docCount-count); - assert isNRT || si.docCount - si.getDelCount() == recomputedCount : - "si.docCount=" + si.docCount + "si.getDelCount()=" + si.getDelCount() + " recomputedCount=" + recomputedCount; + assert isNRT || si.docCount - si.getDelCount() == count : + "si.docCount=" + si.docCount + "si.getDelCount()=" + si.getDelCount() + " recomputedCount=" + count; } return true; From 2227b6e668bbcc893c9ffdc889fb0a4600a4ab2a Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 19 Jan 2012 18:54:14 +0000 Subject: [PATCH 07/33] LUCENE-3661: remove bitvector from string git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233531 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/index/SegmentReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index 2b1577a0fc1..3649ba9545c 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -135,7 +135,7 @@ public final class SegmentReader extends IndexReader { // Verify our docCount matches: assert numDocs == count : - "delete count mismatch: numDocs=" + numDocs + " vs BitVector=" + (si.docCount-count); + "delete count mismatch: numDocs=" + numDocs + " vs MutableBits=" + (si.docCount-count); assert isNRT || si.docCount - si.getDelCount() == count : "si.docCount=" + si.docCount + "si.getDelCount()=" + si.getDelCount() + " 
recomputedCount=" + count; From c2ad31a7023951792a89813d16858c8d57ef20f9 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 19 Jan 2012 18:57:35 +0000 Subject: [PATCH 08/33] LUCENE-3661: generalize from bitvector->livedocs git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233532 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/TestIndexWriterOnDiskFull.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java index 4a7eb3b7f92..2729b949106 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java @@ -20,6 +20,7 @@ package org.apache.lucene.index; import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; @@ -453,13 +454,13 @@ public class TestIndexWriterOnDiskFull extends LuceneTestCase { } StackTraceElement[] trace = new Exception().getStackTrace(); for (int i = 0; i < trace.length; i++) { - if ("org.apache.lucene.index.SegmentMerger".equals(trace[i].getClassName()) && "mergeTerms".equals(trace[i].getMethodName()) && !didFail1) { + if (SegmentMerger.class.getName().equals(trace[i].getClassName()) && "mergeTerms".equals(trace[i].getMethodName()) && !didFail1) { didFail1 = true; throw new IOException("fake disk full during mergeTerms"); } - if ("org.apache.lucene.util.BitVector".equals(trace[i].getClassName()) && "write".equals(trace[i].getMethodName()) && !didFail2) { + if (LiveDocsFormat.class.getName().equals(trace[i].getClassName()) && "writeLiveDocs".equals(trace[i].getMethodName()) && !didFail2) { didFail2 = true; - throw new IOException("fake disk full 
while writing BitVector"); + throw new IOException("fake disk full while writing LiveDocs"); } } } From 2dee41b88e90d2f2d5351fd7ea11acd7b8ce6e2e Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 20 Jan 2012 01:46:07 +0000 Subject: [PATCH 09/33] LUCENE-3661: remove SI.getDelFileName git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233709 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene40/Lucene40LiveDocsFormat.java | 27 ++++++++++--- .../org/apache/lucene/index/CheckIndex.java | 11 +++-- .../org/apache/lucene/index/IndexWriter.java | 40 +++++-------------- .../org/apache/lucene/index/SegmentInfo.java | 10 ----- 4 files changed, 36 insertions(+), 52 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java index 768d3d7d7b3..76850fc1e14 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java @@ -4,6 +4,7 @@ import java.io.IOException; import java.util.Set; import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -21,22 +22,36 @@ public class Lucene40LiveDocsFormat extends LiveDocsFormat { @Override public Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException { - // nocommit: compute filename here - return new BitVector(dir, info.getDelFileName(), context); + String filename = IndexFileNames.fileNameFromGeneration(info.name, IndexFileNames.DELETES_EXTENSION, info.getDelGen()); + return new BitVector(dir, filename, context); } @Override public void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException { - // nocommit: compute filename here // 
nocommit: this api is ugly... - ((BitVector)bits).write(dir, info.getDelFileName(), context); + String filename = IndexFileNames.fileNameFromGeneration(info.name, IndexFileNames.DELETES_EXTENSION, info.getDelGen()); + + // nocommit: is it somehow cleaner to still have IW do this try/finally/delete stuff and add abort() instead? + boolean success = false; + try { + ((BitVector)bits).write(dir, filename, context); + success = true; + } finally { + if (!success) { + try { + dir.deleteFile(filename); + } catch (Throwable t) { + // suppress this so we keep throwing the + // original exception + } + } + } } @Override public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - // nocommit: compute filename here if (info.hasDeletions()) { - files.add(info.getDelFileName()); + files.add(IndexFileNames.fileNameFromGeneration(info.name, IndexFileNames.DELETES_EXTENSION, info.getDelGen())); } } } diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java index efac32a9c5c..a993fb8f27c 100644 --- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java @@ -174,8 +174,8 @@ public class CheckIndex { /** True if this segment has pending deletions. */ public boolean hasDeletions; - /** Name of the current deletions file name. */ - public String deletionsFileName; + /** Current deletions generation. */ + public long deletionsGen; /** Number of deleted documents. 
*/ public int numDeleted; @@ -526,15 +526,14 @@ public class CheckIndex { segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile(); } - final String delFileName = info.getDelFileName(); - if (delFileName == null){ + if (info.hasDeletions()) { msg(" no deletions"); segInfoStat.hasDeletions = false; } else{ - msg(" has deletions [delFileName=" + delFileName + "]"); + msg(" has deletions [delGen=" + info.getDelGen() + "]"); segInfoStat.hasDeletions = true; - segInfoStat.deletionsFileName = delFileName; + segInfoStat.deletionsGen = info.getDelGen(); } if (infoStream != null) infoStream.print(" test: open reader........."); diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index c94c47862fe..512157f063a 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -616,7 +616,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // We can write directly to the actual name (vs to a // .tmp & renaming it) because the file is not live // until segments file is written: - final String delFileName = info.getDelFileName(); boolean success = false; try { info.getCodec().liveDocsFormat().writeLiveDocs(liveDocs, dir, info, IOContext.DEFAULT); @@ -624,12 +623,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } finally { if (!success) { info.reset(sav); - try { - dir.deleteFile(delFileName); - } catch (Throwable t) { - // Suppress this so we keep throwing the - // original exception - } } } assert (info.docCount - liveDocs.count()) == info.getDelCount() + pendingDeleteCount: @@ -2257,32 +2250,19 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { assert delCount > 0; newSegment.setDelCount(delCount); newSegment.advanceDelGen(); - final String delFileName = newSegment.getDelFileName(); if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "flush: write " + delCount 
+ " deletes to " + delFileName); + infoStream.message("IW", "flush: write " + delCount + " deletes gen=" + flushedSegment.segmentInfo.getDelGen()); } - boolean success2 = false; - try { - // TODO: in the NRT case it'd be better to hand - // this del vector over to the - // shortly-to-be-opened SegmentReader and let it - // carry the changes; there's no reason to use - // filesystem as intermediary here. + + // TODO: in the NRT case it'd be better to hand + // this del vector over to the + // shortly-to-be-opened SegmentReader and let it + // carry the changes; there's no reason to use + // filesystem as intermediary here. - SegmentInfo info = flushedSegment.segmentInfo; - Codec codec = info.getCodec(); - codec.liveDocsFormat().writeLiveDocs(flushedSegment.liveDocs, directory, info, context); - success2 = true; - } finally { - if (!success2) { - try { - directory.deleteFile(delFileName); - } catch (Throwable t) { - // suppress this so we keep throwing the - // original exception - } - } - } + SegmentInfo info = flushedSegment.segmentInfo; + Codec codec = info.getCodec(); + codec.liveDocsFormat().writeLiveDocs(flushedSegment.liveDocs, directory, info, context); } success = true; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index 1da48086864..d5b01d6d38d 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -326,16 +326,6 @@ public final class SegmentInfo implements Cloneable { return si; } - public String getDelFileName() { - if (delGen == NO) { - // In this case we know there is no deletion filename - // against this segment - return null; - } else { - return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); - } - } - /** * @deprecated separate norms are not supported in >= 4.0 */ From 6117558c44f99f6cf3c163cc3258ff78e4389512 Mon Sep 17 00:00:00 2001 From: Robert 
Muir Date: Fri, 20 Jan 2012 02:18:59 +0000 Subject: [PATCH 10/33] LUCENE-3661: remove .del from IndexFileNames git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233726 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/codecs/LiveDocsFormat.java | 2 +- .../lucene40/Lucene40LiveDocsFormat.java | 11 +++++--- .../apache/lucene/index/IndexFileNames.java | 4 --- .../org/apache/lucene/index/IndexWriter.java | 28 +++++++++---------- .../org/apache/lucene/index/SegmentInfo.java | 7 +++-- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java index e3e5e467a1d..3bbb1142697 100644 --- a/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java @@ -30,5 +30,5 @@ public abstract class LiveDocsFormat { public abstract MutableBits newLiveDocs(int size) throws IOException; public abstract Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException; public abstract void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException; - public abstract void files(Directory dir, SegmentInfo info, Set files) throws IOException; + public abstract void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java index 76850fc1e14..42fcdf68a54 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java @@ -13,6 +13,9 @@ import org.apache.lucene.util.MutableBits; public class Lucene40LiveDocsFormat extends LiveDocsFormat { + /** Extension of deletes */ + static final String 
DELETES_EXTENSION = "del"; + @Override public MutableBits newLiveDocs(int size) throws IOException { BitVector bitVector = new BitVector(size); @@ -22,14 +25,14 @@ public class Lucene40LiveDocsFormat extends LiveDocsFormat { @Override public Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException { - String filename = IndexFileNames.fileNameFromGeneration(info.name, IndexFileNames.DELETES_EXTENSION, info.getDelGen()); + String filename = IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen()); return new BitVector(dir, filename, context); } @Override public void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException { // nocommit: this api is ugly... - String filename = IndexFileNames.fileNameFromGeneration(info.name, IndexFileNames.DELETES_EXTENSION, info.getDelGen()); + String filename = IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen()); // nocommit: is it somehow cleaner to still have IW do this try/finally/delete stuff and add abort() instead? 
boolean success = false; @@ -49,9 +52,9 @@ public class Lucene40LiveDocsFormat extends LiveDocsFormat { } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { + public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { if (info.hasDeletions()) { - files.add(IndexFileNames.fileNameFromGeneration(info.name, IndexFileNames.DELETES_EXTENSION, info.getDelGen())); + files.add(IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen())); } } } diff --git a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java index 080a0478d05..1bcb493b1d8 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java +++ b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java @@ -57,9 +57,6 @@ public final class IndexFileNames { /** Extension of compound file for doc store files*/ public static final String COMPOUND_FILE_STORE_EXTENSION = "cfx"; - /** Extension of deletes */ - public static final String DELETES_EXTENSION = "del"; - /** * This array contains all filename extensions used by * Lucene's index files, with one exception, namely the @@ -70,7 +67,6 @@ public final class IndexFileNames { public static final String INDEX_EXTENSIONS[] = new String[] { COMPOUND_FILE_EXTENSION, COMPOUND_FILE_ENTRIES_EXTENSION, - DELETES_EXTENSION, GEN_EXTENSION, COMPOUND_FILE_STORE_EXTENSION, }; diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 512157f063a..fada43524e8 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -30,7 +30,6 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; -import java.util.regex.Pattern; import org.apache.lucene.analysis.Analyzer; import 
org.apache.lucene.codecs.Codec; @@ -4071,11 +4070,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { Collection files = info.files(); CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true); try { + assert assertNoSeparateFiles(files, directory, info); for (String file : files) { - assert !IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION) - : ".del file is not allowed in .cfs: " + file; - assert !isSeparateNormsFile(file) - : "separate norms file (.s[0-9]+) is not allowed in .cfs: " + file; directory.copy(cfsDir, file, file, context); checkAbort.work(directory.fileLength(file)); } @@ -4088,15 +4084,19 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { /** - * Returns true if the given filename ends with the separate norms file - * pattern: {@code SEPARATE_NORMS_EXTENSION + "[0-9]+"}. - * @deprecated only for asserting + * used only by assert: checks that filenames about to be put in cfs belong. */ - @Deprecated - private static boolean isSeparateNormsFile(String filename) { - int idx = filename.lastIndexOf('.'); - if (idx == -1) return false; - String ext = filename.substring(idx + 1); - return Pattern.matches("s[0-9]+", ext); + private static boolean assertNoSeparateFiles(Collection files, + Directory dir, SegmentInfo info) throws IOException { + // maybe this is overkill, but codec naming clashes would be bad. 
+ Set separateFiles = new HashSet(); + Codec codec = info.getCodec(); + codec.normsFormat().separateFiles(dir, info, separateFiles); + codec.liveDocsFormat().separateFiles(dir, info, separateFiles); + + for (String file : files) { + assert !separateFiles.contains(file) : file + " should not go in CFS!"; + } + return true; } } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index d5b01d6d38d..824ba9d0abc 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -495,9 +495,10 @@ public final class SegmentInfo implements Cloneable { } } - String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); - if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) { - fileSet.add(delFileName); + // because deletions are stored outside CFS, we must check deletes here + // note: before the WTF logic was: delFileName != null && (hasDeletions() || fileExists(delFileName))... 
+ if (hasDeletions()) { + codec.liveDocsFormat().separateFiles(dir, this, fileSet); } // because separate norm files are unconditionally stored outside cfs, From 5c920cf73aacc9a2bd2eea854964522517f7dd46 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Fri, 20 Jan 2012 14:35:01 +0000 Subject: [PATCH 11/33] LUCENE-3661: cut back to Bits (from MutableBits) in some places in IW git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233931 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/IndexWriter.java | 47 ++++++++----------- .../org/apache/lucene/index/MergePolicy.java | 4 +- 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index fada43524e8..078165f7b82 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -47,6 +47,7 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.MergeInfo; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.Constants; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; @@ -588,7 +589,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } // nocommit: if this is read-only live docs, why doesn't it return Bits?! - public synchronized MutableBits getReadOnlyLiveDocs() { + public synchronized Bits getReadOnlyLiveDocs() { //System.out.println("getROLiveDocs seg=" + info); assert Thread.holdsLock(IndexWriter.this); shared = true; @@ -2993,7 +2994,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { * saves the resulting deletes file (incrementing the * delete generation for merge.info). If no deletes were * flushed, no new deletes file is saved. 
*/ - synchronized private ReadersAndLiveDocs commitMergedDeletes(MergePolicy.OneMerge merge) throws IOException { + synchronized private ReadersAndLiveDocs commitMergedDeletes(MergePolicy.OneMerge merge, MergeState mergeState) throws IOException { assert testPoint("startCommitMergeDeletes"); @@ -3015,8 +3016,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { SegmentInfo info = sourceSegments.get(i); minGen = Math.min(info.getBufferedDeletesGen(), minGen); final int docCount = info.docCount; - final MutableBits prevLiveDocs = merge.readerLiveDocs.get(i); - final MutableBits currentLiveDocs; + final Bits prevLiveDocs = merge.readerLiveDocs.get(i); + final Bits currentLiveDocs; ReadersAndLiveDocs rld = readerPool.get(info, false); // We enrolled in mergeInit: assert rld != null; @@ -3035,7 +3036,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // newly flushed deletes but mapping them to the new // docIDs. - if (currentLiveDocs.count() < prevLiveDocs.count()) { + if (currentLiveDocs != prevLiveDocs) { // This means this segment received new deletes // since we started the merge, so we // must merge them: @@ -3054,8 +3055,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } } } else { - assert currentLiveDocs.count() == prevLiveDocs.count(): "currentLiveDocs.count()==" + currentLiveDocs.count() + " vs prevLiveDocs.count()=" + prevLiveDocs.count() + " info=" + info; - docUpto += currentLiveDocs.count(); + final int readerDocCount; + if (i == sourceSegments.size()-1) { + readerDocCount = mergeState.mergedDocCount - mergeState.docBase[i]; + } else { + readerDocCount = mergeState.docBase[i+1] - mergeState.docBase[i]; + } + docUpto += readerDocCount; } } else if (currentLiveDocs != null) { // This segment had no deletes before but now it @@ -3096,7 +3102,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { return mergedDeletes; } - synchronized private boolean commitMerge(MergePolicy.OneMerge merge) 
throws IOException { + synchronized private boolean commitMerge(MergePolicy.OneMerge merge, MergeState mergeState) throws IOException { assert testPoint("startCommitMerge"); @@ -3123,7 +3129,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { return false; } - final ReadersAndLiveDocs mergedDeletes = commitMergedDeletes(merge); + final ReadersAndLiveDocs mergedDeletes = commitMergedDeletes(merge, mergeState); assert mergedDeletes == null || mergedDeletes.pendingDeleteCount != 0; @@ -3556,13 +3562,12 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } merge.readers = new ArrayList(); - merge.readerLiveDocs = new ArrayList(); + merge.readerLiveDocs = new ArrayList(); // This is try/finally to make sure merger's readers are // closed: boolean success = false; try { - int totDocCount = 0; int segUpto = 0; while(segUpto < sourceSegments.size()) { @@ -3575,7 +3580,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { assert reader != null; // Carefully pull the most recent live docs: - final MutableBits liveDocs; + final Bits liveDocs; synchronized(this) { // Must sync to ensure BufferedDeletesStream // cannot change liveDocs/pendingDeleteCount while @@ -3596,19 +3601,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { merge.readerLiveDocs.add(liveDocs); merge.readers.add(reader); - if (liveDocs == null || liveDocs.count() > 0) { - merger.add(reader, liveDocs); - totDocCount += liveDocs == null ? 
reader.maxDoc() : liveDocs.count(); - } else { - //System.out.println(" skip seg: fully deleted"); - } + merger.add(reader, liveDocs); segUpto++; } - if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "merge: total " + totDocCount + " docs"); - } - merge.checkAborted(directory); // This is where all the work happens: @@ -3619,11 +3615,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { merge.info.setCodec(codec); if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "merge codec=" + codec); + infoStream.message("IW", "merge codec=" + codec + " docCount=" + mergedDocCount); } - assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount; - // Very important to do this before opening the reader // because codec must know if prox was written for // this segment: @@ -3709,7 +3703,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // Force READ context because we merge deletes onto // this reader: - if (!commitMerge(merge)) { + if (!commitMerge(merge, mergeState)) { // commitMerge will return false if this merge was aborted return 0; } @@ -3767,7 +3761,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { /** @lucene.internal */ public synchronized String segString(SegmentInfo info) throws IOException { - StringBuilder buffer = new StringBuilder(); return info.toString(info.dir, numDeletedDocs(info) - info.getDelCount()); } diff --git a/lucene/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/src/java/org/apache/lucene/index/MergePolicy.java index 40b56d56449..dea748cf6e6 100644 --- a/lucene/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/src/java/org/apache/lucene/index/MergePolicy.java @@ -24,7 +24,7 @@ import java.util.Map; import org.apache.lucene.store.Directory; import org.apache.lucene.store.MergeInfo; -import org.apache.lucene.util.MutableBits; +import org.apache.lucene.util.Bits; import 
org.apache.lucene.util.SetOnce.AlreadySetException; import org.apache.lucene.util.SetOnce; @@ -74,7 +74,7 @@ public abstract class MergePolicy implements java.io.Closeable { int maxNumSegments = -1; // used by IndexWriter public long estimatedMergeBytes; // used by IndexWriter List readers; // used by IndexWriter - List readerLiveDocs; // used by IndexWriter + List readerLiveDocs; // used by IndexWriter public final List segments; public final int totalDocCount; boolean aborted; From f6b18248a9f556f90f2400db109f44a53febd9b5 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 20 Jan 2012 14:47:42 +0000 Subject: [PATCH 12/33] LUCENE-3661: nuke obsolete nocommit, thanks Mike git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233935 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/index/IndexWriter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 078165f7b82..2d0837b2863 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -588,7 +588,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } } - // nocommit: if this is read-only live docs, why doesn't it return Bits?! 
public synchronized Bits getReadOnlyLiveDocs() { //System.out.println("getROLiveDocs seg=" + info); assert Thread.holdsLock(IndexWriter.this); From 52ab0d610ed8b396ac6f4c2657f1d1bc7675a9f3 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 20 Jan 2012 15:14:14 +0000 Subject: [PATCH 13/33] LUCENE-3661: clean this up to use IOUtils method git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233945 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/lucene40/Lucene40LiveDocsFormat.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java index 42fcdf68a54..7e5f6f09a3e 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java @@ -9,6 +9,7 @@ import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.MutableBits; public class Lucene40LiveDocsFormat extends LiveDocsFormat { @@ -34,19 +35,14 @@ public class Lucene40LiveDocsFormat extends LiveDocsFormat { // nocommit: this api is ugly... String filename = IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen()); - // nocommit: is it somehow cleaner to still have IW do this try/finally/delete stuff and add abort() instead? 
+ // nocommit: test if we really need this boolean success = false; try { ((BitVector)bits).write(dir, filename, context); success = true; } finally { if (!success) { - try { - dir.deleteFile(filename); - } catch (Throwable t) { - // suppress this so we keep throwing the - // original exception - } + IOUtils.deleteFilesIgnoringExceptions(dir, filename); } } } From af670f2025835e8bc8d8b37084f573acd4629880 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 20 Jan 2012 16:00:35 +0000 Subject: [PATCH 14/33] LUCENE-3661: simpletext deletes git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233975 13f79535-47bb-0310-9956-ffa450edef68 --- .../codecs/simpletext/SimpleTextCodec.java | 4 +- .../simpletext/SimpleTextLiveDocsFormat.java | 197 ++++++++++++++++++ 2 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java index b407595562f..39b53e4f2ef 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java @@ -27,7 +27,6 @@ import org.apache.lucene.codecs.SegmentInfosFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat; -import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; /** * plain text index format. 
@@ -45,8 +44,7 @@ public final class SimpleTextCodec extends Codec { private final DocValuesFormat docValues = new Lucene40DocValuesFormat(); // TODO: need a plain-text impl (using the above) private final NormsFormat normsFormat = new SimpleTextNormsFormat(); - // TODO: need a plain-text impl - private final LiveDocsFormat liveDocs = new Lucene40LiveDocsFormat(); + private final LiveDocsFormat liveDocs = new SimpleTextLiveDocsFormat(); public SimpleTextCodec() { super("SimpleText"); diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java new file mode 100644 index 00000000000..46981c47fd6 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java @@ -0,0 +1,197 @@ +package org.apache.lucene.codecs.simpletext; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.BitSet; +import java.util.Set; + +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.MutableBits; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.UnicodeUtil; + +/** + * reads/writes plaintext live docs + *

+ * FOR RECREATIONAL USE ONLY + * @lucene.experimental + */ +public class SimpleTextLiveDocsFormat extends LiveDocsFormat { + + static final String LIVEDOCS_EXTENSION = "liv"; + + final static BytesRef SIZE = new BytesRef("size "); + final static BytesRef DOC = new BytesRef(" doc "); + final static BytesRef END = new BytesRef("END"); + + @Override + public MutableBits newLiveDocs(int size) throws IOException { + return new SimpleTextBits(size); + } + + @Override + public Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException { + assert info.hasDeletions(); + BytesRef scratch = new BytesRef(); + CharsRef scratchUTF16 = new CharsRef(); + + String fileName = IndexFileNames.fileNameFromGeneration(info.name, LIVEDOCS_EXTENSION, info.getDelGen()); + IndexInput in = null; + boolean success = false; + try { + in = dir.openInput(fileName, context); + + SimpleTextUtil.readLine(in, scratch); + assert StringHelper.startsWith(scratch, SIZE); + int size = parseIntAt(scratch, SIZE.length, scratchUTF16); + + BitSet bits = new BitSet(size); + + SimpleTextUtil.readLine(in, scratch); + while (!scratch.equals(END)) { + assert StringHelper.startsWith(scratch, DOC); + int docid = parseIntAt(scratch, DOC.length, scratchUTF16); + bits.set(docid); + SimpleTextUtil.readLine(in, scratch); + } + + success = true; + return new SimpleTextBits(bits, size); + } finally { + if (success) { + IOUtils.close(in); + } else { + IOUtils.closeWhileHandlingException(in); + } + } + } + + private int parseIntAt(BytesRef bytes, int offset, CharsRef scratch) throws IOException { + UnicodeUtil.UTF8toUTF16(bytes.bytes, bytes.offset+offset, bytes.length-offset, scratch); + return ArrayUtil.parseInt(scratch.chars, 0, scratch.length); + } + + @Override + public void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException { + BitSet set = ((SimpleTextBits) bits).bits; + int size = bits.length(); + BytesRef scratch = new BytesRef(); + + 
String fileName = IndexFileNames.fileNameFromGeneration(info.name, LIVEDOCS_EXTENSION, info.getDelGen()); + IndexOutput out = null; + boolean success = false; + try { + out = dir.createOutput(fileName, context); + SimpleTextUtil.write(out, SIZE); + SimpleTextUtil.write(out, Integer.toString(size), scratch); + SimpleTextUtil.writeNewline(out); + + for (int i = set.nextSetBit(0); i >= 0; i=set.nextSetBit(i + 1)) { + SimpleTextUtil.write(out, DOC); + SimpleTextUtil.write(out, Integer.toString(i), scratch); + SimpleTextUtil.writeNewline(out); + } + + SimpleTextUtil.write(out, END); + SimpleTextUtil.writeNewline(out); + success = true; + } finally { + if (success) { + IOUtils.close(out); + } else { + IOUtils.closeWhileHandlingException(out); + IOUtils.deleteFilesIgnoringExceptions(dir, fileName); + } + } + } + + @Override + public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { + if (info.hasDeletions()) { + files.add(IndexFileNames.fileNameFromGeneration(info.name, LIVEDOCS_EXTENSION, info.getDelGen())); + } + } + + static class SimpleTextBits implements MutableBits { + final BitSet bits; + final int size; + + SimpleTextBits(int size) { + this.size = size; + bits = new BitSet(size); + bits.set(0, size); + } + + SimpleTextBits(BitSet bits, int size) { + this.bits = bits; + this.size = size; + } + + @Override + public boolean get(int index) { + return bits.get(index); + } + + @Override + public int length() { + return size; + } + + @Override + public void clear(int bit) { + bits.clear(bit); + } + + @Override + public int count() { + return bits.cardinality(); + } + + @Override + public boolean getAndSet(int bit) { + boolean v = bits.get(bit); + bits.set(bit); + return v; + } + + @Override + public boolean getAndClear(int bit) { + boolean v = bits.get(bit); + bits.clear(bit); + return v; + } + + @Override + public SimpleTextBits clone() { + BitSet clonedBits = (BitSet) bits.clone(); + return new SimpleTextBits(clonedBits, size); + } + } 
+} From 0a09551f34d0e2952dd46c4287fb40981c139df5 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 20 Jan 2012 16:15:29 +0000 Subject: [PATCH 15/33] LUCENE-3661: hack test for simpletext case git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233981 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/TestBackwardsCompatibility.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index 6def7697165..9929a07172b 100644 --- a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -497,17 +497,25 @@ public class TestBackwardsCompatibility extends LuceneTestCase { writer.deleteDocuments(searchTerm); writer.close(); - // Now verify file names: + // Now verify file names... TODO: fix this test better, we could populate from + // separateFiles() or something. 
String[] expected = new String[] {"_0.cfs", "_0.cfe", "_0_1.del", "segments_2", "segments.gen"}; + + String[] expectedSimpleText = new String[] {"_0.cfs", "_0.cfe", + "_0_1.liv", + "segments_2", + "segments.gen"}; String[] actual = dir.listAll(); Arrays.sort(expected); + Arrays.sort(expectedSimpleText); Arrays.sort(actual); - if (!Arrays.equals(expected, actual)) { - fail("incorrect filenames in index: expected:\n " + asString(expected) + "\n actual:\n " + asString(actual)); + if (!Arrays.equals(expected, actual) && !Arrays.equals(expectedSimpleText, actual)) { + fail("incorrect filenames in index: expected:\n " + asString(expected) + + "\n or " + asString(expectedSimpleText) + "\n actual:\n " + asString(actual)); } dir.close(); } finally { From d95b8943c6af8964a425998f872fccabfc79991f Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 20 Jan 2012 16:40:02 +0000 Subject: [PATCH 16/33] LUCENE-3661: hack test for simpletext git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1233994 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/TestIndexFileDeleter.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java index 74b09631484..61f5b61df21 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java @@ -87,17 +87,20 @@ public class TestIndexFileDeleter extends LuceneTestCase { } */ + // TODO: fix this test better + String ext = Codec.getDefault().getName().equals("SimpleText") ? 
".liv" : ".del"; + // Create a bogus separate del file for a // segment that already has a separate del file: - copyFile(dir, "_0_1.del", "_0_2.del"); + copyFile(dir, "_0_1" + ext, "_0_2" + ext); // Create a bogus separate del file for a // segment that does not yet have a separate del file: - copyFile(dir, "_0_1.del", "_1_1.del"); + copyFile(dir, "_0_1" + ext, "_1_1" + ext); // Create a bogus separate del file for a // non-existent segment: - copyFile(dir, "_0_1.del", "_188_1.del"); + copyFile(dir, "_0_1" + ext, "_188_1" + ext); // Create a bogus segment file: copyFile(dir, "_0.cfs", "_188.cfs"); From aadd4725cc656f5343863440e4904064b50926a6 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 20 Jan 2012 16:55:47 +0000 Subject: [PATCH 17/33] LUCENE-3661: get tests passing with simpletext git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1234002 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/TestIndexWriterDelete.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java index 8a4c5313890..526094ee401 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java @@ -961,7 +961,8 @@ public class TestIndexWriterDelete extends LuceneTestCase { } w.updateDocument(delTerm, doc); // Eventually segment 0 should get a del docs: - if (dir.fileExists("_0_1.del")) { + // TODO: fix this test + if (dir.fileExists("_0_1.del") || dir.fileExists("_0_1.liv") ) { if (VERBOSE) { System.out.println("TEST: deletes created @ count=" + count); } @@ -1006,7 +1007,8 @@ public class TestIndexWriterDelete extends LuceneTestCase { } w.updateDocument(delTerm, doc); // Eventually segment 0 should get a del docs: - if (dir.fileExists("_0_1.del")) { + // TODO: fix this test + if 
(dir.fileExists("_0_1.del") || dir.fileExists("_0_1.liv")) { break; } count++; @@ -1052,7 +1054,8 @@ public class TestIndexWriterDelete extends LuceneTestCase { doc.add(newField("body", sb.toString(), TextField.TYPE_UNSTORED)); w.updateDocument(new Term("id", ""+id), doc); docsInSegment.incrementAndGet(); - if (dir.fileExists("_0_1.del")) { + // TODO: fix this test + if (dir.fileExists("_0_1.del") || dir.fileExists("_0_1.liv")) { if (VERBOSE) { System.out.println("TEST: deletes created @ id=" + id); } From 7a3542f16a3073faa5f6d0b7f3f38fe400aa27c5 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 20 Jan 2012 18:10:22 +0000 Subject: [PATCH 18/33] LUCENE-3661: clean up how we handle the case of files outside of CFS git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1234051 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/java/org/apache/lucene/codecs/Codec.java | 5 +++++ .../apache/lucene/codecs/lucene40/BitVector.java | 4 +++- .../org/apache/lucene/index/IndexWriter.java | 4 +--- .../org/apache/lucene/index/SegmentInfo.java | 16 +++------------- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/Codec.java b/lucene/src/java/org/apache/lucene/codecs/Codec.java index 5c134465510..3206a86a401 100644 --- a/lucene/src/java/org/apache/lucene/codecs/Codec.java +++ b/lucene/src/java/org/apache/lucene/codecs/Codec.java @@ -54,6 +54,11 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { normsFormat().files(dir, info, files); } + public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { + liveDocsFormat().separateFiles(dir, info, files); + normsFormat().separateFiles(dir, info, files); + } + /** Encodes/decodes postings */ public abstract PostingsFormat postingsFormat(); diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/BitVector.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/BitVector.java index e26422500f9..b2fa3ec26a5 
100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/BitVector.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/BitVector.java @@ -39,7 +39,9 @@ import org.apache.lucene.util.MutableBits; * * @lucene.internal */ -public final class BitVector implements Cloneable, MutableBits { +// pkg-private: if this thing is generally useful then it can go back in .util, +// but the serialization must be here underneath the codec. +final class BitVector implements Cloneable, MutableBits { private byte[] bits; private int size; diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 2d0837b2863..2479fb04c0f 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -4082,9 +4082,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { Directory dir, SegmentInfo info) throws IOException { // maybe this is overkill, but codec naming clashes would be bad. 
Set separateFiles = new HashSet(); - Codec codec = info.getCodec(); - codec.normsFormat().separateFiles(dir, info, separateFiles); - codec.liveDocsFormat().separateFiles(dir, info, separateFiles); + info.getCodec().separateFiles(dir, info, separateFiles); for (String file : files) { assert !separateFiles.contains(file) : file + " should not go in CFS!"; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index 824ba9d0abc..d3b3caeddff 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -484,6 +484,9 @@ public final class SegmentInfo implements Cloneable { } else { codec.files(dir, this, fileSet); } + + // regardless of compound file setting: these files are always in the directory + codec.separateFiles(dir, this, fileSet); if (docStoreOffset != -1) { // We are sharing doc stores (stored fields, term @@ -495,19 +498,6 @@ public final class SegmentInfo implements Cloneable { } } - // because deletions are stored outside CFS, we must check deletes here - // note: before the WTF logic was: delFileName != null && (hasDeletions() || fileExists(delFileName))... 
- if (hasDeletions()) { - codec.liveDocsFormat().separateFiles(dir, this, fileSet); - } - - // because separate norm files are unconditionally stored outside cfs, - // we must explicitly ask for their filenames if we might have separate norms: - // remove this when 3.x indexes are no longer supported - if (normGen != null) { - codec.normsFormat().separateFiles(dir, this, fileSet); - } - files = new ArrayList(fileSet); return files; From ac9bb797e11edefbf0e4d36c8a5d5b70e901e6e0 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 20 Jan 2012 18:36:14 +0000 Subject: [PATCH 19/33] LUCENE-3661: remove unnecessary method git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1234056 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java | 7 ------- lucene/src/java/org/apache/lucene/util/MutableBits.java | 1 - 2 files changed, 8 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java index 46981c47fd6..0acf4599403 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java @@ -174,13 +174,6 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat { return bits.cardinality(); } - @Override - public boolean getAndSet(int bit) { - boolean v = bits.get(bit); - bits.set(bit); - return v; - } - @Override public boolean getAndClear(int bit) { boolean v = bits.get(bit); diff --git a/lucene/src/java/org/apache/lucene/util/MutableBits.java b/lucene/src/java/org/apache/lucene/util/MutableBits.java index 5a4c253af44..e32e639e802 100644 --- a/lucene/src/java/org/apache/lucene/util/MutableBits.java +++ b/lucene/src/java/org/apache/lucene/util/MutableBits.java @@ -23,7 +23,6 @@ public interface MutableBits extends Bits,Cloneable { public int count(); // 
nocommit: are these truly necessary? - public boolean getAndSet(int bit); public boolean getAndClear(int bit); public MutableBits clone(); } From f6c6f5dd4cfb4bda27a666b2a478ff9507c238bc Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 21 Jan 2012 16:18:11 +0000 Subject: [PATCH 20/33] LUCENE-3661: remove duplicate delete-file-handling git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1234357 13f79535-47bb-0310-9956-ffa450edef68 --- .../codecs/lucene40/Lucene40LiveDocsFormat.java | 13 +------------ .../codecs/simpletext/SimpleTextLiveDocsFormat.java | 1 - 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java index 7e5f6f09a3e..57ba73fac06 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java @@ -9,7 +9,6 @@ import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Bits; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.MutableBits; public class Lucene40LiveDocsFormat extends LiveDocsFormat { @@ -34,17 +33,7 @@ public class Lucene40LiveDocsFormat extends LiveDocsFormat { public void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException { // nocommit: this api is ugly... 
String filename = IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen()); - - // nocommit: test if we really need this - boolean success = false; - try { - ((BitVector)bits).write(dir, filename, context); - success = true; - } finally { - if (!success) { - IOUtils.deleteFilesIgnoringExceptions(dir, filename); - } - } + ((BitVector)bits).write(dir, filename, context); } @Override diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java index 0acf4599403..52aa471c5bf 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java @@ -127,7 +127,6 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat { IOUtils.close(out); } else { IOUtils.closeWhileHandlingException(out); - IOUtils.deleteFilesIgnoringExceptions(dir, fileName); } } } From 7c04dc5ed17ef2566c5707185fd1b95fc085d4ea Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 26 Jan 2012 00:04:06 +0000 Subject: [PATCH 21/33] LUCENE-3661: remove unnecessary method git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1235991 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java | 7 ------- lucene/src/java/org/apache/lucene/index/IndexWriter.java | 3 ++- lucene/src/java/org/apache/lucene/util/MutableBits.java | 2 -- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java index 52aa471c5bf..7f4b28b9c55 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java @@ -173,13 +173,6 @@ 
public class SimpleTextLiveDocsFormat extends LiveDocsFormat { return bits.cardinality(); } - @Override - public boolean getAndClear(int bit) { - boolean v = bits.get(bit); - bits.clear(bit); - return v; - } - @Override public SimpleTextBits clone() { BitSet clonedBits = (BitSet) bits.clone(); diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 2479fb04c0f..37d34582d1f 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -527,8 +527,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { public synchronized boolean delete(int docID) { assert liveDocs != null; assert docID >= 0 && docID < liveDocs.length(); - final boolean didDelete = liveDocs.getAndClear(docID); + final boolean didDelete = liveDocs.get(docID); if (didDelete) { + liveDocs.clear(docID); pendingDeleteCount++; //System.out.println(" new del seg=" + info + " docID=" + docID + " pendingDelCount=" + pendingDeleteCount + " totDelCount=" + (info.docCount-liveDocs.count())); } diff --git a/lucene/src/java/org/apache/lucene/util/MutableBits.java b/lucene/src/java/org/apache/lucene/util/MutableBits.java index e32e639e802..80bac229787 100644 --- a/lucene/src/java/org/apache/lucene/util/MutableBits.java +++ b/lucene/src/java/org/apache/lucene/util/MutableBits.java @@ -22,7 +22,5 @@ public interface MutableBits extends Bits,Cloneable { // nocommit: remove this from this interface somehow? (used by DWPT infostream at least) public int count(); - // nocommit: are these truly necessary? 
- public boolean getAndClear(int bit); public MutableBits clone(); } From 566c1359347f8980b8a2c18895f0bcf8953fb442 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 27 Jan 2012 18:14:00 +0000 Subject: [PATCH 22/33] LUCENE-3661: track count instead of relying on codec to count() in its livedocs git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1236793 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/DocumentsWriterPerThread.java | 7 +++++-- .../apache/lucene/index/FreqProxTermsWriterPerField.java | 5 ++++- lucene/src/java/org/apache/lucene/index/IndexWriter.java | 2 +- .../java/org/apache/lucene/index/SegmentWriteState.java | 2 ++ 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index dd413da75f4..1949410459b 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -115,12 +115,14 @@ public class DocumentsWriterPerThread { final SegmentInfo segmentInfo; final BufferedDeletes segmentDeletes; final MutableBits liveDocs; + final int delCount; private FlushedSegment(SegmentInfo segmentInfo, - BufferedDeletes segmentDeletes, MutableBits liveDocs) { + BufferedDeletes segmentDeletes, MutableBits liveDocs, int delCount) { this.segmentInfo = segmentInfo; this.segmentDeletes = segmentDeletes; this.liveDocs = liveDocs; + this.delCount = delCount; } } @@ -452,6 +454,7 @@ public class DocumentsWriterPerThread { for(int delDocID : pendingDeletes.docIDs) { flushState.liveDocs.clear(delDocID); } + flushState.delCountOnFlush = pendingDeletes.docIDs.size(); pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID); pendingDeletes.docIDs.clear(); } @@ -503,7 +506,7 @@ public class DocumentsWriterPerThread { doAfterFlush(); success = 
true; - return new FlushedSegment(newSegment, segmentDeletes, flushState.liveDocs); + return new FlushedSegment(newSegment, segmentDeletes, flushState.liveDocs, flushState.delCountOnFlush); } finally { if (!success) { if (segment != null) { diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java index ad3502d409d..0642486ddea 100644 --- a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java @@ -465,7 +465,10 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem if (state.liveDocs == null) { state.liveDocs = docState.docWriter.codec.liveDocsFormat().newLiveDocs(state.numDocs); } - state.liveDocs.clear(docID); + if (state.liveDocs.get(docID)) { + state.delCountOnFlush++; + state.liveDocs.clear(docID); + } } totTF += termDocFreq; diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 37d34582d1f..42e9303a0f5 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -2246,7 +2246,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // Must write deleted docs after the CFS so we don't // slurp the del file into CFS: if (flushedSegment.liveDocs != null) { - final int delCount = flushedSegment.segmentInfo.docCount - flushedSegment.liveDocs.count(); + final int delCount = flushedSegment.delCount; assert delCount > 0; newSegment.setDelCount(delCount); newSegment.advanceDelGen(); diff --git a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java index 04211a5b6f0..db5a086569e 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java +++ 
b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java @@ -32,6 +32,7 @@ public class SegmentWriteState { public final String segmentName; public final FieldInfos fieldInfos; public final int numDocs; + public int delCountOnFlush; // Deletes to apply while we are flushing the segment. A // Term is enrolled in here if it was deleted at one @@ -83,5 +84,6 @@ public class SegmentWriteState { codec = state.codec; this.segmentSuffix = segmentSuffix; segDeletes = state.segDeletes; + delCountOnFlush = state.delCountOnFlush; } } From e1a808d489ffc14c5806e3fc848969edbbfabb45 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sat, 28 Jan 2012 10:43:52 +0000 Subject: [PATCH 23/33] drop 100% deleted segments before merging git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237038 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/MappingMultiDocsEnum.java | 1 + .../lucene/codecs/lucene40/values/Floats.java | 2 +- .../lucene/index/BufferedDeletesStream.java | 2 +- .../org/apache/lucene/index/IndexWriter.java | 19 ++-- .../org/apache/lucene/index/MergeState.java | 10 +- .../apache/lucene/index/SegmentMerger.java | 100 +++++++++++++----- 6 files changed, 91 insertions(+), 43 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java b/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java index 0f60e13def6..0319e1da1ce 100644 --- a/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java +++ b/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java @@ -78,6 +78,7 @@ public final class MappingMultiDocsEnum extends DocsEnum { current = subs[upto].docsEnum; currentBase = mergeState.docBase[reader]; currentMap = mergeState.docMaps[reader]; + assert currentMap == null || currentMap.length == subs[upto].slice.length: "readerIndex=" + reader + " subs.len=" + subs.length + " len1=" + currentMap.length + " vs " + subs[upto].slice.length; } } diff --git 
a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java index f205505e5fa..e96334c9b76 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java @@ -109,7 +109,7 @@ public class Floats { throws IOException { super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, type); arrayTemplate = DocValuesArray.TEMPLATES.get(type); - assert size == 4 || size == 8; + assert size == 4 || size == 8: "wrong size=" + size + " type=" + type + " id=" + id; } @Override diff --git a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java index 88e531fd0eb..5b6e661df3f 100644 --- a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java +++ b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java @@ -206,7 +206,7 @@ class BufferedDeletesStream { delIDX--; } else if (packet != null && segGen == packet.delGen()) { - assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet"; + assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet gen=" + segGen; //System.out.println(" eq"); // Lock order: IW -> BD -> RP diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index e5e9dafe278..ea674677d42 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -3036,10 +3036,17 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // newly flushed deletes but mapping them to the new // docIDs. 
+ // Since we copy-on-write, if any new deletes were + // applied after merging has started, we can just + // check if the before/after liveDocs have changed. + // If so, we must carefully merge the liveDocs one + // doc at a time: if (currentLiveDocs != prevLiveDocs) { + // This means this segment received new deletes // since we started the merge, so we // must merge them: + final int startDocUpto = docUpto; for(int j=0;j readers; // Readers & liveDocs being merged - public int[][] docMaps; // Maps docIDs around deletions - public int[] docBase; // New docID base per reader - public int mergedDocCount; // Total # merged docs + public List readers; // Readers & liveDocs being merged + public int[][] docMaps; // Maps docIDs around deletions + public int[] docBase; // New docID base per reader + public Map segmentDocCounts; // Non-deleted docCount per reader + public int mergedDocCount; // Total # merged docs public CheckAbort checkAbort; public InfoStream infoStream; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java index 044ac3ebf58..58b08b6f074 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java @@ -104,12 +104,7 @@ final class SegmentMerger { // IndexWriter.close(false) takes to actually stop the // threads. 
- final int numReaders = mergeState.readers.size(); - // Remap docIDs - mergeState.docMaps = new int[numReaders][]; - mergeState.docBase = new int[numReaders]; - mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[numReaders]; - mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders]; + setDocMaps(); mergeFieldInfos(); setMatchingSegmentReaders(); @@ -283,37 +278,44 @@ final class SegmentMerger { } } - private final void mergeTerms(SegmentWriteState segmentWriteState) throws CorruptIndexException, IOException { - int docBase = 0; - - final List fields = new ArrayList(); - final List slices = new ArrayList(); + private int[] shrink(int[] in, int size) { + final int[] newArray = new int[size]; + System.arraycopy(in, 0, newArray, 0, size); + return newArray; + } - for(MergeState.IndexReaderAndLiveDocs r : mergeState.readers) { - final Fields f = r.reader.fields(); - final int maxDoc = r.reader.maxDoc(); - if (f != null) { - slices.add(new ReaderUtil.Slice(docBase, maxDoc, fields.size())); - fields.add(f); - } - docBase += maxDoc; - } + private int[][] shrink(int[][] in, int size) { + final int[][] newArray = new int[size][]; + System.arraycopy(in, 0, newArray, 0, size); + return newArray; + } + // NOTE: removes any "all deleted" readers from mergeState.readers + private void setDocMaps() throws IOException { final int numReaders = mergeState.readers.size(); - docBase = 0; + // Remap docIDs + mergeState.docMaps = new int[numReaders][]; + mergeState.docBase = new int[numReaders]; + mergeState.segmentDocCounts = new HashMap(); + mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[numReaders]; + mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders]; - for(int i=0;i fields = new ArrayList(); + final List slices = new ArrayList(); + + int docBase = 0; + + for(int readerIndex=0;readerIndex Date: Sat, 28 Jan 2012 13:23:40 +0000 Subject: 
[PATCH 24/33] LUCENE-3661: remove MutableBits.count() git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237057 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene40/Lucene40LiveDocsFormat.java | 10 ++- .../simpletext/SimpleTextLiveDocsFormat.java | 5 -- .../index/DocumentsWriterPerThread.java | 2 +- .../org/apache/lucene/index/IndexWriter.java | 20 +---- .../apache/lucene/index/SegmentMerger.java | 2 + .../apache/lucene/index/SegmentReader.java | 24 ----- .../org/apache/lucene/util/MutableBits.java | 5 +- .../apache/lucene/index/TestMixedCodecs.java | 87 +++++++++++++++++++ 8 files changed, 100 insertions(+), 55 deletions(-) create mode 100644 lucene/src/test/org/apache/lucene/index/TestMixedCodecs.java diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java index 57ba73fac06..7ff98b3c9a0 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java @@ -26,14 +26,20 @@ public class Lucene40LiveDocsFormat extends LiveDocsFormat { @Override public Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException { String filename = IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen()); - return new BitVector(dir, filename, context); + final BitVector liveDocs = new BitVector(dir, filename, context); + assert liveDocs.count() == info.docCount - info.getDelCount(); + assert liveDocs.length() == info.docCount; + return liveDocs; } @Override public void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException { // nocommit: this api is ugly... 
String filename = IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen()); - ((BitVector)bits).write(dir, filename, context); + final BitVector liveDocs = (BitVector) bits; + assert liveDocs.count() == info.docCount - info.getDelCount(); + assert liveDocs.length() == info.docCount; + liveDocs.write(dir, filename, context); } @Override diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java index 7f4b28b9c55..66695d9eb83 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java @@ -168,11 +168,6 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat { bits.clear(bit); } - @Override - public int count() { - return bits.cardinality(); - } - @Override public SimpleTextBits clone() { BitSet clonedBits = (BitSet) bits.clone(); diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index 1949410459b..30b0c6d937a 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -477,7 +477,7 @@ public class DocumentsWriterPerThread { pendingDeletes.terms.clear(); final SegmentInfo newSegment = new SegmentInfo(segment, flushState.numDocs, directory, false, flushState.codec, fieldInfos.asReadOnly()); if (infoStream.isEnabled("DWPT")) { - infoStream.message("DWPT", "new segment has " + (flushState.liveDocs == null ? 0 : (flushState.numDocs - flushState.liveDocs.count())) + " deleted docs"); + infoStream.message("DWPT", "new segment has " + (flushState.liveDocs == null ? 
0 : (flushState.numDocs - flushState.delCountOnFlush)) + " deleted docs"); infoStream.message("DWPT", "new segment has " + (newSegment.getHasVectors() ? "vectors" : "no vectors")); infoStream.message("DWPT", "flushedFiles=" + newSegment.files()); infoStream.message("DWPT", "flushed codec=" + newSegment.getCodec()); diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index ea674677d42..03debc878f1 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -468,17 +468,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { return reader != null || mergeReader != null; } - // Called only from assert - private boolean countsMatch() { - if (liveDocs == null) { - assert pendingDeleteCount == 0; - } else { - assert liveDocs.count() == info.docCount - info.getDelCount() - pendingDeleteCount : - "liveDocs.count()=" + liveDocs.count() + " info.docCount=" + info.docCount + " info.delCount=" + info.getDelCount() + " pendingDelCount=" + pendingDeleteCount; - } - return true; - } - // Get reader for searching/deleting public synchronized SegmentReader getReader(IOContext context) throws IOException { //System.out.println(" livedocs=" + rld.liveDocs); @@ -559,7 +548,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { getReader(context).decRef(); assert reader != null; } - assert countsMatch(); shared = true; if (liveDocs != null) { return new SegmentReader(reader, liveDocs, info.docCount - info.getDelCount() - pendingDeleteCount); @@ -593,7 +581,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { //System.out.println("getROLiveDocs seg=" + info); assert Thread.holdsLock(IndexWriter.this); shared = true; - assert countsMatch(); //if (liveDocs != null) { //System.out.println(" liveCount=" + liveDocs.count()); //} @@ -612,6 +599,7 @@ public class IndexWriter implements Closeable, 
TwoPhaseCommit { // Save in case we need to rollback on failure: final SegmentInfo sav = (SegmentInfo) info.clone(); info.advanceDelGen(); + info.setDelCount(info.getDelCount() + pendingDeleteCount); // We can write directly to the actual name (vs to a // .tmp & renaming it) because the file is not live @@ -625,9 +613,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { info.reset(sav); } } - assert (info.docCount - liveDocs.count()) == info.getDelCount() + pendingDeleteCount: - "delete count mismatch during commit: seg=" + info + " info.delCount=" + info.getDelCount() + " vs MutableBits=" + (info.docCount-liveDocs.count() + " pendingDelCount=" + pendingDeleteCount); - info.setDelCount(info.getDelCount() + pendingDeleteCount); pendingDeleteCount = 0; return true; } else { @@ -3046,7 +3031,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // This means this segment received new deletes // since we started the merge, so we // must merge them: - final int startDocUpto = docUpto; for(int j=0;j deleted = new HashSet(); + while(deleted.size() < NUM_DOCS/2) { + final Integer toDelete = random.nextInt(NUM_DOCS); + if (!deleted.contains(toDelete)) { + deleted.add(toDelete); + w.deleteDocuments(new Term("id", String.valueOf(toDelete))); + if (random.nextInt(17) == 6) { + final IndexReader r = w.getReader(); + assertEquals(NUM_DOCS - deleted.size(), r.numDocs()); + r.close(); + } + } + } + + w.close(); + dir.close(); + } +} From b44abb2aaf4d3623cd96698ab76b9f6e04d173f1 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 28 Jan 2012 14:34:18 +0000 Subject: [PATCH 25/33] LUCENE-3661: removable MutableBits.clone() git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237064 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/java/org/apache/lucene/codecs/LiveDocsFormat.java | 1 + .../lucene/codecs/lucene40/Lucene40LiveDocsFormat.java | 7 ++++++- .../lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java | 6 ++++++ 
lucene/src/java/org/apache/lucene/index/IndexWriter.java | 7 ++++--- lucene/src/java/org/apache/lucene/util/MutableBits.java | 3 +-- 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java index 3bbb1142697..dffea6c4ffd 100644 --- a/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java @@ -28,6 +28,7 @@ import org.apache.lucene.util.MutableBits; public abstract class LiveDocsFormat { public abstract MutableBits newLiveDocs(int size) throws IOException; + public abstract MutableBits newLiveDocs(Bits existing) throws IOException; public abstract Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException; public abstract void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException; public abstract void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException; diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java index 7ff98b3c9a0..16a6dc3d220 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java @@ -23,6 +23,12 @@ public class Lucene40LiveDocsFormat extends LiveDocsFormat { return bitVector; } + @Override + public MutableBits newLiveDocs(Bits existing) throws IOException { + final BitVector liveDocs = (BitVector) existing; + return liveDocs.clone(); + } + @Override public Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException { String filename = IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen()); @@ -34,7 +40,6 @@ public class Lucene40LiveDocsFormat extends 
LiveDocsFormat { @Override public void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException { - // nocommit: this api is ugly... String filename = IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen()); final BitVector liveDocs = (BitVector) bits; assert liveDocs.count() == info.docCount - info.getDelCount(); diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java index 66695d9eb83..f2ae9514974 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java @@ -56,6 +56,12 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat { return new SimpleTextBits(size); } + @Override + public MutableBits newLiveDocs(Bits existing) throws IOException { + final SimpleTextBits bits = (SimpleTextBits) existing; + return bits.clone(); + } + @Override public Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException { assert info.hasDeletions(); diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 03debc878f1..fe37827696f 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -33,6 +33,7 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; import org.apache.lucene.index.FieldInfos.FieldNumberBiMap; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -475,7 +476,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { if (reader == null) { 
reader = new SegmentReader(info, config.getReaderTermsIndexDivisor(), context); if (liveDocs == null) { - // nocommit: nuke cast liveDocs = (MutableBits) reader.getLiveDocs(); } //System.out.println("ADD seg=" + rld.info + " isMerge=" + isMerge + " " + readerMap.size() + " in pool"); @@ -565,11 +565,12 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // SegmentReader sharing the current liveDocs // instance; must now make a private clone so we can // change it: + LiveDocsFormat liveDocsFormat = info.getCodec().liveDocsFormat(); if (liveDocs == null) { //System.out.println("create BV seg=" + info); - liveDocs = info.getCodec().liveDocsFormat().newLiveDocs(info.docCount); + liveDocs = liveDocsFormat.newLiveDocs(info.docCount); } else { - liveDocs = liveDocs.clone(); + liveDocs = liveDocsFormat.newLiveDocs(liveDocs); } shared = false; } else { diff --git a/lucene/src/java/org/apache/lucene/util/MutableBits.java b/lucene/src/java/org/apache/lucene/util/MutableBits.java index 6283be7cc9c..66a69400b6b 100644 --- a/lucene/src/java/org/apache/lucene/util/MutableBits.java +++ b/lucene/src/java/org/apache/lucene/util/MutableBits.java @@ -17,7 +17,6 @@ package org.apache.lucene.util; * limitations under the License. 
*/ -public interface MutableBits extends Bits, Cloneable { +public interface MutableBits extends Bits { public void clear(int bit); - public MutableBits clone(); } From 77d161f043093eaddfdb0a6a06da8c5cf91e30ee Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 28 Jan 2012 14:35:56 +0000 Subject: [PATCH 26/33] LUCENE-3661: make this nocommit a TODO, i am out of ideas git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237065 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/FreqProxTermsWriterPerField.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java index 0642486ddea..ba75c9d9a62 100644 --- a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java @@ -461,7 +461,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem // writing its postings; this would be // deterministic (just for this Term's docs). - // nocommit: totally wrong to do this reach-around here, and this way + // TODO: can we do this reach-around in a cleaner way???? 
if (state.liveDocs == null) { state.liveDocs = docState.docWriter.codec.liveDocsFormat().newLiveDocs(state.numDocs); } From 59b122522b093988232f5f27972a725142045bed Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 28 Jan 2012 14:43:04 +0000 Subject: [PATCH 27/33] LUCENE-3661: remove nocommit git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237066 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/lucene/index/SegmentMerger.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java index 8ba8ee74063..3d6d1e5a5e0 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java @@ -104,11 +104,12 @@ final class SegmentMerger { // IndexWriter.close(false) takes to actually stop the // threads. - setDocMaps(); + mergeState.mergedDocCount = setDocMaps(); mergeFieldInfos(); setMatchingSegmentReaders(); - mergeState.mergedDocCount = mergeFields(); + int numMerged = mergeFields(); + assert numMerged == mergeState.mergedDocCount; final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, segment, mergeState.fieldInfos, mergeState.mergedDocCount, termIndexInterval, codec, null, context); mergeTerms(segmentWriteState); @@ -119,7 +120,7 @@ final class SegmentMerger { } if (mergeState.fieldInfos.hasVectors()) { - int numMerged = mergeVectors(); + numMerged = mergeVectors(); assert numMerged == mergeState.mergedDocCount; } @@ -291,7 +292,7 @@ final class SegmentMerger { } // NOTE: removes any "all deleted" readers from mergeState.readers - private void setDocMaps() throws IOException { + private int setDocMaps() throws IOException { final int numReaders = mergeState.readers.size(); // Remap docIDs @@ -308,8 +309,6 @@ final class SegmentMerger { final MergeState.IndexReaderAndLiveDocs 
reader = mergeState.readers.get(i); - // nocommit -- assert that final doc count == - // mergedDocCount from stored fields and term vectors mergeState.docBase[i] = docBase; final int maxDoc = reader.reader.maxDoc(); final int docCount; @@ -359,6 +358,8 @@ final class SegmentMerger { mergeState.docMaps = shrink(mergeState.docMaps, numReadersLeft); mergeState.docBase = shrink(mergeState.docBase, numReadersLeft); } + + return docBase; } private final void mergeTerms(SegmentWriteState segmentWriteState) throws CorruptIndexException, IOException { From 8684b78e1e2802e705dd8daaeb6a9dbec9e4dc93 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 28 Jan 2012 14:59:48 +0000 Subject: [PATCH 28/33] LUCENE-3661: javadocs git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237067 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/codecs/Codec.java | 8 ++++++++ .../src/java/org/apache/lucene/codecs/LiveDocsFormat.java | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/lucene/src/java/org/apache/lucene/codecs/Codec.java b/lucene/src/java/org/apache/lucene/codecs/Codec.java index 3206a86a401..20df870558c 100644 --- a/lucene/src/java/org/apache/lucene/codecs/Codec.java +++ b/lucene/src/java/org/apache/lucene/codecs/Codec.java @@ -22,6 +22,7 @@ import java.util.Set; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.util.NamedSPILoader; +import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; /** @@ -43,7 +44,11 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { return name; } + /** Populates files with all filenames needed for + * the info segment. 
+ */ public void files(Directory dir, SegmentInfo info, Set files) throws IOException { + assert (dir instanceof CompoundFileDirectory) == false; postingsFormat().files(dir, info, "", files); storedFieldsFormat().files(dir, info, files); termVectorsFormat().files(dir, info, files); @@ -54,6 +59,9 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { normsFormat().files(dir, info, files); } + /** Populates files with any filenames that are + * stored outside of CFS for the info segment. + */ public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { liveDocsFormat().separateFiles(dir, info, files); normsFormat().separateFiles(dir, info, files); diff --git a/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java index dffea6c4ffd..f1b654153c1 100644 --- a/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java @@ -26,10 +26,16 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Bits; import org.apache.lucene.util.MutableBits; +/** Format for live/deleted documents + * @lucene.experimental */ public abstract class LiveDocsFormat { + /** creates a new mutablebits, with all bits set, for the specified size */ public abstract MutableBits newLiveDocs(int size) throws IOException; + /** creates a new mutablebits of the same bits set and size of existing */ public abstract MutableBits newLiveDocs(Bits existing) throws IOException; + /** reads bits from a file */ public abstract Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException; + /** writes bits to a file */ public abstract void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException; public abstract void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException; } From 1e538244af5e6869bd571ecd77557f70930d80e5 Mon 
Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 28 Jan 2012 15:13:36 +0000 Subject: [PATCH 29/33] LUCENE-3661: make Lucene3x codec really completely readonly git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237070 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/lucene3x/Lucene3xCodec.java | 17 ++++++++++-- .../codecs/preflexrw/PreFlexRWCodec.java | 26 +++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java index beaf19481ca..78aac059053 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java @@ -30,6 +30,7 @@ import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfosFormat; import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.StoredFieldsWriter; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; @@ -37,6 +38,8 @@ import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.MutableBits; /** * Supports the Lucene 3.x index format (readonly) @@ -49,7 +52,12 @@ public class Lucene3xCodec extends Codec { private final PostingsFormat postingsFormat = new Lucene3xPostingsFormat(); // TODO: this should really be a different impl - private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat(); + private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat() { + @Override + public StoredFieldsWriter 
fieldsWriter(Directory directory, String segment, IOContext context) throws IOException { + throw new UnsupportedOperationException("this codec can only be used for reading"); + } + }; private final TermVectorsFormat vectorsFormat = new Lucene3xTermVectorsFormat(); @@ -60,7 +68,12 @@ public class Lucene3xCodec extends Codec { private final NormsFormat normsFormat = new Lucene3xNormsFormat(); // TODO: this should really be a different impl - private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat(); + private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat() { + @Override + public void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException { + throw new UnsupportedOperationException("this codec can only be used for reading"); + } + }; // 3.x doesn't support docvalues private final DocValuesFormat docValuesFormat = new DocValuesFormat() { diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java index 9d784b12ee5..aface166018 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java @@ -18,11 +18,15 @@ package org.apache.lucene.codecs.preflexrw; */ import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfosFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.lucene3x.Lucene3xCodec; +import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; +import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; import org.apache.lucene.util.LuceneTestCase; /** @@ 
-35,6 +39,10 @@ public class PreFlexRWCodec extends Lucene3xCodec { private final FieldInfosFormat fieldInfos = new PreFlexRWFieldInfosFormat(); private final TermVectorsFormat termVectors = new PreFlexRWTermVectorsFormat(); private final SegmentInfosFormat segmentInfos = new PreFlexRWSegmentInfosFormat(); + // TODO: this should really be a different impl + private final LiveDocsFormat liveDocs = new Lucene40LiveDocsFormat(); + // TODO: this should really be a different impl + private final StoredFieldsFormat storedFields = new Lucene40StoredFieldsFormat(); @Override public PostingsFormat postingsFormat() { @@ -80,4 +88,22 @@ public class PreFlexRWCodec extends Lucene3xCodec { return super.termVectorsFormat(); } } + + @Override + public LiveDocsFormat liveDocsFormat() { + if (LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { + return liveDocs; + } else { + return super.liveDocsFormat(); + } + } + + @Override + public StoredFieldsFormat storedFieldsFormat() { + if (LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { + return storedFields; + } else { + return super.storedFieldsFormat(); + } + } } From 59bdbb04c04ea1bdee45122213d5896a3573bed1 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 28 Jan 2012 15:32:13 +0000 Subject: [PATCH 30/33] LUCENE-3661: remove MutableBits from SegmentReader, add back nocommit git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237075 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/index/IndexWriter.java | 1 + lucene/src/java/org/apache/lucene/index/SegmentReader.java | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index fe37827696f..da8da39d92b 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -476,6 +476,7 @@ public class IndexWriter implements Closeable, 
TwoPhaseCommit { if (reader == null) { reader = new SegmentReader(info, config.getReaderTermsIndexDivisor(), context); if (liveDocs == null) { + // nocommit: still don't like this cast, gotta be a cleaner way. liveDocs = (MutableBits) reader.getLiveDocs(); } //System.out.println("ADD seg=" + rld.info + " isMerge=" + isMerge + " " + readerMap.size() + " in pool"); diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index 32e916c9d06..b1da9ce0ea1 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -26,7 +26,6 @@ import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.search.FieldCache; // javadocs import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Bits; -import org.apache.lucene.util.MutableBits; /** * @lucene.experimental @@ -103,7 +102,7 @@ public final class SegmentReader extends IndexReader { // SegmentReader and using the provided in-memory // liveDocs. 
Used by IndexWriter to provide a new NRT // reader: - SegmentReader(SegmentReader parent, MutableBits liveDocs, int numDocs) throws IOException { + SegmentReader(SegmentReader parent, Bits liveDocs, int numDocs) throws IOException { this.si = parent.si; parent.core.incRef(); this.core = parent.core; From 509ad87610cf3f95361794428699b18a0824b4e8 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 28 Jan 2012 15:47:08 +0000 Subject: [PATCH 31/33] LUCENE-3661: fix TODO, pass core to these SR ctors and remove code duplication git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237077 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/DirectoryReader.java | 4 ++- .../org/apache/lucene/index/IndexWriter.java | 2 +- .../apache/lucene/index/SegmentReader.java | 31 +++++-------------- 3 files changed, 11 insertions(+), 26 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java index 6a9bcd1115f..a39e9039dd0 100644 --- a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java @@ -166,7 +166,9 @@ final class DirectoryReader extends BaseMultiReader { } else { readerShared[i] = false; // Steal the ref returned by SegmentReader ctor: - newReaders[i] = new SegmentReader(infos.info(i), newReaders[i], IOContext.READ); + assert infos.info(i).dir == newReaders[i].getSegmentInfo().dir; + assert infos.info(i).hasDeletions(); + newReaders[i] = new SegmentReader(infos.info(i), newReaders[i].core, IOContext.READ); } } success = true; diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index da8da39d92b..ac32d27fecc 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -551,7 +551,7 @@ public class IndexWriter implements
Closeable, TwoPhaseCommit { } shared = true; if (liveDocs != null) { - return new SegmentReader(reader, liveDocs, info.docCount - info.getDelCount() - pendingDeleteCount); + return new SegmentReader(reader.getSegmentInfo(), reader.core, liveDocs, info.docCount - info.getDelCount() - pendingDeleteCount); } else { reader.incRef(); return reader; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index b1da9ce0ea1..02ad8451076 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -42,7 +42,7 @@ public final class SegmentReader extends IndexReader { // tells us the docCount: private final int numDocs; - private final SegmentCoreReaders core; + final SegmentCoreReaders core; /** * @throws CorruptIndexException if the index is corrupt @@ -74,38 +74,21 @@ public final class SegmentReader extends IndexReader { } } - // TODO: really these next 2 ctors could take - // SegmentCoreReaders... that's all we do w/ the parent - // SR: - // Create new SegmentReader sharing core from a previous // SegmentReader and loading new live docs from a new // deletes file. Used by openIfChanged. - SegmentReader(SegmentInfo si, SegmentReader parent, IOContext context) throws IOException { - assert si.dir == parent.getSegmentInfo().dir; - this.si = si; - - // It's no longer possible to unDeleteAll, so, we can - // only be created if we have deletions: - assert si.hasDeletions(); - - // ... 
but load our own deleted docs: - liveDocs = si.getCodec().liveDocsFormat().readLiveDocs(si.dir, si, context); - numDocs = si.docCount - si.getDelCount(); - - // We share core w/ parent: - parent.core.incRef(); - core = parent.core; + SegmentReader(SegmentInfo si, SegmentCoreReaders core, IOContext context) throws IOException { + this(si, core, si.getCodec().liveDocsFormat().readLiveDocs(si.dir, si, context), si.docCount - si.getDelCount()); } // Create new SegmentReader sharing core from a previous // SegmentReader and using the provided in-memory // liveDocs. Used by IndexWriter to provide a new NRT // reader: - SegmentReader(SegmentReader parent, Bits liveDocs, int numDocs) throws IOException { - this.si = parent.si; - parent.core.incRef(); - this.core = parent.core; + SegmentReader(SegmentInfo si, SegmentCoreReaders core, Bits liveDocs, int numDocs) throws IOException { + this.si = si; + this.core = core; + core.incRef(); assert liveDocs != null; this.liveDocs = liveDocs; From 1b8d8b435088dcd53544f27913af21e1e457745f Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sat, 28 Jan 2012 17:59:45 +0000 Subject: [PATCH 32/33] LUCENE-3661: simplify del count tracking during merge git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237114 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/IndexWriter.java | 37 ++++++++++++++++--- .../org/apache/lucene/index/MergeState.java | 2 - .../apache/lucene/index/SegmentMerger.java | 30 --------------- 3 files changed, 31 insertions(+), 38 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index ac32d27fecc..07418ad8465 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -447,6 +447,24 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { return rc > myRefCounts; } + // Call only from assert! 
+ public synchronized boolean verifyDocCounts() { + int count; + if (liveDocs != null) { + count = 0; + for(int docID=0;docID(); mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[numReaders]; mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders]; @@ -332,16 +319,6 @@ final class SegmentMerger { docMap = null; } - if (reader.reader instanceof SegmentReader) { - mergeState.segmentDocCounts.put(((SegmentReader) reader.reader).getSegmentInfo(), docCount); - } - - if (docCount == 0) { - // Skip this reader (all docs are deleted): - mergeState.readers.remove(i); - continue; - } - mergeState.docMaps[i] = docMap; docBase += docCount; @@ -352,13 +329,6 @@ final class SegmentMerger { i++; } - final int numReadersLeft = mergeState.readers.size(); - - if (numReadersLeft < mergeState.docMaps.length) { - mergeState.docMaps = shrink(mergeState.docMaps, numReadersLeft); - mergeState.docBase = shrink(mergeState.docBase, numReadersLeft); - } - return docBase; } From 8933cfe1810be4e4a5b5c709ef63c5166826905d Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 28 Jan 2012 18:22:49 +0000 Subject: [PATCH 33/33] LUCENE-3661: nuke nocommit for real, split SimpleText to use read-only bits impls whenever it can git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237127 13f79535-47bb-0310-9956-ffa450edef68 --- .../simpletext/SimpleTextLiveDocsFormat.java | 32 ++++++++++--------- .../org/apache/lucene/index/IndexWriter.java | 11 +++---- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java index f2ae9514974..a7d19d3613e 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java @@ -53,13 +53,13 @@ 
public class SimpleTextLiveDocsFormat extends LiveDocsFormat { @Override public MutableBits newLiveDocs(int size) throws IOException { - return new SimpleTextBits(size); + return new SimpleTextMutableBits(size); } @Override public MutableBits newLiveDocs(Bits existing) throws IOException { final SimpleTextBits bits = (SimpleTextBits) existing; - return bits.clone(); + return new SimpleTextMutableBits((BitSet)bits.bits.clone(), bits.size); } @Override @@ -144,16 +144,11 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat { } } - static class SimpleTextBits implements MutableBits { + // read-only + static class SimpleTextBits implements Bits { final BitSet bits; final int size; - SimpleTextBits(int size) { - this.size = size; - bits = new BitSet(size); - bits.set(0, size); - } - SimpleTextBits(BitSet bits, int size) { this.bits = bits; this.size = size; @@ -168,16 +163,23 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat { public int length() { return size; } + } + + // read-write + static class SimpleTextMutableBits extends SimpleTextBits implements MutableBits { + SimpleTextMutableBits(int size) { + this(new BitSet(size), size); + bits.set(0, size); + } + + SimpleTextMutableBits(BitSet bits, int size) { + super(bits, size); + } + @Override public void clear(int bit) { bits.clear(bit); } - - @Override - public SimpleTextBits clone() { - BitSet clonedBits = (BitSet) bits.clone(); - return new SimpleTextBits(clonedBits, size); - } } } diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 07418ad8465..68c7b3160af 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -417,7 +417,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // docs, and it's copy-on-write (cloned whenever we need // to change it but it's been shared to an external NRT // reader). 
- public MutableBits liveDocs; + public Bits liveDocs; // How many further deletions we've done against // liveDocs vs when we loaded it or last wrote it: @@ -494,8 +494,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { if (reader == null) { reader = new SegmentReader(info, config.getReaderTermsIndexDivisor(), context); if (liveDocs == null) { - // nocommit: still don't like this cast, gotta be a cleaner way. - liveDocs = (MutableBits) reader.getLiveDocs(); + liveDocs = reader.getLiveDocs(); } //System.out.println("ADD seg=" + rld.info + " isMerge=" + isMerge + " " + readerMap.size() + " in pool"); } @@ -522,7 +521,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } else { mergeReader = new SegmentReader(info, -1, context); if (liveDocs == null) { - liveDocs = (MutableBits) mergeReader.getLiveDocs(); + liveDocs = mergeReader.getLiveDocs(); } } } @@ -538,7 +537,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { assert !shared; final boolean didDelete = liveDocs.get(docID); if (didDelete) { - liveDocs.clear(docID); + ((MutableBits) liveDocs).clear(docID); pendingDeleteCount++; //System.out.println(" new del seg=" + info + " docID=" + docID + " pendingDelCount=" + pendingDeleteCount + " totDelCount=" + (info.docCount-liveDocs.count())); } @@ -627,7 +626,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // until segments file is written: boolean success = false; try { - info.getCodec().liveDocsFormat().writeLiveDocs(liveDocs, dir, info, IOContext.DEFAULT); + info.getCodec().liveDocsFormat().writeLiveDocs((MutableBits)liveDocs, dir, info, IOContext.DEFAULT); success = true; } finally { if (!success) {