diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50LiveDocsFormat.java similarity index 94% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50LiveDocsFormat.java index 0a0c47692db..ebe76ae614e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50LiveDocsFormat.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.codecs.lucene50; +package org.apache.lucene.backward_codecs.lucene50; import java.io.IOException; import java.util.Collection; @@ -107,6 +107,11 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat { return new FixedBitSet(data, length); } + /** + * Note: although this format is only used on older versions, we need to keep the write logic in + * addition to the read logic. When we delete documents that live in an older segment, we write to + * the live docs for that segment. + */ @Override public void writeLiveDocs( Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java index c259fb5848b..e34502eae1f 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.backward_codecs.lucene70; +import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; @@ -34,7 +35,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; -import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java index 66604612c69..f39ffa74199 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.backward_codecs.lucene80; +import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; import org.apache.lucene.backward_codecs.lucene60.Lucene60PointsFormat; @@ -33,7 +34,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; -import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java index 49383e3cf07..0b3ffb728dd 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java @@ -17,6 +17,7 @@ package org.apache.lucene.backward_codecs.lucene84; import java.util.Objects; +import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; @@ -36,7 +37,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; -import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java index 7d51c67d51a..db025737ffb 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java @@ -18,6 +18,7 @@ package org.apache.lucene.backward_codecs.lucene86; import java.util.Objects; +import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; import org.apache.lucene.codecs.Codec; @@ -34,7 +35,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; -import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java index b254fa6bcec..8543de6b817 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java @@ -18,6 +18,7 @@ package org.apache.lucene.backward_codecs.lucene87; import java.util.Objects; +import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.CompoundFormat; @@ -33,7 +34,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; -import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50LiveDocsFormat.java similarity index 87% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50LiveDocsFormat.java index ebf83949936..5cd1bc038a1 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50LiveDocsFormat.java @@ -14,16 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.codecs.lucene50; +package org.apache.lucene.backward_codecs.lucene50; +import org.apache.lucene.backward_codecs.lucene86.Lucene86RWCodec; import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.BaseLiveDocsFormatTestCase; -import org.apache.lucene.util.TestUtil; public class TestLucene50LiveDocsFormat extends BaseLiveDocsFormatTestCase { @Override protected Codec getCodec() { - return TestUtil.getDefaultCodec(); + return new Lucene86RWCodec(); } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java index d89b6481471..7c75d5a26f7 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java @@ -1835,6 +1835,29 @@ public class TestBackwardsCompatibility extends LuceneTestCase { dir.close(); } + public void testDeletes() throws Exception { + Path oldIndexDir = createTempDir("dvupdates"); + TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir); + Directory dir = newFSDirectory(oldIndexDir); + verifyUsesDefaultCodec(dir, dvUpdatesIndex); + + IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())); + IndexWriter writer = new IndexWriter(dir, conf); + + int maxDoc = writer.getDocStats().maxDoc; + writer.deleteDocuments(new Term("id", "1")); + if (random().nextBoolean()) { + writer.commit(); + } + + writer.forceMerge(1); + writer.commit(); + assertEquals(maxDoc - 1, writer.getDocStats().maxDoc); + + writer.close(); + dir.close(); + } + public void testSoftDeletes() throws Exception { Path oldIndexDir = createTempDir("dvupdates"); TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java index be3a9e79112..625059259ec 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java @@ -31,7 +31,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; -import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; @@ -73,7 +72,7 @@ public class Lucene90Codec extends Codec { private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat(); private final FieldInfosFormat fieldInfosFormat = new Lucene90FieldInfosFormat(); private final SegmentInfoFormat segmentInfosFormat = new Lucene86SegmentInfoFormat(); - private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); + private final LiveDocsFormat liveDocsFormat = new Lucene90LiveDocsFormat(); private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); private final PostingsFormat defaultFormat; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java new file mode 100644 index 00000000000..e5496a96928 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene90; + +import java.io.IOException; +import java.util.Collection; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.FixedBitSet; + +/** + * Lucene 9.0 live docs format + * + *
The .liv file is optional, and only exists when a segment contains deletions. + * + *
Although per-segment, this file is maintained exterior to compound segment files. + * + *
Deletions (.liv) --> IndexHeader,Generation,Bits + * + *