diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50LiveDocsFormat.java similarity index 94% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50LiveDocsFormat.java index 0a0c47692db..ebe76ae614e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50LiveDocsFormat.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.codecs.lucene50; +package org.apache.lucene.backward_codecs.lucene50; import java.io.IOException; import java.util.Collection; @@ -107,6 +107,11 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat { return new FixedBitSet(data, length); } + /** + * Note: although this format is only used on older versions, we need to keep the write logic in + * addition to the read logic. When we delete documents that live in an older segment, we write to + * the live docs for that segment. 
+ */ @Override public void writeLiveDocs( Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java index c259fb5848b..e34502eae1f 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.backward_codecs.lucene70; +import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; @@ -34,7 +35,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; -import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java index 66604612c69..f39ffa74199 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.backward_codecs.lucene80; +import 
org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; import org.apache.lucene.backward_codecs.lucene60.Lucene60PointsFormat; @@ -33,7 +34,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; -import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java index 49383e3cf07..0b3ffb728dd 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java @@ -17,6 +17,7 @@ package org.apache.lucene.backward_codecs.lucene84; import java.util.Objects; +import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; @@ -36,7 +37,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; -import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; import 
org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java index 7d51c67d51a..db025737ffb 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java @@ -18,6 +18,7 @@ package org.apache.lucene.backward_codecs.lucene86; import java.util.Objects; +import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; import org.apache.lucene.codecs.Codec; @@ -34,7 +35,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; -import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java index b254fa6bcec..8543de6b817 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java @@ -18,6 +18,7 @@ package org.apache.lucene.backward_codecs.lucene87; 
import java.util.Objects; +import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.CompoundFormat; @@ -33,7 +34,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; -import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50LiveDocsFormat.java similarity index 87% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50LiveDocsFormat.java index ebf83949936..5cd1bc038a1 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50LiveDocsFormat.java @@ -14,16 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.lucene.codecs.lucene50; +package org.apache.lucene.backward_codecs.lucene50; +import org.apache.lucene.backward_codecs.lucene86.Lucene86RWCodec; import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.BaseLiveDocsFormatTestCase; -import org.apache.lucene.util.TestUtil; public class TestLucene50LiveDocsFormat extends BaseLiveDocsFormatTestCase { @Override protected Codec getCodec() { - return TestUtil.getDefaultCodec(); + return new Lucene86RWCodec(); } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java index d89b6481471..7c75d5a26f7 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java @@ -1835,6 +1835,29 @@ public class TestBackwardsCompatibility extends LuceneTestCase { dir.close(); } + public void testDeletes() throws Exception { + Path oldIndexDir = createTempDir("dvupdates"); + TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir); + Directory dir = newFSDirectory(oldIndexDir); + verifyUsesDefaultCodec(dir, dvUpdatesIndex); + + IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())); + IndexWriter writer = new IndexWriter(dir, conf); + + int maxDoc = writer.getDocStats().maxDoc; + writer.deleteDocuments(new Term("id", "1")); + if (random().nextBoolean()) { + writer.commit(); + } + + writer.forceMerge(1); + writer.commit(); + assertEquals(maxDoc - 1, writer.getDocStats().maxDoc); + + writer.close(); + dir.close(); + } + public void testSoftDeletes() throws Exception { Path oldIndexDir = createTempDir("dvupdates"); TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java index be3a9e79112..625059259ec 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java @@ -31,7 +31,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; -import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; @@ -73,7 +72,7 @@ public class Lucene90Codec extends Codec { private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat(); private final FieldInfosFormat fieldInfosFormat = new Lucene90FieldInfosFormat(); private final SegmentInfoFormat segmentInfosFormat = new Lucene86SegmentInfoFormat(); - private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); + private final LiveDocsFormat liveDocsFormat = new Lucene90LiveDocsFormat(); private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); private final PostingsFormat defaultFormat; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java new file mode 100644 index 00000000000..e5496a96928 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene90; + +import java.io.IOException; +import java.util.Collection; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.FixedBitSet; + +/** + * Lucene 9.0 live docs format + * + *

The .liv file is optional, and only exists when a segment contains deletions. + * + *

Although per-segment, this file is maintained outside of compound segment files. + * + *

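Deletions (.liv) --> IndexHeader,Bits,Footer (the Generation is not a separate field: it is written as the IndexHeader suffix; Bits is a sequence of Int64 words in which a set bit marks a live document; Footer is the codec footer verified on read) + * + *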

+ */ +public final class Lucene90LiveDocsFormat extends LiveDocsFormat { + + /** extension of live docs */ + private static final String EXTENSION = "liv"; + + /** codec of live docs */ + private static final String CODEC_NAME = "Lucene90LiveDocs"; + + /** supported version range */ + private static final int VERSION_START = 0; + + private static final int VERSION_CURRENT = VERSION_START; + + /** Sole constructor. */ + public Lucene90LiveDocsFormat() {} + + @Override + public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) + throws IOException { + long gen = info.getDelGen(); + String name = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, gen); + final int length = info.info.maxDoc(); + try (ChecksumIndexInput input = dir.openChecksumInput(name, context)) { + Throwable priorE = null; + try { + CodecUtil.checkIndexHeader( + input, + CODEC_NAME, + VERSION_START, + VERSION_CURRENT, + info.info.getId(), + Long.toString(gen, Character.MAX_RADIX)); + + FixedBitSet fbs = readFixedBitSet(input, length); + + if (fbs.length() - fbs.cardinality() != info.getDelCount()) { + throw new CorruptIndexException( + "bits.deleted=" + + (fbs.length() - fbs.cardinality()) + + " info.delcount=" + + info.getDelCount(), + input); + } + return fbs.asReadOnlyBits(); + } catch (Throwable exception) { + priorE = exception; + } finally { + CodecUtil.checkFooter(input, priorE); + } + } + throw new AssertionError(); + } + + private FixedBitSet readFixedBitSet(IndexInput input, int length) throws IOException { + long data[] = new long[FixedBitSet.bits2words(length)]; + for (int i = 0; i < data.length; i++) { + data[i] = input.readLong(); + } + return new FixedBitSet(data, length); + } + + @Override + public void writeLiveDocs( + Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) + throws IOException { + long gen = info.getNextDelGen(); + String name = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, 
gen); + int delCount; + try (IndexOutput output = dir.createOutput(name, context)) { + + CodecUtil.writeIndexHeader( + output, + CODEC_NAME, + VERSION_CURRENT, + info.info.getId(), + Long.toString(gen, Character.MAX_RADIX)); + + delCount = writeBits(output, bits); + + CodecUtil.writeFooter(output); + } + if (delCount != info.getDelCount() + newDelCount) { + throw new CorruptIndexException( + "bits.deleted=" + + delCount + + " info.delcount=" + + info.getDelCount() + + " newdelcount=" + + newDelCount, + name); + } + } + + private int writeBits(IndexOutput output, Bits bits) throws IOException { + int delCount = 0; + final int longCount = FixedBitSet.bits2words(bits.length()); + for (int i = 0; i < longCount; ++i) { + long currentBits = 0; + for (int j = i << 6, end = Math.min(j + 63, bits.length() - 1); j <= end; ++j) { + if (bits.get(j)) { + currentBits |= 1L << j; // mod 64 + } else { + delCount += 1; + } + } + output.writeLong(currentBits); + } + return delCount; + } + + @Override + public void files(SegmentCommitInfo info, Collection files) throws IOException { + if (info.hasDeletions()) { + files.add(IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, info.getDelGen())); + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java index 6bc4f5d4801..b7a9d4ad268 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java @@ -174,7 +174,7 @@ * loaded into main memory for fast access. Whereas stored values are generally intended for * summary results from searches, per-document values are useful for things like scoring * factors. - *
  • {@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live documents}. An + *
  • {@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live documents}. An * optional file indicating which documents are live. *
  • {@link org.apache.lucene.codecs.lucene86.Lucene86PointsFormat Point values}. Optional pair * of files, recording dimensionally indexed fields, to enable fast numeric range filtering @@ -300,7 +300,7 @@ * Contains term vector data. * * - * {@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live Documents} + * {@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live Documents} * .liv * Info about what documents are live * diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene90LiveDocsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene90LiveDocsFormat.java new file mode 100644 index 00000000000..b4c734bd372 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene90LiveDocsFormat.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene50; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.BaseLiveDocsFormatTestCase; +import org.apache.lucene.util.TestUtil; + +public class TestLucene90LiveDocsFormat extends BaseLiveDocsFormatTestCase { + + @Override + protected Codec getCodec() { + return TestUtil.getDefaultCodec(); + } +}