mirror of https://github.com/apache/lucene.git
LUCENE-9705: Create Lucene90LiveDocsFormat (#2274)
For now this is just a copy of Lucene50LiveDocsFormat. The existing Lucene50LiveDocsFormat was moved to backwards-codecs.
This commit is contained in:
parent
7fd64aabcc
commit
f0a2f1fe03
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene50;
|
||||
package org.apache.lucene.backward_codecs.lucene50;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
@ -107,6 +107,11 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
|
|||
return new FixedBitSet(data, length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: although this format is only used on older versions, we need to keep the write logic in
|
||||
* addition to the read logic. When we delete documents that live in an older segment, we write to
|
||||
* the live docs for that segment.
|
||||
*/
|
||||
@Override
|
||||
public void writeLiveDocs(
|
||||
Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context)
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.backward_codecs.lucene70;
|
||||
|
||||
import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
|
||||
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
|
||||
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
|
||||
import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
|
||||
|
@ -34,7 +35,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
|
|||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.backward_codecs.lucene80;
|
||||
|
||||
import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
|
||||
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
|
||||
import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
|
||||
import org.apache.lucene.backward_codecs.lucene60.Lucene60PointsFormat;
|
||||
|
@ -33,7 +34,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
|
|||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.lucene.backward_codecs.lucene84;
|
||||
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
|
||||
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
|
||||
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
|
||||
import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
|
||||
|
@ -36,7 +37,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
|
|||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
|
||||
import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat;
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
package org.apache.lucene.backward_codecs.lucene86;
|
||||
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
|
||||
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
|
||||
import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
|
@ -34,7 +35,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
|
|||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
|
||||
import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat;
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
package org.apache.lucene.backward_codecs.lucene87;
|
||||
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
|
||||
import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.CompoundFormat;
|
||||
|
@ -33,7 +34,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
|
|||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
|
||||
|
|
|
@ -14,16 +14,16 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene50;
|
||||
package org.apache.lucene.backward_codecs.lucene50;
|
||||
|
||||
import org.apache.lucene.backward_codecs.lucene86.Lucene86RWCodec;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.index.BaseLiveDocsFormatTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestLucene50LiveDocsFormat extends BaseLiveDocsFormatTestCase {
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
return TestUtil.getDefaultCodec();
|
||||
return new Lucene86RWCodec();
|
||||
}
|
||||
}
|
|
@ -1835,6 +1835,29 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testDeletes() throws Exception {
|
||||
Path oldIndexDir = createTempDir("dvupdates");
|
||||
TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir);
|
||||
Directory dir = newFSDirectory(oldIndexDir);
|
||||
verifyUsesDefaultCodec(dir, dvUpdatesIndex);
|
||||
|
||||
IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
IndexWriter writer = new IndexWriter(dir, conf);
|
||||
|
||||
int maxDoc = writer.getDocStats().maxDoc;
|
||||
writer.deleteDocuments(new Term("id", "1"));
|
||||
if (random().nextBoolean()) {
|
||||
writer.commit();
|
||||
}
|
||||
|
||||
writer.forceMerge(1);
|
||||
writer.commit();
|
||||
assertEquals(maxDoc - 1, writer.getDocStats().maxDoc);
|
||||
|
||||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSoftDeletes() throws Exception {
|
||||
Path oldIndexDir = createTempDir("dvupdates");
|
||||
TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir);
|
||||
|
|
|
@ -31,7 +31,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
|
|||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
|
||||
|
@ -73,7 +72,7 @@ public class Lucene90Codec extends Codec {
|
|||
private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat();
|
||||
private final FieldInfosFormat fieldInfosFormat = new Lucene90FieldInfosFormat();
|
||||
private final SegmentInfoFormat segmentInfosFormat = new Lucene86SegmentInfoFormat();
|
||||
private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
|
||||
private final LiveDocsFormat liveDocsFormat = new Lucene90LiveDocsFormat();
|
||||
private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
|
||||
private final PostingsFormat defaultFormat;
|
||||
|
||||
|
|
|
@ -0,0 +1,165 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene90;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentCommitInfo;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
/**
|
||||
* Lucene 9.0 live docs format
|
||||
*
|
||||
* <p>The .liv file is optional, and only exists when a segment contains deletions.
|
||||
*
|
||||
* <p>Although per-segment, this file is maintained exterior to compound segment files.
|
||||
*
|
||||
* <p>Deletions (.liv) --> IndexHeader,Generation,Bits
|
||||
*
|
||||
* <ul>
|
||||
* <li>SegmentHeader --> {@link CodecUtil#writeIndexHeader IndexHeader}
|
||||
* <li>Bits --> <{@link DataOutput#writeLong Int64}> <sup>LongCount</sup>
|
||||
* </ul>
|
||||
*/
|
||||
public final class Lucene90LiveDocsFormat extends LiveDocsFormat {
|
||||
|
||||
/** extension of live docs */
|
||||
private static final String EXTENSION = "liv";
|
||||
|
||||
/** codec of live docs */
|
||||
private static final String CODEC_NAME = "Lucene90LiveDocs";
|
||||
|
||||
/** supported version range */
|
||||
private static final int VERSION_START = 0;
|
||||
|
||||
private static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
/** Sole constructor. */
|
||||
public Lucene90LiveDocsFormat() {}
|
||||
|
||||
@Override
|
||||
public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
|
||||
throws IOException {
|
||||
long gen = info.getDelGen();
|
||||
String name = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, gen);
|
||||
final int length = info.info.maxDoc();
|
||||
try (ChecksumIndexInput input = dir.openChecksumInput(name, context)) {
|
||||
Throwable priorE = null;
|
||||
try {
|
||||
CodecUtil.checkIndexHeader(
|
||||
input,
|
||||
CODEC_NAME,
|
||||
VERSION_START,
|
||||
VERSION_CURRENT,
|
||||
info.info.getId(),
|
||||
Long.toString(gen, Character.MAX_RADIX));
|
||||
|
||||
FixedBitSet fbs = readFixedBitSet(input, length);
|
||||
|
||||
if (fbs.length() - fbs.cardinality() != info.getDelCount()) {
|
||||
throw new CorruptIndexException(
|
||||
"bits.deleted="
|
||||
+ (fbs.length() - fbs.cardinality())
|
||||
+ " info.delcount="
|
||||
+ info.getDelCount(),
|
||||
input);
|
||||
}
|
||||
return fbs.asReadOnlyBits();
|
||||
} catch (Throwable exception) {
|
||||
priorE = exception;
|
||||
} finally {
|
||||
CodecUtil.checkFooter(input, priorE);
|
||||
}
|
||||
}
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
||||
private FixedBitSet readFixedBitSet(IndexInput input, int length) throws IOException {
|
||||
long data[] = new long[FixedBitSet.bits2words(length)];
|
||||
for (int i = 0; i < data.length; i++) {
|
||||
data[i] = input.readLong();
|
||||
}
|
||||
return new FixedBitSet(data, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeLiveDocs(
|
||||
Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context)
|
||||
throws IOException {
|
||||
long gen = info.getNextDelGen();
|
||||
String name = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, gen);
|
||||
int delCount;
|
||||
try (IndexOutput output = dir.createOutput(name, context)) {
|
||||
|
||||
CodecUtil.writeIndexHeader(
|
||||
output,
|
||||
CODEC_NAME,
|
||||
VERSION_CURRENT,
|
||||
info.info.getId(),
|
||||
Long.toString(gen, Character.MAX_RADIX));
|
||||
|
||||
delCount = writeBits(output, bits);
|
||||
|
||||
CodecUtil.writeFooter(output);
|
||||
}
|
||||
if (delCount != info.getDelCount() + newDelCount) {
|
||||
throw new CorruptIndexException(
|
||||
"bits.deleted="
|
||||
+ delCount
|
||||
+ " info.delcount="
|
||||
+ info.getDelCount()
|
||||
+ " newdelcount="
|
||||
+ newDelCount,
|
||||
name);
|
||||
}
|
||||
}
|
||||
|
||||
private int writeBits(IndexOutput output, Bits bits) throws IOException {
|
||||
int delCount = 0;
|
||||
final int longCount = FixedBitSet.bits2words(bits.length());
|
||||
for (int i = 0; i < longCount; ++i) {
|
||||
long currentBits = 0;
|
||||
for (int j = i << 6, end = Math.min(j + 63, bits.length() - 1); j <= end; ++j) {
|
||||
if (bits.get(j)) {
|
||||
currentBits |= 1L << j; // mod 64
|
||||
} else {
|
||||
delCount += 1;
|
||||
}
|
||||
}
|
||||
output.writeLong(currentBits);
|
||||
}
|
||||
return delCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void files(SegmentCommitInfo info, Collection<String> files) throws IOException {
|
||||
if (info.hasDeletions()) {
|
||||
files.add(IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, info.getDelGen()));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -174,7 +174,7 @@
|
|||
* loaded into main memory for fast access. Whereas stored values are generally intended for
|
||||
* summary results from searches, per-document values are useful for things like scoring
|
||||
* factors.
|
||||
* <li>{@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live documents}. An
|
||||
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live documents}. An
|
||||
* optional file indicating which documents are live.
|
||||
* <li>{@link org.apache.lucene.codecs.lucene86.Lucene86PointsFormat Point values}. Optional pair
|
||||
* of files, recording dimensionally indexed fields, to enable fast numeric range filtering
|
||||
|
@ -300,7 +300,7 @@
|
|||
* <td>Contains term vector data.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live Documents}</td>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live Documents}</td>
|
||||
* <td>.liv</td>
|
||||
* <td>Info about what documents are live</td>
|
||||
* </tr>
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene50;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.index.BaseLiveDocsFormatTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestLucene90LiveDocsFormat extends BaseLiveDocsFormatTestCase {
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
return TestUtil.getDefaultCodec();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue