diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50CompoundFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50CompoundFormat.java new file mode 100644 index 00000000000..75d6912d41d --- /dev/null +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50CompoundFormat.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.backward_codecs.lucene50; + +import java.io.IOException; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.CompoundDirectory; +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; + +/** + * Lucene 5.0 compound file format + * + *

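<p>Files:
+ *
+ * <ul>
+ *   <li><code>.cfs</code>: An optional "virtual" file consisting of all the other index files for
+ *       systems that frequently run out of file handles.
+ *   <li><code>.cfe</code>: The "virtual" compound file's entry table holding all entries in the
+ *       corresponding .cfs file.
+ * </ul>
+ *
+ * <p>Description:
+ *
+ * <ul>
+ *   <li>Compound (.cfs) --&gt; Header, FileData <sup>FileCount</sup>, Footer
+ *   <li>Compound Entry Table (.cfe) --&gt; Header, FileCount, &lt;FileName, DataOffset,
+ *       DataLength&gt; <sup>FileCount</sup>
+ *   <li>Header --&gt; {@link CodecUtil#writeIndexHeader IndexHeader}
+ *   <li>FileCount --&gt; {@link DataOutput#writeVInt VInt}
+ *   <li>DataOffset,DataLength,Checksum --&gt; {@link DataOutput#writeLong UInt64}
+ *   <li>FileName --&gt; {@link DataOutput#writeString String}
+ *   <li>FileData --&gt; raw file data
+ *   <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}
+ * </ul>
+ *
+ * <p>Notes:
+ *
+ * <ul>
+ *   <li>FileCount indicates how many files are contained in this compound file. The entry table
+ *       that follows has that many entries.
+ *   <li>Each directory entry contains a long pointer to the start of this file's data section, the
+ *       files length, and a String with that file's name.
+ * </ul>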
+ */ +public final class Lucene50CompoundFormat extends CompoundFormat { + + /** Extension of compound file */ + static final String DATA_EXTENSION = "cfs"; + /** Extension of compound file entries */ + static final String ENTRIES_EXTENSION = "cfe"; + + static final String DATA_CODEC = "Lucene50CompoundData"; + static final String ENTRY_CODEC = "Lucene50CompoundEntries"; + static final int VERSION_START = 0; + static final int VERSION_CURRENT = VERSION_START; + + /** Sole constructor. */ + public Lucene50CompoundFormat() {} + + @Override + public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) + throws IOException { + return new Lucene50CompoundReader(dir, si, context); + } + + @Override + public void write(Directory dir, SegmentInfo si, IOContext context) throws IOException { + throw new UnsupportedOperationException("Old formats can't be used for writing"); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50CompoundReader.java similarity index 99% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java rename to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50CompoundReader.java index 4c8eb846182..9ff51611991 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50CompoundReader.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.lucene.codecs.lucene50; +package org.apache.lucene.backward_codecs.lucene50; import java.io.FileNotFoundException; import java.io.IOException; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java index e34502eae1f..e6e1d9e5e2d 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.backward_codecs.lucene70; +import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode; @@ -34,7 +35,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; -import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; @@ -112,7 +112,7 @@ public class Lucene70Codec extends Codec { } @Override - public final CompoundFormat compoundFormat() { + public CompoundFormat compoundFormat() { return compoundFormat; } diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java index f39ffa74199..92b6a21ee63 100644 --- 
a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.backward_codecs.lucene80; +import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; @@ -33,7 +34,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; -import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java index 0b3ffb728dd..c476e9fbcf4 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java @@ -17,6 +17,7 @@ package org.apache.lucene.backward_codecs.lucene84; import java.util.Objects; +import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode; @@ -36,7 +37,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat; import 
org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; -import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat; @@ -125,7 +125,7 @@ public class Lucene84Codec extends Codec { } @Override - public final CompoundFormat compoundFormat() { + public CompoundFormat compoundFormat() { return compoundFormat; } diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java index db025737ffb..b8659f74d87 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java @@ -18,6 +18,7 @@ package org.apache.lucene.backward_codecs.lucene86; import java.util.Objects; +import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; @@ -34,7 +35,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; -import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat; @@ -126,7 +126,7 @@ public class Lucene86Codec extends Codec { } @Override - public 
final CompoundFormat compoundFormat() { + public CompoundFormat compoundFormat() { return compoundFormat; } diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java index 8543de6b817..52bc76c899d 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java @@ -18,6 +18,7 @@ package org.apache.lucene.backward_codecs.lucene87; import java.util.Objects; +import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; import org.apache.lucene.codecs.Codec; @@ -33,7 +34,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; -import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat; import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; @@ -138,7 +138,7 @@ public class Lucene87Codec extends Codec { } @Override - public final CompoundFormat compoundFormat() { + public CompoundFormat compoundFormat() { return compoundFormat; } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/Lucene87/Lucene87RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/Lucene87/Lucene87RWCodec.java new file mode 100644 index 00000000000..6467bc7c36e --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/Lucene87/Lucene87RWCodec.java @@ -0,0 +1,30 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.backward_codecs.Lucene87; + +import org.apache.lucene.backward_codecs.lucene50.Lucene50RWCompoundFormat; +import org.apache.lucene.backward_codecs.lucene87.Lucene87Codec; +import org.apache.lucene.codecs.CompoundFormat; + +/** RW impersonation of {@link Lucene87Codec}. 
*/ +public class Lucene87RWCodec extends Lucene87Codec { + + @Override + public final CompoundFormat compoundFormat() { + return new Lucene50RWCompoundFormat(); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWCompoundFormat.java similarity index 97% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWCompoundFormat.java index 7c8ae370b6f..7e9b2a6abeb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWCompoundFormat.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.codecs.lucene50; +package org.apache.lucene.backward_codecs.lucene50; import java.io.IOException; import org.apache.lucene.codecs.CodecUtil; @@ -63,7 +63,7 @@ import org.apache.lucene.store.IndexOutput; * files length, and a String with that file's name. * */ -public final class Lucene50CompoundFormat extends CompoundFormat { +public final class Lucene50RWCompoundFormat extends CompoundFormat { /** Extension of compound file */ static final String DATA_EXTENSION = "cfs"; @@ -76,7 +76,7 @@ public final class Lucene50CompoundFormat extends CompoundFormat { static final int VERSION_CURRENT = VERSION_START; /** Sole constructor. 
*/ - public Lucene50CompoundFormat() {} + public Lucene50RWCompoundFormat() {} @Override public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50CompoundFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50CompoundFormat.java similarity index 86% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50CompoundFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50CompoundFormat.java index 15fdf171ef5..28624bf8b02 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50CompoundFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50CompoundFormat.java @@ -14,17 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.codecs.lucene50; +package org.apache.lucene.backward_codecs.lucene50; +import org.apache.lucene.backward_codecs.Lucene87.Lucene87RWCodec; import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.BaseCompoundFormatTestCase; -import org.apache.lucene.util.TestUtil; public class TestLucene50CompoundFormat extends BaseCompoundFormatTestCase { - private final Codec codec = TestUtil.getDefaultCodec(); + ; @Override protected Codec getCodec() { - return codec; + return new Lucene87RWCodec(); } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/Lucene70RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/Lucene70RWCodec.java index 16041ae9faf..7b44821765e 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/Lucene70RWCodec.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/Lucene70RWCodec.java @@ -16,8 +16,10 @@ 
*/ package org.apache.lucene.backward_codecs.lucene70; +import org.apache.lucene.backward_codecs.lucene50.Lucene50RWCompoundFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50RWPostingsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50RWStoredFieldsFormat; +import org.apache.lucene.codecs.CompoundFormat; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; @@ -58,4 +60,9 @@ public final class Lucene70RWCodec extends Lucene70Codec { public PostingsFormat postingsFormat() { return postingsFormat; } + + @Override + public CompoundFormat compoundFormat() { + return new Lucene50RWCompoundFormat(); + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene84/Lucene84RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene84/Lucene84RWCodec.java index dd08c5dd6cf..05736d94e00 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene84/Lucene84RWCodec.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene84/Lucene84RWCodec.java @@ -16,9 +16,11 @@ */ package org.apache.lucene.backward_codecs.lucene84; +import org.apache.lucene.backward_codecs.lucene50.Lucene50RWCompoundFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50RWStoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene60.Lucene60RWPointsFormat; import org.apache.lucene.backward_codecs.lucene70.Lucene70RWSegmentInfoFormat; +import org.apache.lucene.codecs.CompoundFormat; import org.apache.lucene.codecs.PointsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.StoredFieldsFormat; @@ -40,4 +42,9 @@ public class Lucene84RWCodec extends Lucene84Codec { public StoredFieldsFormat storedFieldsFormat() { return new Lucene50RWStoredFieldsFormat(); } + + @Override + public final CompoundFormat compoundFormat() { + return new 
Lucene50RWCompoundFormat(); + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene86/Lucene86RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene86/Lucene86RWCodec.java index d9d3a49cbe3..c1d278f38d2 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene86/Lucene86RWCodec.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene86/Lucene86RWCodec.java @@ -16,8 +16,10 @@ */ package org.apache.lucene.backward_codecs.lucene86; +import org.apache.lucene.backward_codecs.lucene50.Lucene50RWCompoundFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50RWStoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; +import org.apache.lucene.codecs.CompoundFormat; import org.apache.lucene.codecs.StoredFieldsFormat; /** RW impersonation of {@link Lucene86Codec}. */ @@ -39,4 +41,9 @@ public class Lucene86RWCodec extends Lucene86Codec { public StoredFieldsFormat storedFieldsFormat() { return storedFieldsFormat; } + + @Override + public final CompoundFormat compoundFormat() { + return new Lucene50RWCompoundFormat(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java index 625059259ec..3f84280600d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java @@ -30,7 +30,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.VectorFormat; -import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat; import 
org.apache.lucene.codecs.lucene80.Lucene80NormsFormat; @@ -73,7 +72,7 @@ public class Lucene90Codec extends Codec { private final FieldInfosFormat fieldInfosFormat = new Lucene90FieldInfosFormat(); private final SegmentInfoFormat segmentInfosFormat = new Lucene86SegmentInfoFormat(); private final LiveDocsFormat liveDocsFormat = new Lucene90LiveDocsFormat(); - private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); + private final CompoundFormat compoundFormat = new Lucene90CompoundFormat(); private final PostingsFormat defaultFormat; private final PostingsFormat postingsFormat = diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java new file mode 100644 index 00000000000..d06802c8954 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.codecs.lucene90; + +import java.io.IOException; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.CompoundDirectory; +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; + +/** + * Lucene 9.0 compound file format + * + *

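<p>Files:
+ *
+ * <ul>
+ *   <li><code>.cfs</code>: An optional "virtual" file consisting of all the other index files for
+ *       systems that frequently run out of file handles.
+ *   <li><code>.cfe</code>: The "virtual" compound file's entry table holding all entries in the
+ *       corresponding .cfs file.
+ * </ul>
+ *
+ * <p>Description:
+ *
+ * <ul>
+ *   <li>Compound (.cfs) --&gt; Header, FileData <sup>FileCount</sup>, Footer
+ *   <li>Compound Entry Table (.cfe) --&gt; Header, FileCount, &lt;FileName, DataOffset,
+ *       DataLength&gt; <sup>FileCount</sup>
+ *   <li>Header --&gt; {@link CodecUtil#writeIndexHeader IndexHeader}
+ *   <li>FileCount --&gt; {@link DataOutput#writeVInt VInt}
+ *   <li>DataOffset,DataLength,Checksum --&gt; {@link DataOutput#writeLong UInt64}
+ *   <li>FileName --&gt; {@link DataOutput#writeString String}
+ *   <li>FileData --&gt; raw file data
+ *   <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}
+ * </ul>
+ *
+ * <p>Notes:
+ *
+ * <ul>
+ *   <li>FileCount indicates how many files are contained in this compound file. The entry table
+ *       that follows has that many entries.
+ *   <li>Each directory entry contains a long pointer to the start of this file's data section, the
+ *       files length, and a String with that file's name.
+ * </ul>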
+ */ +public final class Lucene90CompoundFormat extends CompoundFormat { + + /** Extension of compound file */ + static final String DATA_EXTENSION = "cfs"; + /** Extension of compound file entries */ + static final String ENTRIES_EXTENSION = "cfe"; + + static final String DATA_CODEC = "Lucene90CompoundData"; + static final String ENTRY_CODEC = "Lucene90CompoundEntries"; + static final int VERSION_START = 0; + static final int VERSION_CURRENT = VERSION_START; + + /** Sole constructor. */ + public Lucene90CompoundFormat() {} + + @Override + public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) + throws IOException { + return new Lucene90CompoundReader(dir, si, context); + } + + @Override + public void write(Directory dir, SegmentInfo si, IOContext context) throws IOException { + String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION); + String entriesFile = IndexFileNames.segmentFileName(si.name, "", ENTRIES_EXTENSION); + + try (IndexOutput data = dir.createOutput(dataFile, context); + IndexOutput entries = dir.createOutput(entriesFile, context)) { + CodecUtil.writeIndexHeader(data, DATA_CODEC, VERSION_CURRENT, si.getId(), ""); + CodecUtil.writeIndexHeader(entries, ENTRY_CODEC, VERSION_CURRENT, si.getId(), ""); + + writeCompoundFile(entries, data, dir, si); + + CodecUtil.writeFooter(data); + CodecUtil.writeFooter(entries); + } + } + + private void writeCompoundFile( + IndexOutput entries, IndexOutput data, Directory dir, SegmentInfo si) throws IOException { + // write number of files + entries.writeVInt(si.files().size()); + for (String file : si.files()) { + // write bytes for file + long startOffset = data.getFilePointer(); + try (ChecksumIndexInput in = dir.openChecksumInput(file, IOContext.READONCE)) { + + // just copies the index header, verifying that its id matches what we expect + CodecUtil.verifyAndCopyIndexHeader(in, data, si.getId()); + + // copy all bytes except the footer + long numBytesToCopy 
= in.length() - CodecUtil.footerLength() - in.getFilePointer(); + data.copyBytes(in, numBytesToCopy); + + // verify footer (checksum) matches for the incoming file we are copying + long checksum = CodecUtil.checkFooter(in); + + // this is poached from CodecUtil.writeFooter, but we need to use our own checksum, not + // data.getChecksum(), but I think + // adding a public method to CodecUtil to do that is somewhat dangerous: + data.writeInt(CodecUtil.FOOTER_MAGIC); + data.writeInt(0); + data.writeLong(checksum); + } + long endOffset = data.getFilePointer(); + + long length = endOffset - startOffset; + + // write entry for file + entries.writeString(IndexFileNames.stripSegmentName(file)); + entries.writeLong(startOffset); + entries.writeLong(length); + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundReader.java new file mode 100644 index 00000000000..cbf1e0df38f --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundReader.java @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.codecs.lucene90; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.CompoundDirectory; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.IOUtils; + +/** + * Class for accessing a compound stream. This class implements a directory, but is limited to only + * read operations. Directory methods that would normally modify data throw an exception. + * + * @lucene.experimental + */ +final class Lucene90CompoundReader extends CompoundDirectory { + + /** Offset/Length for a slice inside of a compound file */ + public static final class FileEntry { + long offset; + long length; + } + + private final Directory directory; + private final String segmentName; + private final Map entries; + private final IndexInput handle; + private int version; + + /** Create a new CompoundFileDirectory. */ + // TODO: we should just pre-strip "entries" and append segment name up-front like simpletext? 
+ // this need not be a "general purpose" directory anymore (it only writes index files) + public Lucene90CompoundReader(Directory directory, SegmentInfo si, IOContext context) + throws IOException { + this.directory = directory; + this.segmentName = si.name; + String dataFileName = + IndexFileNames.segmentFileName(segmentName, "", Lucene90CompoundFormat.DATA_EXTENSION); + String entriesFileName = + IndexFileNames.segmentFileName(segmentName, "", Lucene90CompoundFormat.ENTRIES_EXTENSION); + this.entries = readEntries(si.getId(), directory, entriesFileName); + boolean success = false; + + long expectedLength = CodecUtil.indexHeaderLength(Lucene90CompoundFormat.DATA_CODEC, ""); + for (Map.Entry ent : entries.entrySet()) { + expectedLength += ent.getValue().length; + } + expectedLength += CodecUtil.footerLength(); + + handle = directory.openInput(dataFileName, context); + try { + CodecUtil.checkIndexHeader( + handle, Lucene90CompoundFormat.DATA_CODEC, version, version, si.getId(), ""); + + // NOTE: data file is too costly to verify checksum against all the bytes on open, + // but for now we at least verify proper structure of the checksum footer: which looks + // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption + // such as file truncation. + CodecUtil.retrieveChecksum(handle); + + // We also validate length, because e.g. 
if you strip 16 bytes off the .cfs we otherwise + // would not detect it: + if (handle.length() != expectedLength) { + throw new CorruptIndexException( + "length should be " + expectedLength + " bytes, but is " + handle.length() + " instead", + handle); + } + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(handle); + } + } + } + + /** Helper method that reads CFS entries from an input stream */ + private Map readEntries( + byte[] segmentID, Directory dir, String entriesFileName) throws IOException { + Map mapping = null; + try (ChecksumIndexInput entriesStream = + dir.openChecksumInput(entriesFileName, IOContext.READONCE)) { + Throwable priorE = null; + try { + version = + CodecUtil.checkIndexHeader( + entriesStream, + Lucene90CompoundFormat.ENTRY_CODEC, + Lucene90CompoundFormat.VERSION_START, + Lucene90CompoundFormat.VERSION_CURRENT, + segmentID, + ""); + + mapping = readMapping(entriesStream); + + } catch (Throwable exception) { + priorE = exception; + } finally { + CodecUtil.checkFooter(entriesStream, priorE); + } + } + return Collections.unmodifiableMap(mapping); + } + + private Map readMapping(IndexInput entriesStream) throws IOException { + final int numEntries = entriesStream.readVInt(); + Map mapping = new HashMap<>(numEntries); + for (int i = 0; i < numEntries; i++) { + final FileEntry fileEntry = new FileEntry(); + final String id = entriesStream.readString(); + FileEntry previous = mapping.put(id, fileEntry); + if (previous != null) { + throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS ", entriesStream); + } + fileEntry.offset = entriesStream.readLong(); + fileEntry.length = entriesStream.readLong(); + } + return mapping; + } + + @Override + public void close() throws IOException { + IOUtils.close(handle); + } + + @Override + public IndexInput openInput(String name, IOContext context) throws IOException { + ensureOpen(); + final String id = IndexFileNames.stripSegmentName(name); + final 
FileEntry entry = entries.get(id); + if (entry == null) { + String datFileName = + IndexFileNames.segmentFileName(segmentName, "", Lucene90CompoundFormat.DATA_EXTENSION); + throw new FileNotFoundException( + "No sub-file with id " + + id + + " found in compound file \"" + + datFileName + + "\" (fileName=" + + name + + " files: " + + entries.keySet() + + ")"); + } + return handle.slice(name, entry.offset, entry.length); + } + + /** Returns an array of strings, one for each file in the directory. */ + @Override + public String[] listAll() { + ensureOpen(); + String[] res = entries.keySet().toArray(new String[entries.size()]); + + // Add the segment name + for (int i = 0; i < res.length; i++) { + res[i] = segmentName + res[i]; + } + return res; + } + + /** + * Returns the length of a file in the directory. + * + * @throws IOException if the file does not exist + */ + @Override + public long fileLength(String name) throws IOException { + ensureOpen(); + FileEntry e = entries.get(IndexFileNames.stripSegmentName(name)); + if (e == null) throw new FileNotFoundException(name); + return e.length; + } + + @Override + public String toString() { + return "CompoundFileDirectory(segment=\"" + segmentName + "\" in dir=" + directory + ")"; + } + + @Override + public Set getPendingDeletions() { + return Collections.emptySet(); + } + + @Override + public void checkIntegrity() throws IOException { + CodecUtil.checksumEntireFile(handle); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java index b7a9d4ad268..4d34a40569d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java @@ -234,7 +234,7 @@ * Stores metadata about a segment * * - * {@link org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat Compound File} + * {@link 
org.apache.lucene.codecs.lucene90.Lucene90CompoundFormat Compound File} * .cfs, .cfe * An optional "virtual" file consisting of all the other index files for * systems that frequently run out of file handles. diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90CompoundFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90CompoundFormat.java new file mode 100644 index 00000000000..ed78abd345e --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90CompoundFormat.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene90; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.BaseCompoundFormatTestCase; +import org.apache.lucene.util.TestUtil; + +public class TestLucene90CompoundFormat extends BaseCompoundFormatTestCase { + private final Codec codec = TestUtil.getDefaultCodec(); + + @Override + protected Codec getCodec() { + return codec; + } +}