diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50CompoundFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50CompoundFormat.java
new file mode 100644
index 00000000000..75d6912d41d
--- /dev/null
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50CompoundFormat.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.backward_codecs.lucene50;
+
+import java.io.IOException;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.CompoundDirectory;
+import org.apache.lucene.codecs.CompoundFormat;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+
+/**
+ * Lucene 5.0 compound file format
+ *
+ * <p>Files:
+ *
+ * <ul>
+ *   <li><code>.cfs</code>: An optional "virtual" file consisting of all the other index files for
+ *       systems that frequently run out of file handles.
+ *   <li><code>.cfe</code>: The "virtual" compound file's entry table holding all entries in the
+ *       corresponding .cfs file.
+ * </ul>
+ *
+ * <p>Description:
+ *
+ * <ul>
+ *   <li>Compound (.cfs) --&gt; Header, FileData <sup>FileCount</sup>, Footer
+ *   <li>Compound Entry Table (.cfe) --&gt; Header, FileCount, &lt;FileName, DataOffset,
+ *       DataLength&gt; <sup>FileCount</sup>
+ *   <li>Header --&gt; {@link CodecUtil#writeIndexHeader IndexHeader}
+ *   <li>FileCount --&gt; {@link DataOutput#writeVInt VInt}
+ *   <li>DataOffset,DataLength,Checksum --&gt; {@link DataOutput#writeLong UInt64}
+ *   <li>FileName --&gt; {@link DataOutput#writeString String}
+ *   <li>FileData --&gt; raw file data
+ *   <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}
+ * </ul>
+ *
+ * <p>Notes:
+ *
+ * <ul>
+ *   <li>FileCount indicates how many files are contained in this compound file. The entry table
+ *       that follows has that many entries.
+ *   <li>Each directory entry contains a long pointer to the start of this file's data section, the
+ *       file's length, and a String with that file's name.
+ * </ul>
+ */
+public final class Lucene50CompoundFormat extends CompoundFormat {
+
+  /** First version of this format. */
+  static final int VERSION_START = 0;
+
+  /** Current version of this format; the same as {@link #VERSION_START}. */
+  static final int VERSION_CURRENT = VERSION_START;
+
+  /** Codec name for the compound data file. */
+  static final String DATA_CODEC = "Lucene50CompoundData";
+
+  /** Codec name for the compound entry-table file. */
+  static final String ENTRY_CODEC = "Lucene50CompoundEntries";
+
+  /** Extension of the compound data file. */
+  static final String DATA_EXTENSION = "cfs";
+
+  /** Extension of the compound entry-table file. */
+  static final String ENTRIES_EXTENSION = "cfe";
+
+  /** Creates the format; the class keeps no instance state. */
+  public Lucene50CompoundFormat() {}
+
+  /**
+   * Always fails: this backward-compatibility format cannot write.
+   *
+   * @throws UnsupportedOperationException on every call
+   */
+  @Override
+  public void write(Directory dir, SegmentInfo si, IOContext context) throws IOException {
+    throw new UnsupportedOperationException("Old formats can't be used for writing");
+  }
+
+  /**
+   * Returns a new {@link Lucene50CompoundReader} built from the given directory, segment info and
+   * context.
+   */
+  @Override
+  public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context)
+      throws IOException {
+    final CompoundDirectory reader = new Lucene50CompoundReader(dir, si, context);
+    return reader;
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50CompoundReader.java
similarity index 99%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java
rename to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50CompoundReader.java
index 4c8eb846182..9ff51611991 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50CompoundReader.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene50;
+package org.apache.lucene.backward_codecs.lucene50;
import java.io.FileNotFoundException;
import java.io.IOException;
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java
index e34502eae1f..e6e1d9e5e2d 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java
@@ -16,6 +16,7 @@
*/
package org.apache.lucene.backward_codecs.lucene70;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
@@ -34,7 +35,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
@@ -112,7 +112,7 @@ public class Lucene70Codec extends Codec {
}
+ // Not final: a read-write test subclass overrides this to return another compound format.
@Override
- public final CompoundFormat compoundFormat() {
+ public CompoundFormat compoundFormat() {
return compoundFormat;
}
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java
index f39ffa74199..92b6a21ee63 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java
@@ -16,6 +16,7 @@
*/
package org.apache.lucene.backward_codecs.lucene80;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
@@ -33,7 +34,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java
index 0b3ffb728dd..c476e9fbcf4 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java
@@ -17,6 +17,7 @@
package org.apache.lucene.backward_codecs.lucene84;
import java.util.Objects;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
@@ -36,7 +37,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat;
@@ -125,7 +125,7 @@ public class Lucene84Codec extends Codec {
}
+ // Not final: a read-write test subclass overrides this to return another compound format.
@Override
- public final CompoundFormat compoundFormat() {
+ public CompoundFormat compoundFormat() {
return compoundFormat;
}
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java
index db025737ffb..b8659f74d87 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java
@@ -18,6 +18,7 @@
package org.apache.lucene.backward_codecs.lucene86;
import java.util.Objects;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
@@ -34,7 +35,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat;
@@ -126,7 +126,7 @@ public class Lucene86Codec extends Codec {
}
+ // Not final: a read-write test subclass overrides this to return another compound format.
@Override
- public final CompoundFormat compoundFormat() {
+ public CompoundFormat compoundFormat() {
return compoundFormat;
}
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java
index 8543de6b817..52bc76c899d 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java
@@ -18,6 +18,7 @@
package org.apache.lucene.backward_codecs.lucene87;
import java.util.Objects;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
import org.apache.lucene.codecs.Codec;
@@ -33,7 +34,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat;
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
@@ -138,7 +138,7 @@ public class Lucene87Codec extends Codec {
}
+ // Not final: a read-write test subclass overrides this to return another compound format.
@Override
- public final CompoundFormat compoundFormat() {
+ public CompoundFormat compoundFormat() {
return compoundFormat;
}
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/Lucene87/Lucene87RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/Lucene87/Lucene87RWCodec.java
new file mode 100644
index 00000000000..6467bc7c36e
--- /dev/null
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/Lucene87/Lucene87RWCodec.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.backward_codecs.Lucene87;
+
+import org.apache.lucene.backward_codecs.lucene50.Lucene50RWCompoundFormat;
+import org.apache.lucene.backward_codecs.lucene87.Lucene87Codec;
+import org.apache.lucene.codecs.CompoundFormat;
+
+/**
+ * Read-write (RW) stand-in for {@link Lucene87Codec}, used by tests: it substitutes {@link
+ * Lucene50RWCompoundFormat} for the compound format of the parent codec.
+ */
+public class Lucene87RWCodec extends Lucene87Codec {
+
+  /** Returns a newly created {@link Lucene50RWCompoundFormat} on every call. */
+  @Override
+  public final CompoundFormat compoundFormat() {
+    final CompoundFormat rwFormat = new Lucene50RWCompoundFormat();
+    return rwFormat;
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWCompoundFormat.java
similarity index 97%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWCompoundFormat.java
index 7c8ae370b6f..7e9b2a6abeb 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWCompoundFormat.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene50;
+package org.apache.lucene.backward_codecs.lucene50;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
@@ -63,7 +63,7 @@ import org.apache.lucene.store.IndexOutput;
* files length, and a String with that file's name.
*
*/
-public final class Lucene50CompoundFormat extends CompoundFormat {
+public final class Lucene50RWCompoundFormat extends CompoundFormat {
/** Extension of compound file */
static final String DATA_EXTENSION = "cfs";
@@ -76,7 +76,7 @@ public final class Lucene50CompoundFormat extends CompoundFormat {
static final int VERSION_CURRENT = VERSION_START;
/** Sole constructor. */
- public Lucene50CompoundFormat() {}
+ public Lucene50RWCompoundFormat() {}
@Override
public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context)
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50CompoundFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50CompoundFormat.java
similarity index 86%
rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50CompoundFormat.java
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50CompoundFormat.java
index 15fdf171ef5..28624bf8b02 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50CompoundFormat.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50CompoundFormat.java
@@ -14,17 +14,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene50;
+package org.apache.lucene.backward_codecs.lucene50;
+import org.apache.lucene.backward_codecs.Lucene87.Lucene87RWCodec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseCompoundFormatTestCase;
-import org.apache.lucene.util.TestUtil;
+/**
+ * Runs the shared compound-format tests through {@link Lucene87RWCodec}, which supplies the
+ * read-write Lucene 5.0 compound format.
+ */
public class TestLucene50CompoundFormat extends BaseCompoundFormatTestCase {
- private final Codec codec = TestUtil.getDefaultCodec();
@Override
protected Codec getCodec() {
- return codec;
+ return new Lucene87RWCodec();
}
}
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/Lucene70RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/Lucene70RWCodec.java
index 16041ae9faf..7b44821765e 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/Lucene70RWCodec.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/Lucene70RWCodec.java
@@ -16,8 +16,10 @@
*/
package org.apache.lucene.backward_codecs.lucene70;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50RWCompoundFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50RWPostingsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50RWStoredFieldsFormat;
+import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
@@ -58,4 +60,9 @@ public final class Lucene70RWCodec extends Lucene70Codec {
public PostingsFormat postingsFormat() {
return postingsFormat;
}
+
+  /** Returns a newly created {@link Lucene50RWCompoundFormat} on every call. */
+  @Override
+  public CompoundFormat compoundFormat() {
+    final CompoundFormat rwFormat = new Lucene50RWCompoundFormat();
+    return rwFormat;
+  }
}
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene84/Lucene84RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene84/Lucene84RWCodec.java
index dd08c5dd6cf..05736d94e00 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene84/Lucene84RWCodec.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene84/Lucene84RWCodec.java
@@ -16,9 +16,11 @@
*/
package org.apache.lucene.backward_codecs.lucene84;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50RWCompoundFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50RWStoredFieldsFormat;
import org.apache.lucene.backward_codecs.lucene60.Lucene60RWPointsFormat;
import org.apache.lucene.backward_codecs.lucene70.Lucene70RWSegmentInfoFormat;
+import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
@@ -40,4 +42,9 @@ public class Lucene84RWCodec extends Lucene84Codec {
public StoredFieldsFormat storedFieldsFormat() {
return new Lucene50RWStoredFieldsFormat();
}
+
+  /** Returns a newly created {@link Lucene50RWCompoundFormat} on every call. */
+  @Override
+  public final CompoundFormat compoundFormat() {
+    final CompoundFormat rwFormat = new Lucene50RWCompoundFormat();
+    return rwFormat;
+  }
}
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene86/Lucene86RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene86/Lucene86RWCodec.java
index d9d3a49cbe3..c1d278f38d2 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene86/Lucene86RWCodec.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene86/Lucene86RWCodec.java
@@ -16,8 +16,10 @@
*/
package org.apache.lucene.backward_codecs.lucene86;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50RWCompoundFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50RWStoredFieldsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
+import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
/** RW impersonation of {@link Lucene86Codec}. */
@@ -39,4 +41,9 @@ public class Lucene86RWCodec extends Lucene86Codec {
public StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}
+
+  /** Returns a newly created {@link Lucene50RWCompoundFormat} on every call. */
+  @Override
+  public final CompoundFormat compoundFormat() {
+    final CompoundFormat rwFormat = new Lucene50RWCompoundFormat();
+    return rwFormat;
+  }
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java
index 625059259ec..3f84280600d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java
@@ -30,7 +30,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat;
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
@@ -73,7 +72,7 @@ public class Lucene90Codec extends Codec {
private final FieldInfosFormat fieldInfosFormat = new Lucene90FieldInfosFormat();
private final SegmentInfoFormat segmentInfosFormat = new Lucene86SegmentInfoFormat();
private final LiveDocsFormat liveDocsFormat = new Lucene90LiveDocsFormat();
- private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
+ private final CompoundFormat compoundFormat = new Lucene90CompoundFormat();
private final PostingsFormat defaultFormat;
private final PostingsFormat postingsFormat =
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java
new file mode 100644
index 00000000000..d06802c8954
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene90;
+
+import java.io.IOException;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.CompoundDirectory;
+import org.apache.lucene.codecs.CompoundFormat;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * Lucene 9.0 compound file format
+ *
+ * <p>Files:
+ *
+ * <ul>
+ *   <li><code>.cfs</code>: An optional "virtual" file consisting of all the other index files for
+ *       systems that frequently run out of file handles.
+ *   <li><code>.cfe</code>: The "virtual" compound file's entry table holding all entries in the
+ *       corresponding .cfs file.
+ * </ul>
+ *
+ * <p>Description:
+ *
+ * <ul>
+ *   <li>Compound (.cfs) --&gt; Header, FileData <sup>FileCount</sup>, Footer
+ *   <li>Compound Entry Table (.cfe) --&gt; Header, FileCount, &lt;FileName, DataOffset,
+ *       DataLength&gt; <sup>FileCount</sup>
+ *   <li>Header --&gt; {@link CodecUtil#writeIndexHeader IndexHeader}
+ *   <li>FileCount --&gt; {@link DataOutput#writeVInt VInt}
+ *   <li>DataOffset,DataLength,Checksum --&gt; {@link DataOutput#writeLong UInt64}
+ *   <li>FileName --&gt; {@link DataOutput#writeString String}
+ *   <li>FileData --&gt; raw file data
+ *   <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}
+ * </ul>
+ *
+ * <p>Notes:
+ *
+ * <ul>
+ *   <li>FileCount indicates how many files are contained in this compound file. The entry table
+ *       that follows has that many entries.
+ *   <li>Each directory entry contains a long pointer to the start of this file's data section, the
+ *       file's length, and a String with that file's name.
+ * </ul>
+ */
+public final class Lucene90CompoundFormat extends CompoundFormat {
+
+  /** Extension of the compound data file. */
+  static final String DATA_EXTENSION = "cfs";
+
+  /** Extension of the compound entry-table file. */
+  static final String ENTRIES_EXTENSION = "cfe";
+
+  /** Codec name written into the index header of the data file. */
+  static final String DATA_CODEC = "Lucene90CompoundData";
+
+  /** Codec name written into the index header of the entry-table file. */
+  static final String ENTRY_CODEC = "Lucene90CompoundEntries";
+
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  /** Creates the format; the class keeps no instance state. */
+  public Lucene90CompoundFormat() {}
+
+  /**
+   * Returns a new {@link Lucene90CompoundReader} built from the given directory, segment info and
+   * context.
+   */
+  @Override
+  public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context)
+      throws IOException {
+    final CompoundDirectory reader = new Lucene90CompoundReader(dir, si, context);
+    return reader;
+  }
+
+  /**
+   * Writes the segment's files into one data file and records each file's name, offset and length
+   * in the companion entry-table file. Both outputs get an index header carrying the segment id
+   * and are closed with a codec footer.
+   */
+  @Override
+  public void write(Directory dir, SegmentInfo si, IOContext context) throws IOException {
+    final String entryTableName = IndexFileNames.segmentFileName(si.name, "", ENTRIES_EXTENSION);
+    final String dataName = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);
+
+    // The data output is created first and therefore closed last.
+    try (IndexOutput data = dir.createOutput(dataName, context);
+        IndexOutput entries = dir.createOutput(entryTableName, context)) {
+      final byte[] segmentId = si.getId();
+      CodecUtil.writeIndexHeader(data, DATA_CODEC, VERSION_CURRENT, segmentId, "");
+      CodecUtil.writeIndexHeader(entries, ENTRY_CODEC, VERSION_CURRENT, si.getId(), "");
+
+      writeCompoundFile(entries, data, dir, si);
+
+      CodecUtil.writeFooter(data);
+      CodecUtil.writeFooter(entries);
+    }
+  }
+
+  /** Writes the file count, then appends every file to {@code data} and adds its table entry. */
+  private void writeCompoundFile(
+      IndexOutput entries, IndexOutput data, Directory dir, SegmentInfo si) throws IOException {
+    // The entry table starts with the number of entries that follow.
+    entries.writeVInt(si.files().size());
+    for (String file : si.files()) {
+      final long start = data.getFilePointer();
+      appendFile(file, data, dir, si);
+      final long end = data.getFilePointer();
+
+      // Table entry: name without the segment prefix, then offset and length inside the data file.
+      entries.writeString(IndexFileNames.stripSegmentName(file));
+      entries.writeLong(start);
+      entries.writeLong(end - start);
+    }
+  }
+
+  /** Copies one source file into {@code data}, re-emitting its footer with its own checksum. */
+  private void appendFile(String file, IndexOutput data, Directory dir, SegmentInfo si)
+      throws IOException {
+    try (ChecksumIndexInput in = dir.openChecksumInput(file, IOContext.READONCE)) {
+      // Copy the index header through, verifying that its id is the one we expect.
+      CodecUtil.verifyAndCopyIndexHeader(in, data, si.getId());
+
+      // Everything between the header and the footer is copied as-is.
+      final long footerStart = in.length() - CodecUtil.footerLength();
+      data.copyBytes(in, footerStart - in.getFilePointer());
+
+      // Verify the footer (checksum) of the file being copied and keep its checksum.
+      final long sourceChecksum = CodecUtil.checkFooter(in);
+
+      // Poached from CodecUtil.writeFooter: the footer must carry the source file's checksum
+      // rather than data.getChecksum(), and exposing a public CodecUtil method for that is
+      // considered somewhat dangerous.
+      data.writeInt(CodecUtil.FOOTER_MAGIC);
+      data.writeInt(0);
+      data.writeLong(sourceChecksum);
+    }
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundReader.java
new file mode 100644
index 00000000000..cbf1e0df38f
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundReader.java
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene90;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.CompoundDirectory;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Class for accessing a compound stream. This class implements a directory, but is limited to only
+ * read operations. Directory methods that would normally modify data throw an exception.
+ *
+ * @lucene.experimental
+ */
+final class Lucene90CompoundReader extends CompoundDirectory {
+
+  /** Offset/Length for a slice inside of a compound file */
+  public static final class FileEntry {
+    long offset;
+    long length;
+  }
+
+  private final Directory directory;
+  private final String segmentName;
+  // Keyed by the name read from the entry table; lookups strip the segment name before querying.
+  private final Map<String, FileEntry> entries;
+  private final IndexInput handle;
+  // Assigned by readEntries() from the entry-table header; the data file header must match it.
+  private int version;
+
+  /**
+   * Create a new CompoundFileDirectory.
+   *
+   * <p>Reads the whole entry table first, then opens the data file and checks its index header,
+   * the structure of its footer and its total length. The data file handle is closed again if any
+   * of those checks fails.
+   *
+   * @throws CorruptIndexException if the data file's length is not header + all entries + footer
+   */
+  // TODO: we should just pre-strip "entries" and append segment name up-front like simpletext?
+  // this need not be a "general purpose" directory anymore (it only writes index files)
+  public Lucene90CompoundReader(Directory directory, SegmentInfo si, IOContext context)
+      throws IOException {
+    this.directory = directory;
+    this.segmentName = si.name;
+    String dataFileName =
+        IndexFileNames.segmentFileName(segmentName, "", Lucene90CompoundFormat.DATA_EXTENSION);
+    String entriesFileName =
+        IndexFileNames.segmentFileName(segmentName, "", Lucene90CompoundFormat.ENTRIES_EXTENSION);
+    this.entries = readEntries(si.getId(), directory, entriesFileName);
+    boolean success = false;
+
+    // Expected size of the data file: its header, every entry's length, and its footer.
+    long expectedLength = CodecUtil.indexHeaderLength(Lucene90CompoundFormat.DATA_CODEC, "");
+    for (Map.Entry<String, FileEntry> ent : entries.entrySet()) {
+      expectedLength += ent.getValue().length;
+    }
+    expectedLength += CodecUtil.footerLength();
+
+    handle = directory.openInput(dataFileName, context);
+    try {
+      // The data file must carry exactly the version that was read from the entry table.
+      CodecUtil.checkIndexHeader(
+          handle, Lucene90CompoundFormat.DATA_CODEC, version, version, si.getId(), "");
+
+      // NOTE: data file is too costly to verify checksum against all the bytes on open,
+      // but for now we at least verify proper structure of the checksum footer: which looks
+      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+      // such as file truncation.
+      CodecUtil.retrieveChecksum(handle);
+
+      // We also validate length, because e.g. if you strip 16 bytes off the .cfs we otherwise
+      // would not detect it:
+      if (handle.length() != expectedLength) {
+        throw new CorruptIndexException(
+            "length should be " + expectedLength + " bytes, but is " + handle.length() + " instead",
+            handle);
+      }
+
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(handle);
+      }
+    }
+  }
+
+  /**
+   * Helper method that reads CFS entries from an input stream.
+   *
+   * <p>Also stores the version found in the entry-table header in {@link #version}.
+   *
+   * @return an unmodifiable view of the entries that were read
+   */
+  private Map<String, FileEntry> readEntries(
+      byte[] segmentID, Directory dir, String entriesFileName) throws IOException {
+    Map<String, FileEntry> mapping = null;
+    try (ChecksumIndexInput entriesStream =
+        dir.openChecksumInput(entriesFileName, IOContext.READONCE)) {
+      Throwable priorE = null;
+      try {
+        version =
+            CodecUtil.checkIndexHeader(
+                entriesStream,
+                Lucene90CompoundFormat.ENTRY_CODEC,
+                Lucene90CompoundFormat.VERSION_START,
+                Lucene90CompoundFormat.VERSION_CURRENT,
+                segmentID,
+                "");
+
+        mapping = readMapping(entriesStream);
+
+      } catch (Throwable exception) {
+        priorE = exception;
+      } finally {
+        // Any parse failure is handed to checkFooter together with the stream; it is expected to
+        // rethrow it (TODO confirm) - otherwise mapping would still be null below.
+        CodecUtil.checkFooter(entriesStream, priorE);
+      }
+    }
+    return Collections.unmodifiableMap(mapping);
+  }
+
+  /**
+   * Reads the entry count followed by that many (name, offset, length) records.
+   *
+   * @throws CorruptIndexException if the same name occurs twice
+   */
+  private Map<String, FileEntry> readMapping(IndexInput entriesStream) throws IOException {
+    final int numEntries = entriesStream.readVInt();
+    Map<String, FileEntry> mapping = new HashMap<>(numEntries);
+    for (int i = 0; i < numEntries; i++) {
+      final FileEntry fileEntry = new FileEntry();
+      final String id = entriesStream.readString();
+      // The entry is registered before its offset/length are read so a duplicate fails right away.
+      FileEntry previous = mapping.put(id, fileEntry);
+      if (previous != null) {
+        throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS ", entriesStream);
+      }
+      fileEntry.offset = entriesStream.readLong();
+      fileEntry.length = entriesStream.readLong();
+    }
+    return mapping;
+  }
+
+  /** Closes the handle on the compound data file. */
+  @Override
+  public void close() throws IOException {
+    IOUtils.close(handle);
+  }
+
+  /**
+   * Returns a slice of the data file covering the named sub-file.
+   *
+   * @throws FileNotFoundException if the entry table has no entry for {@code name}
+   */
+  @Override
+  public IndexInput openInput(String name, IOContext context) throws IOException {
+    ensureOpen();
+    final String id = IndexFileNames.stripSegmentName(name);
+    final FileEntry entry = entries.get(id);
+    if (entry == null) {
+      String datFileName =
+          IndexFileNames.segmentFileName(segmentName, "", Lucene90CompoundFormat.DATA_EXTENSION);
+      throw new FileNotFoundException(
+          "No sub-file with id "
+              + id
+              + " found in compound file \""
+              + datFileName
+              + "\" (fileName="
+              + name
+              + " files: "
+              + entries.keySet()
+              + ")");
+    }
+    return handle.slice(name, entry.offset, entry.length);
+  }
+
+  /** Returns an array of strings, one for each file in the directory. */
+  @Override
+  public String[] listAll() {
+    ensureOpen();
+    // NOTE(review): the order follows the HashMap's key iteration, i.e. it is unspecified -
+    // confirm that no caller relies on sorted output.
+    String[] res = entries.keySet().toArray(new String[entries.size()]);
+
+    // Add the segment name
+    for (int i = 0; i < res.length; i++) {
+      res[i] = segmentName + res[i];
+    }
+    return res;
+  }
+
+  /**
+   * Returns the length of a file in the directory.
+   *
+   * @throws IOException if the file does not exist
+   */
+  @Override
+  public long fileLength(String name) throws IOException {
+    ensureOpen();
+    FileEntry e = entries.get(IndexFileNames.stripSegmentName(name));
+    if (e == null) {
+      throw new FileNotFoundException(name);
+    }
+    return e.length;
+  }
+
+  @Override
+  public String toString() {
+    return "CompoundFileDirectory(segment=\"" + segmentName + "\" in dir=" + directory + ")";
+  }
+
+  /** Always returns an empty set. */
+  @Override
+  public Set<String> getPendingDeletions() {
+    return Collections.emptySet();
+  }
+
+  /** Checksums the whole compound data file through {@link CodecUtil#checksumEntireFile}. */
+  @Override
+  public void checkIntegrity() throws IOException {
+    CodecUtil.checksumEntireFile(handle);
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java
index b7a9d4ad268..4d34a40569d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java
@@ -234,7 +234,7 @@
* Stores metadata about a segment |
*
*
- * {@link org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat Compound File} |
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90CompoundFormat Compound File} |
* .cfs, .cfe |
* An optional "virtual" file consisting of all the other index files for
* systems that frequently run out of file handles. |
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90CompoundFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90CompoundFormat.java
new file mode 100644
index 00000000000..ed78abd345e
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90CompoundFormat.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene90;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BaseCompoundFormatTestCase;
+import org.apache.lucene.util.TestUtil;
+
+/**
+ * Runs the shared compound-format tests against the codec returned by {@link
+ * TestUtil#getDefaultCodec()}.
+ */
+public class TestLucene90CompoundFormat extends BaseCompoundFormatTestCase {
+
+  /** Looked up once per test instance and handed out by {@link #getCodec()}. */
+  private final Codec defaultCodec = TestUtil.getDefaultCodec();
+
+  @Override
+  protected Codec getCodec() {
+    return this.defaultCodec;
+  }
+}