diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e2c646c6121..0e16e670769 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -198,6 +198,9 @@ Optimizations * LUCENE-9395: ConstantValuesSource now shares a single DoubleValues instance across all segments (Tony Xu) +* LUCENE-9447: BEST_COMPRESSION now provides higher compression ratios on highly + compressible data. (Adrien Grand) + * LUCENE-9373: FunctionMatchQuery now accepts a "matchCost" optimization hint. (Maxim Glazkov, David Smiley) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java similarity index 91% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java index 035fbd9b065..6f3b162d07e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java @@ -100,7 +100,7 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter; * larger than (2<sup>31</sup> - 2<sup>14</sup>) bytes. * @lucene.experimental */ -public final class Lucene50StoredFieldsFormat extends StoredFieldsFormat { +public class Lucene50StoredFieldsFormat extends StoredFieldsFormat { /** Configuration option for stored fields. 
*/ public static enum Mode { @@ -126,7 +126,7 @@ public final class Lucene50StoredFieldsFormat extends StoredFieldsFormat { } @Override - public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { + public final StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { String value = si.getAttribute(MODE_KEY); if (value == null) { throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name); @@ -137,12 +137,7 @@ public final class Lucene50StoredFieldsFormat extends StoredFieldsFormat { @Override public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException { - String previous = si.putAttribute(MODE_KEY, mode.name()); - if (previous != null && previous.equals(mode.name()) == false) { - throw new IllegalStateException("found existing value for " + MODE_KEY + " for segment: " + si.name + - "old=" + previous + ", new=" + mode.name()); - } - return impl(mode).fieldsWriter(directory, si, context); + throw new UnsupportedOperationException("Old codecs may only be used for reading"); } StoredFieldsFormat impl(Mode mode) { diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene84/Lucene84Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene84/Lucene84Codec.java index bef563301ba..90918c163d2 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene84/Lucene84Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene84/Lucene84Codec.java @@ -97,7 +97,7 @@ public class Lucene84Codec extends Codec { } @Override - public final StoredFieldsFormat storedFieldsFormat() { + public StoredFieldsFormat storedFieldsFormat() { return storedFieldsFormat; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java 
b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java similarity index 99% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java index 3f69874ef20..e2974655e75 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java @@ -97,7 +97,7 @@ public class Lucene86Codec extends Codec { } @Override - public final StoredFieldsFormat storedFieldsFormat() { + public StoredFieldsFormat storedFieldsFormat() { return storedFieldsFormat; } diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene86/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene86/package.html new file mode 100644 index 00000000000..10560c624c2 --- /dev/null +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene86/package.html @@ -0,0 +1,25 @@ + + + + + + + +Lucene 8.6 file format. 
+ + diff --git a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec index cf7a945e133..d6732336efd 100644 --- a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -15,3 +15,4 @@ org.apache.lucene.codecs.lucene80.Lucene80Codec org.apache.lucene.codecs.lucene84.Lucene84Codec +org.apache.lucene.codecs.lucene86.Lucene86Codec diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWStoredFieldsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWStoredFieldsFormat.java new file mode 100644 index 00000000000..82d1c96cf25 --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWStoredFieldsFormat.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.codecs.lucene50; + +import java.io.IOException; + +import org.apache.lucene.codecs.StoredFieldsWriter; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; + +/** + * RW impersonation of {@link Lucene50StoredFieldsFormat}: overrides {@link #fieldsWriter}, which the parent rejects with an UnsupportedOperationException, so that tests can still write this format. + */ +public final class Lucene50RWStoredFieldsFormat extends Lucene50StoredFieldsFormat { + + /** Creates the format through the parent's no-argument constructor. */ + public Lucene50RWStoredFieldsFormat() { + super(); + } + + /** Creates the format for the given {@code mode}, which is passed to the parent constructor. */ + public Lucene50RWStoredFieldsFormat(Lucene50StoredFieldsFormat.Mode mode) { + super(mode); + } + + @Override + public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException { + String previous = si.putAttribute(MODE_KEY, mode.name()); /* store the mode on the segment; fieldsReader reads it back under the same key */ + if (previous != null && previous.equals(mode.name()) == false) { + throw new IllegalStateException("found existing value for " + MODE_KEY + " for segment: " + si.name + + ", old=" + previous + ", new=" + mode.name()); + } + return impl(mode).fieldsWriter(directory, si, context); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java similarity index 92% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java index 4c7bed478c0..fec9e43f4e2 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java @@ -18,12 +18,12 @@ package org.apache.lucene.codecs.lucene50; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene86.Lucene86RWCodec; import 
org.apache.lucene.index.BaseStoredFieldsFormatTestCase; -import org.apache.lucene.util.TestUtil; public class TestLucene50StoredFieldsFormat extends BaseStoredFieldsFormatTestCase { @Override protected Codec getCodec() { - return TestUtil.getDefaultCodec(); + return new Lucene86RWCodec(); } } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java similarity index 91% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java index cccee736d46..41b4b845428 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java @@ -20,7 +20,7 @@ package org.apache.lucene.codecs.lucene50; import com.carrotsearch.randomizedtesting.generators.RandomPicks; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; -import org.apache.lucene.codecs.lucene86.Lucene86Codec; +import org.apache.lucene.codecs.lucene86.Lucene86RWCodec; import org.apache.lucene.document.Document; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.BaseStoredFieldsFormatTestCase; @@ -32,7 +32,7 @@ import org.apache.lucene.store.Directory; public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFieldsFormatTestCase { @Override protected Codec getCodec() { - return new Lucene86Codec(Mode.BEST_COMPRESSION); + return new Lucene86RWCodec(Mode.BEST_COMPRESSION); } /** @@ -43,7 +43,7 @@ public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFie Directory 
dir = newDirectory(); for (int i = 0; i < 10; i++) { IndexWriterConfig iwc = newIndexWriterConfig(); - iwc.setCodec(new Lucene86Codec(RandomPicks.randomFrom(random(), Mode.values()))); + iwc.setCodec(new Lucene86RWCodec(RandomPicks.randomFrom(random(), Mode.values()))); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig()); Document doc = new Document(); doc.add(new StoredField("field1", "value1")); @@ -70,7 +70,7 @@ public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFie public void testInvalidOptions() { expectThrows(NullPointerException.class, () -> { - new Lucene86Codec(null); + new Lucene86RWCodec(null); }); expectThrows(NullPointerException.class, () -> { diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatMergeInstance.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatMergeInstance.java similarity index 100% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatMergeInstance.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatMergeInstance.java diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene70/TestLucene70SegmentInfoFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene70/TestLucene70SegmentInfoFormat.java index ac516a121ef..d9dd0198a51 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene70/TestLucene70SegmentInfoFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene70/TestLucene70SegmentInfoFormat.java @@ -18,8 +18,7 @@ package org.apache.lucene.codecs.lucene70; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.FilterCodec; -import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.lucene84.Lucene84RWCodec; import org.apache.lucene.index.BaseSegmentInfoFormatTestCase; import 
org.apache.lucene.util.Version; @@ -32,11 +31,6 @@ public class TestLucene70SegmentInfoFormat extends BaseSegmentInfoFormatTestCase @Override protected Codec getCodec() { - return new FilterCodec("Lucene84", Codec.forName("Lucene84")) { - @Override - public SegmentInfoFormat segmentInfoFormat() { - return new Lucene70RWSegmentInfoFormat(); - } - }; + return new Lucene84RWCodec(); } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java index c1fd4677f92..0f74e79f508 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java @@ -18,6 +18,8 @@ package org.apache.lucene.codecs.lucene84; import org.apache.lucene.codecs.PointsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.lucene50.Lucene50RWStoredFieldsFormat; import org.apache.lucene.codecs.lucene60.Lucene60RWPointsFormat; import org.apache.lucene.codecs.lucene70.Lucene70RWSegmentInfoFormat; @@ -36,4 +38,9 @@ public class Lucene84RWCodec extends Lucene84Codec { return new Lucene70RWSegmentInfoFormat(); } + @Override + public StoredFieldsFormat storedFieldsFormat() { + return new Lucene50RWStoredFieldsFormat(); + } + } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene86/Lucene86RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene86/Lucene86RWCodec.java new file mode 100644 index 00000000000..72e2bee7e07 --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene86/Lucene86RWCodec.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene86; + +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.lucene50.Lucene50RWStoredFieldsFormat; +import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; + +/** + * RW impersonation of {@link Lucene86Codec}: the only override is {@link #storedFieldsFormat()}, which returns a {@link Lucene50RWStoredFieldsFormat}. + */ +public class Lucene86RWCodec extends Lucene86Codec { + + private final StoredFieldsFormat storedFieldsFormat; + + /** Creates the codec with a {@link Lucene50RWStoredFieldsFormat} built by its no-argument constructor. */ + public Lucene86RWCodec() { + storedFieldsFormat = new Lucene50RWStoredFieldsFormat(); + } + + /** Creates the codec with a {@link Lucene50RWStoredFieldsFormat} built for the given {@code mode}. 
*/ + public Lucene86RWCodec(Lucene50StoredFieldsFormat.Mode mode) { + storedFieldsFormat = new Lucene50RWStoredFieldsFormat(mode); + } + + @Override + public StoredFieldsFormat storedFieldsFormat() { + return storedFieldsFormat; + } + +} diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java index db64781cff7..e44b046aa29 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java @@ -29,7 +29,7 @@ import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene86.Lucene86Codec; +import org.apache.lucene.codecs.lucene87.Lucene87Codec; import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexDeletionPolicy; @@ -138,7 +138,7 @@ public class CreateIndexTask extends PerfTask { if (defaultCodec == null && postingsFormat != null) { try { final PostingsFormat postingsFormatChosen = PostingsFormat.forName(postingsFormat); - iwConf.setCodec(new Lucene86Codec() { + iwConf.setCodec(new Lucene87Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postingsFormatChosen; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java index 8b5ca14ff89..14fa7935f9f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java @@ -57,7 +57,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { } // TODO: should we use this, or maybe a system property is better? 
- static Codec defaultCodec = LOADER.lookup("Lucene86"); + static Codec defaultCodec = LOADER.lookup("Lucene87"); } private final String name; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50TermVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50TermVectorsFormat.java index 00412d5473c..9b65fb4fe6d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50TermVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50TermVectorsFormat.java @@ -20,6 +20,7 @@ package org.apache.lucene.codecs.lucene50; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.compressing.FieldsIndexWriter; +import org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat; import org.apache.lucene.codecs.compressing.CompressingTermVectorsFormat; import org.apache.lucene.codecs.compressing.CompressionMode; import org.apache.lucene.store.DataOutput; @@ -29,7 +30,7 @@ import org.apache.lucene.util.packed.PackedInts; /** * Lucene 5.0 {@link TermVectorsFormat term vectors format}. *

- * Very similarly to {@link Lucene50StoredFieldsFormat}, this format is based + * Very similarly to {@link Lucene87StoredFieldsFormat}, this format is based * on compressed chunks of data, with document-level granularity so that a * document can never span across distinct chunks. Moreover, data is made as * compact as possible: