diff --git a/dev-tools/maven/lucene/backward-codecs/pom.xml.template b/dev-tools/maven/lucene/backward-codecs/pom.xml.template new file mode 100644 index 00000000000..fbdd619287f --- /dev/null +++ b/dev-tools/maven/lucene/backward-codecs/pom.xml.template @@ -0,0 +1,100 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + @version@ + ../pom.xml + + org.apache.lucene + lucene-backward-codecs + jar + Lucene Backward Codecs + + Codecs for older versions of Lucene. + + + lucene/backward-codecs + ../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + org.apache.lucene + lucene-test-framework + test + +@lucene-backward-codecs.internal.dependencies@ +@lucene-backward-codecs.external.dependencies@ +@lucene-backward-codecs.internal.test.dependencies@ +@lucene-backward-codecs.external.test.dependencies@ + + + ${module-path}/src/java + ${module-path}/src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + de.thetaphi + forbiddenapis + + + lucene-shared-check-forbidden-apis + none + + + check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + jdk-system-out + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + + + + check + + + + + + + diff --git a/dev-tools/maven/lucene/pom.xml.template b/dev-tools/maven/lucene/pom.xml.template index 6709d3b227d..e7551c4af04 100644 --- a/dev-tools/maven/lucene/pom.xml.template +++ b/dev-tools/maven/lucene/pom.xml.template @@ -41,6 +41,7 @@ core + backward-codecs codecs test-framework analysis diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py index 1b7a79e4448..69959dccb35 100644 --- a/dev-tools/scripts/smokeTestRelease.py +++ b/dev-tools/scripts/smokeTestRelease.py @@ -669,7 +669,7 @@ def verifyUnpacked(java, project, artifact, unpackPath, svnRevision, version, te if project == 'lucene': # TODO: clean this up to not be 
a list of modules that we must maintain - extras = ('analysis', 'benchmark', 'classification', 'codecs', 'core', 'demo', 'docs', 'expressions', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'replicator', 'sandbox', 'spatial', 'suggest', 'test-framework', 'licenses') + extras = ('analysis', 'backward-codecs', 'benchmark', 'classification', 'codecs', 'core', 'demo', 'docs', 'expressions', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'replicator', 'sandbox', 'spatial', 'suggest', 'test-framework', 'licenses') if isSrc: extras += ('build.xml', 'common-build.xml', 'module-build.xml', 'ivy-settings.xml', 'ivy-versions.properties', 'ivy-ignore-conflicts.properties', 'version.properties', 'backwards', 'tools', 'site') else: diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 37dd9b8855c..cb2ec69b9fe 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -85,6 +85,9 @@ Other * LUCENE-5563: Removed sep layout: which has fallen behind on features and doesn't perform as well as other options. (Robert Muir) +* LUCENE-5858: Moved compatibility codecs to 'lucene-backward-codecs.jar'. 
+ (Adrien Grand, Robert Muir) + ======================= Lucene 4.11.0 ====================== New Features diff --git a/lucene/analysis/common/src/test/org/apache/lucene/collation/TestCollationDocValuesField.java b/lucene/analysis/common/src/test/org/apache/lucene/collation/TestCollationDocValuesField.java index 692f2f60930..3446fcd3c94 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/collation/TestCollationDocValuesField.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/collation/TestCollationDocValuesField.java @@ -41,12 +41,10 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; /** * trivial test of CollationDocValuesField */ -@SuppressCodecs("Lucene3x") public class TestCollationDocValuesField extends LuceneTestCase { public void testBasic() throws Exception { diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationDocValuesField.java b/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationDocValuesField.java index 59fdb12c550..dff883bbf44 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationDocValuesField.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationDocValuesField.java @@ -38,7 +38,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import com.ibm.icu.text.Collator; import com.ibm.icu.util.ULocale; @@ -46,7 +45,6 @@ import com.ibm.icu.util.ULocale; /** * trivial test of ICUCollationDocValuesField */ -@SuppressCodecs("Lucene3x") public class TestICUCollationDocValuesField extends LuceneTestCase { public void testBasic() throws Exception { diff --git 
a/lucene/backward-codecs/build.xml b/lucene/backward-codecs/build.xml new file mode 100644 index 00000000000..3de2979484b --- /dev/null +++ b/lucene/backward-codecs/build.xml @@ -0,0 +1,26 @@ + + + + + + Codecs for older versions of Lucene. + + + + + diff --git a/lucene/backward-codecs/ivy.xml b/lucene/backward-codecs/ivy.xml new file mode 100644 index 00000000000..6d86d6a0c6e --- /dev/null +++ b/lucene/backward-codecs/ivy.xml @@ -0,0 +1,21 @@ + + + + diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java rename to 
lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java similarity index 100% rename from 
lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java 
b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java rename to 
lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/package.html similarity index 92% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/package.html rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/package.html index f9d00126b29..7959cc0f464 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/package.html +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/package.html @@ -20,6 +20,6 @@ -Support for testing {@link org.apache.lucene.codecs.lucene41.Lucene41Codec}. +Lucene 4.0 file format. diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/package.html similarity index 91% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/package.html rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/package.html index 85480b8ce6d..abea0c2767e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/package.html +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/package.html @@ -20,6 +20,6 @@ -Support for testing {@link org.apache.lucene.codecs.lucene45.Lucene45Codec}. +Lucene 4.1 file format. 
- \ No newline at end of file + diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java rename to 
lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsProducer.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsProducer.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsProducer.java diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/package.html similarity index 91% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/package.html rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/package.html index f1c62d1e049..48043b37609 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/package.html +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/package.html @@ -20,6 +20,6 @@ -Support for testing {@link org.apache.lucene.codecs.lucene42.Lucene42Codec}. +Lucene 4.2 file format. 
- \ No newline at end of file + diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/Lucene45Codec.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45Codec.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/Lucene45Codec.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene46/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/package.html similarity index 91% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene46/package.html rename to 
lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/package.html index 8abed4c2b1d..62c180700ff 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene46/package.html +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/package.html @@ -20,6 +20,6 @@ -Support for testing {@link org.apache.lucene.codecs.lucene46.Lucene46Codec}. +Lucene 4.5 file format. - \ No newline at end of file + diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46Codec.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46Codec.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46Codec.java diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/package.html new file mode 100644 index 00000000000..8acd7aa10e6 --- /dev/null +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/package.html @@ -0,0 +1,25 @@ + + + + + + + +Lucene 4.6 file format. 
+ + diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49Codec.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49Codec.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49Codec.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesConsumer.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesConsumer.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesConsumer.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesConsumer.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesFormat.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/package.html new file mode 100644 index 00000000000..35c7c0941b1 --- /dev/null +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/package.html @@ -0,0 +1,25 @@ + + + + + 
+ + +Lucene 4.9 file format. + + diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene49/package.html b/lucene/backward-codecs/src/java/overview.html similarity index 76% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene49/package.html rename to lucene/backward-codecs/src/java/overview.html index 30924a60f21..dc05227b294 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene49/package.html +++ b/lucene/backward-codecs/src/java/overview.html @@ -1,4 +1,3 @@ - - - - - -Support for testing {@link org.apache.lucene.codecs.lucene49.Lucene49Codec}. - + + Apache Lucene backward codecs + + + Codecs for reading indexes from older versions of Lucene. + \ No newline at end of file diff --git a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec new file mode 100644 index 00000000000..ccb80e45c7b --- /dev/null +++ b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.lucene.codecs.lucene40.Lucene40Codec +org.apache.lucene.codecs.lucene41.Lucene41Codec +org.apache.lucene.codecs.lucene42.Lucene42Codec +org.apache.lucene.codecs.lucene45.Lucene45Codec +org.apache.lucene.codecs.lucene46.Lucene46Codec +org.apache.lucene.codecs.lucene49.Lucene49Codec diff --git a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat new file mode 100644 index 00000000000..01ce305b441 --- /dev/null +++ b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat +org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat +org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat diff --git a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat new file mode 100644 index 00000000000..112a1698302 --- /dev/null +++ b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java similarity index 98% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java index 2f636857898..67dccf6c965 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java @@ -52,6 +52,9 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { @Override public void addNumericField(FieldInfo field, Iterable values) throws IOException { + if (field.getDocValuesGen() != -1) { + throw new UnsupportedOperationException("4.0 does not support dv updates"); + } // examine the values to determine best type to use long minValue = Long.MAX_VALUE; long maxValue = Long.MIN_VALUE; @@ -154,6 +157,9 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { @Override public void addBinaryField(FieldInfo field, Iterable values) throws IOException { + if (field.getDocValuesGen() != -1) { + throw new UnsupportedOperationException("4.0 does not support dv updates"); + } // examine the values to determine best type to use HashSet uniqueValues = new HashSet<>(); int minLength = Integer.MAX_VALUE; @@ -395,6 +401,9 @@ class Lucene40DocValuesWriter extends DocValuesConsumer { @Override public void addSortedField(FieldInfo field, Iterable values, Iterable docToOrd) throws IOException { + if (field.getDocValuesGen() != -1) { + throw new UnsupportedOperationException("4.0 does not support dv updates"); + } // examine the values to determine best type to use int minLength = Integer.MAX_VALUE; int maxLength = Integer.MIN_VALUE; diff 
--git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java similarity index 97% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java index 125e0b9531b..b9bd799d395 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java @@ -46,6 +46,9 @@ public class Lucene40FieldInfosWriter extends FieldInfosWriter { @Override public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException { + if (!segmentSuffix.isEmpty()) { + throw new UnsupportedOperationException("4.0 does not support fieldinfo updates"); + } final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION); IndexOutput output = directory.createOutput(fileName, context); boolean success = false; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java similarity index 100% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java similarity index 89% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java rename to 
lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java index 2052ae26883..c591e4e2e00 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java @@ -6,6 +6,7 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.FieldInfosWriter; import org.apache.lucene.codecs.NormsFormat; +import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.util.LuceneTestCase; @@ -34,11 +35,7 @@ public final class Lucene40RWCodec extends Lucene40Codec { private final FieldInfosFormat fieldInfos = new Lucene40FieldInfosFormat() { @Override public FieldInfosWriter getFieldInfosWriter() throws IOException { - if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { - return super.getFieldInfosWriter(); - } else { - return new Lucene40FieldInfosWriter(); - } + return new Lucene40FieldInfosWriter(); } }; @@ -46,6 +43,7 @@ public final class Lucene40RWCodec extends Lucene40Codec { private final NormsFormat norms = new Lucene40RWNormsFormat(); private final StoredFieldsFormat stored = new Lucene40RWStoredFieldsFormat(); private final TermVectorsFormat vectors = new Lucene40RWTermVectorsFormat(); + private final PostingsFormat postings = new Lucene40RWPostingsFormat(); @Override public FieldInfosFormat fieldInfosFormat() { @@ -71,4 +69,9 @@ public final class Lucene40RWCodec extends Lucene40Codec { public TermVectorsFormat termVectorsFormat() { return vectors; } + + @Override + public PostingsFormat getPostingsFormatForField(String field) { + return postings; + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWDocValuesFormat.java 
b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWDocValuesFormat.java similarity index 79% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWDocValuesFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWDocValuesFormat.java index 64b7f02aeee..b4b919fdde6 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWDocValuesFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWDocValuesFormat.java @@ -22,7 +22,6 @@ import java.io.IOException; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.util.LuceneTestCase; /** Read-write version of {@link Lucene40DocValuesFormat} for testing */ @SuppressWarnings("deprecation") @@ -30,13 +29,9 @@ public class Lucene40RWDocValuesFormat extends Lucene40DocValuesFormat { @Override public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { - return super.fieldsConsumer(state); - } else { - String filename = IndexFileNames.segmentFileName(state.segmentInfo.name, + String filename = IndexFileNames.segmentFileName(state.segmentInfo.name, "dv", IndexFileNames.COMPOUND_FILE_EXTENSION); - return new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY); - } + return new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWNormsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWNormsFormat.java similarity index 63% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWNormsFormat.java rename to 
lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWNormsFormat.java index c7ea1daae69..227aeaba330 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWNormsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWNormsFormat.java @@ -23,7 +23,6 @@ import org.apache.lucene.codecs.NormsConsumer; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.util.LuceneTestCase; /** Read-write version of {@link Lucene40NormsFormat} for testing */ @SuppressWarnings("deprecation") @@ -31,24 +30,20 @@ public class Lucene40RWNormsFormat extends Lucene40NormsFormat { @Override public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException { - if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { - return super.normsConsumer(state); - } else { - String filename = IndexFileNames.segmentFileName(state.segmentInfo.name, - "nrm", - IndexFileNames.COMPOUND_FILE_EXTENSION); - final Lucene40DocValuesWriter impl = new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY); - return new NormsConsumer() { - @Override - public void addNormsField(FieldInfo field, Iterable values) throws IOException { - impl.addNumericField(field, values); - } - - @Override - public void close() throws IOException { - impl.close(); - } - }; - } + String filename = IndexFileNames.segmentFileName(state.segmentInfo.name, + "nrm", + IndexFileNames.COMPOUND_FILE_EXTENSION); + final Lucene40DocValuesWriter impl = new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY); + return new NormsConsumer() { + @Override + public void addNormsField(FieldInfo field, Iterable values) throws IOException { + impl.addNumericField(field, values); + } + + @Override + public void close() throws IOException { + impl.close(); + } + }; } } diff 
--git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java similarity index 68% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java index 32437443046..c0baaf654fc 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java @@ -33,24 +33,20 @@ public class Lucene40RWPostingsFormat extends Lucene40PostingsFormat { @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { - return super.fieldsConsumer(state); - } else { - PostingsWriterBase docs = new Lucene40PostingsWriter(state); - - // TODO: should we make the terms index more easily - // pluggable? Ie so that this codec would record which - // index impl was used, and switch on loading? - // Or... you must make a new Codec for this? - boolean success = false; - try { - FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, minBlockSize, maxBlockSize); - success = true; - return ret; - } finally { - if (!success) { - docs.close(); - } + PostingsWriterBase docs = new Lucene40PostingsWriter(state); + + // TODO: should we make the terms index more easily + // pluggable? Ie so that this codec would record which + // index impl was used, and switch on loading? + // Or... you must make a new Codec for this? 
+ boolean success = false; + try { + FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, minBlockSize, maxBlockSize); + success = true; + return ret; + } finally { + if (!success) { + docs.close(); } } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWStoredFieldsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWStoredFieldsFormat.java similarity index 82% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWStoredFieldsFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWStoredFieldsFormat.java index f0d430fa96b..58d977092b0 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWStoredFieldsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWStoredFieldsFormat.java @@ -23,7 +23,6 @@ import org.apache.lucene.codecs.StoredFieldsWriter; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.util.LuceneTestCase; /** * Simulates writing Lucene 4.0 Stored Fields Format. 
@@ -32,10 +31,6 @@ public class Lucene40RWStoredFieldsFormat extends Lucene40StoredFieldsFormat { @Override public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException { - if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { - throw new UnsupportedOperationException("this codec can only be used for reading"); - } else { - return new Lucene40StoredFieldsWriter(directory, si.name, context); - } + return new Lucene40StoredFieldsWriter(directory, si.name, context); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWTermVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWTermVectorsFormat.java similarity index 84% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWTermVectorsFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWTermVectorsFormat.java index 81c2ac3e980..a6a3e244961 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWTermVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWTermVectorsFormat.java @@ -32,10 +32,6 @@ public class Lucene40RWTermVectorsFormat extends Lucene40TermVectorsFormat { @Override public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException { - if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { - throw new UnsupportedOperationException("this codec can only be used for reading"); - } else { - return new Lucene40TermVectorsWriter(directory, segmentInfo.name, context); - } + return new Lucene40TermVectorsWriter(directory, segmentInfo.name, context); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java similarity index 
100% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java similarity index 100% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java similarity index 100% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestBitVector.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestBitVector.java similarity index 100% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestBitVector.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestBitVector.java diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40DocValuesFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40DocValuesFormat.java similarity index 80% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40DocValuesFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40DocValuesFormat.java index 048f05a3f64..1f138d0b446 100644 --- 
a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40DocValuesFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40DocValuesFormat.java @@ -19,18 +19,12 @@ package org.apache.lucene.codecs.lucene40; import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.BaseDocValuesFormatTestCase; -import org.junit.BeforeClass; /** * Tests Lucene40DocValuesFormat */ public class TestLucene40DocValuesFormat extends BaseDocValuesFormatTestCase { private final Codec codec = new Lucene40RWCodec(); - - @BeforeClass - public static void beforeClass() { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec - } @Override protected Codec getCodec() { @@ -43,4 +37,20 @@ public class TestLucene40DocValuesFormat extends BaseDocValuesFormatTestCase { return false; } + // this codec doesnt support missing (its the same as empty string) + @Override + protected boolean codecSupportsDocsWithField() { + return false; + } + + @Override + protected boolean codecSupportsSortedSet() { + return false; + } + + @Override + protected boolean codecSupportsSortedNumeric() { + return false; + } + } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java similarity index 86% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java index f3fb65afc70..51b72558072 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java @@ -19,8 +19,6 @@ package org.apache.lucene.codecs.lucene40; import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.BaseNormsFormatTestCase; -import 
org.junit.BeforeClass; - /** Tests Lucene40's norms format */ public class TestLucene40NormsFormat extends BaseNormsFormatTestCase { @@ -30,9 +28,4 @@ public class TestLucene40NormsFormat extends BaseNormsFormatTestCase { protected Codec getCodec() { return codec; } - - @BeforeClass - public static void beforeClass() { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec - } } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsFormat.java similarity index 87% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsFormat.java index a3deacc022e..a742d588575 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsFormat.java @@ -19,18 +19,12 @@ package org.apache.lucene.codecs.lucene40; import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.BasePostingsFormatTestCase; -import org.junit.BeforeClass; /** * Tests Lucene40PostingsFormat */ public class TestLucene40PostingsFormat extends BasePostingsFormatTestCase { private final Codec codec = new Lucene40RWCodec(); - - @BeforeClass - public static void beforeClass() { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec - } @Override protected Codec getCodec() { diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsReader.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsReader.java similarity index 94% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsReader.java rename to 
lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsReader.java index a3f6bc90d49..3dc2c532b11 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsReader.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsReader.java @@ -21,7 +21,6 @@ import java.util.ArrayList; import java.util.Collections; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.codecs.Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -34,7 +33,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; -import org.junit.BeforeClass; public class TestLucene40PostingsReader extends LuceneTestCase { static final String terms[] = new String[100]; @@ -43,11 +41,6 @@ public class TestLucene40PostingsReader extends LuceneTestCase { terms[i] = Integer.toString(i+1); } } - - @BeforeClass - public static void beforeClass() { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec - } /** tests terms with different probabilities of being in the document. 
* depends heavily on term vectors cross-check at checkIndex @@ -55,7 +48,7 @@ public class TestLucene40PostingsReader extends LuceneTestCase { public void testPostings() throws Exception { Directory dir = newFSDirectory(createTempDir("postings")); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); - iwc.setCodec(Codec.forName("Lucene40")); + iwc.setCodec(new Lucene40RWCodec()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); Document doc = new Document(); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40StoredFieldsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40StoredFieldsFormat.java similarity index 88% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40StoredFieldsFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40StoredFieldsFormat.java index 2502d895db8..b7e4bda5733 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40StoredFieldsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40StoredFieldsFormat.java @@ -23,11 +23,6 @@ import org.junit.BeforeClass; public class TestLucene40StoredFieldsFormat extends BaseStoredFieldsFormatTestCase { - @BeforeClass - public static void beforeClass() { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec - } - @Override protected Codec getCodec() { return new Lucene40RWCodec(); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java similarity index 88% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java index 
e97b3b3e6d9..0d4146189d0 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java @@ -22,11 +22,6 @@ import org.apache.lucene.index.BaseTermVectorsFormatTestCase; import org.junit.BeforeClass; public class TestLucene40TermVectorsFormat extends BaseTermVectorsFormatTestCase { - - @BeforeClass - public static void beforeClass() { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec - } @Override protected Codec getCodec() { diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java similarity index 98% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java index 9ac520dc577..1bbdd8f43a5 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java @@ -42,11 +42,6 @@ import org.junit.BeforeClass; // TODO: really this should be in BaseTestPF or somewhere else? useful test! 
public class TestReuseDocsEnum extends LuceneTestCase { - - @BeforeClass - public static void beforeClass() { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec - } public void testReuseDocsEnumNoReuse() throws IOException { Directory dir = newDirectory(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java similarity index 93% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java index c5a1cc9d695..c84a5c1a3b2 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java @@ -41,11 +41,7 @@ public class Lucene41RWCodec extends Lucene41Codec { private final FieldInfosFormat fieldInfos = new Lucene40FieldInfosFormat() { @Override public FieldInfosWriter getFieldInfosWriter() throws IOException { - if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { - return super.getFieldInfosWriter(); - } else { - return new Lucene40FieldInfosWriter(); - } + return new Lucene40FieldInfosWriter(); } }; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java similarity index 97% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java index 48762c51843..c0ed76bae23 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java +++ 
b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java @@ -87,6 +87,9 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer { @Override public void addNumericField(FieldInfo field, Iterable values) throws IOException { + if (field.getDocValuesGen() != -1) { + throw new UnsupportedOperationException("4.2 does not support dv updates"); + } addNumericField(field, values, true); } @@ -209,6 +212,9 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer { @Override public void addBinaryField(FieldInfo field, final Iterable values) throws IOException { + if (field.getDocValuesGen() != -1) { + throw new UnsupportedOperationException("4.2 does not support dv updates"); + } // write the byte[] data meta.writeVInt(field.number); meta.writeByte(BYTES); @@ -270,6 +276,9 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer { @Override public void addSortedField(FieldInfo field, Iterable values, Iterable docToOrd) throws IOException { + if (field.getDocValuesGen() != -1) { + throw new UnsupportedOperationException("4.2 does not support dv updates"); + } // three cases for simulating the old writer: // 1. no missing // 2. missing (and empty string in use): remap ord=-1 -> ord=0 @@ -307,6 +316,7 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer { // note: this might not be the most efficient... 
but its fairly simple @Override public void addSortedSetField(FieldInfo field, Iterable values, final Iterable docToOrdCount, final Iterable ords) throws IOException { + assert field.getDocValuesGen() == -1; // write the ordinals as a binary field addBinaryField(field, new Iterable() { @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java similarity index 96% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java index e3f7b6cd60c..7ad74a106a4 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java @@ -46,6 +46,9 @@ public final class Lucene42FieldInfosWriter extends FieldInfosWriter { @Override public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException { + if (!segmentSuffix.isEmpty()) { + throw new UnsupportedOperationException("4.2 does not support fieldinfo updates"); + } final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION); IndexOutput output = directory.createOutput(fileName, context); boolean success = false; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java similarity index 100% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java diff --git 
a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java similarity index 91% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java index 68f3859278f..331bacb3535 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java @@ -37,11 +37,7 @@ public class Lucene42RWCodec extends Lucene42Codec { private final FieldInfosFormat fieldInfosFormat = new Lucene42FieldInfosFormat() { @Override public FieldInfosWriter getFieldInfosWriter() throws IOException { - if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { - return super.getFieldInfosWriter(); - } else { - return new Lucene42FieldInfosWriter(); - } + return new Lucene42FieldInfosWriter(); } }; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWDocValuesFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWDocValuesFormat.java similarity index 78% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWDocValuesFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWDocValuesFormat.java index 569ba9e8881..f4401d651e0 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWDocValuesFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWDocValuesFormat.java @@ -31,11 +31,7 @@ public class Lucene42RWDocValuesFormat extends Lucene42DocValuesFormat { @Override public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { - return 
super.fieldsConsumer(state); - } else { - // note: we choose DEFAULT here (its reasonably fast, and for small bpv has tiny waste) - return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio); - } + // note: we choose DEFAULT here (its reasonably fast, and for small bpv has tiny waste) + return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWNormsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWNormsFormat.java similarity index 82% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWNormsFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWNormsFormat.java index fbca416e553..be62482a630 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWNormsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWNormsFormat.java @@ -30,10 +30,6 @@ public class Lucene42RWNormsFormat extends Lucene42NormsFormat { @Override public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException { - if (LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { - return new Lucene42NormsConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio); - } else { - return super.normsConsumer(state); - } + return new Lucene42NormsConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio); } } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java similarity index 84% rename from 
lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java index a7796c47b46..2ebdfc12dc5 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java @@ -26,11 +26,6 @@ import org.junit.BeforeClass; */ public class TestLucene42DocValuesFormat extends BaseCompressingDocValuesFormatTestCase { private final Codec codec = new Lucene42RWCodec(); - - @BeforeClass - public static void beforeClass() { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec - } @Override protected Codec getCodec() { @@ -41,4 +36,15 @@ public class TestLucene42DocValuesFormat extends BaseCompressingDocValuesFormatT protected boolean codecAcceptsHugeBinaryValues(String field) { return false; } + + // this codec doesnt support missing (its the same as empty string) + @Override + protected boolean codecSupportsDocsWithField() { + return false; + } + + @Override + protected boolean codecSupportsSortedNumeric() { + return false; + } } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42NormsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/TestLucene42NormsFormat.java similarity index 89% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42NormsFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/TestLucene42NormsFormat.java index 9ee7827127a..7485a1ce8e8 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42NormsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/TestLucene42NormsFormat.java @@ -30,9 +30,4 @@ public class TestLucene42NormsFormat extends BaseNormsFormatTestCase { protected Codec getCodec() { 
return codec; } - - @BeforeClass - public static void beforeClass() { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec - } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java similarity index 90% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java index 0263b3933a0..959cae73661 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java @@ -26,7 +26,6 @@ import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat; import org.apache.lucene.codecs.lucene42.Lucene42FieldInfosWriter; import org.apache.lucene.codecs.lucene42.Lucene42RWNormsFormat; -import org.apache.lucene.util.LuceneTestCase; /** * Read-write version of {@link Lucene45Codec} for testing. 
@@ -37,11 +36,7 @@ public class Lucene45RWCodec extends Lucene45Codec { private final FieldInfosFormat fieldInfosFormat = new Lucene42FieldInfosFormat() { @Override public FieldInfosWriter getFieldInfosWriter() throws IOException { - if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { - return super.getFieldInfosWriter(); - } else { - return new Lucene42FieldInfosWriter(); - } + return new Lucene42FieldInfosWriter(); } }; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/Lucene45RWDocValuesFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/Lucene45RWDocValuesFormat.java similarity index 76% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/Lucene45RWDocValuesFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/Lucene45RWDocValuesFormat.java index 87a6dd3aa88..c7ca97a6d60 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/Lucene45RWDocValuesFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/Lucene45RWDocValuesFormat.java @@ -22,7 +22,6 @@ import java.io.IOException; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.util.LuceneTestCase; /** * Read-write version of {@link Lucene45DocValuesFormat} for testing. 
@@ -31,15 +30,11 @@ public class Lucene45RWDocValuesFormat extends Lucene45DocValuesFormat { @Override public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - if (LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { - return new Lucene45DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION) { - @Override - void checkCanWrite(FieldInfo field) { - // allow writing all fields - } - }; - } else { - return super.fieldsConsumer(state); - } + return new Lucene45DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION) { + @Override + void checkCanWrite(FieldInfo field) { + // allow writing all fields + } + }; } } \ No newline at end of file diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene45/TestLucene45DocValuesFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/TestLucene45DocValuesFormat.java similarity index 89% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene45/TestLucene45DocValuesFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/TestLucene45DocValuesFormat.java index 67654da5549..f55c65c8d01 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene45/TestLucene45DocValuesFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/TestLucene45DocValuesFormat.java @@ -27,13 +27,13 @@ import org.junit.BeforeClass; public class TestLucene45DocValuesFormat extends BaseCompressingDocValuesFormatTestCase { private final Codec codec = new Lucene45RWCodec(); - @BeforeClass - public static void beforeClass() { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec - } - @Override protected Codec getCodec() { return codec; } + + @Override + protected boolean codecSupportsSortedNumeric() { + return false; + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene46/Lucene46RWCodec.java 
b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46RWCodec.java similarity index 100% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene46/Lucene46RWCodec.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46RWCodec.java diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene49/Lucene49RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/Lucene49RWCodec.java similarity index 100% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene49/Lucene49RWCodec.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/Lucene49RWCodec.java diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene49/Lucene49RWDocValuesFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/Lucene49RWDocValuesFormat.java similarity index 78% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene49/Lucene49RWDocValuesFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/Lucene49RWDocValuesFormat.java index 53c1fe58eeb..67ae4841abc 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene49/Lucene49RWDocValuesFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/Lucene49RWDocValuesFormat.java @@ -29,16 +29,12 @@ public class Lucene49RWDocValuesFormat extends Lucene49DocValuesFormat { @Override public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - if (LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { - return new Lucene49DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION) { - @Override - void checkCanWrite(FieldInfo field) { - // allow writing all fields - } - }; - } else { - return super.fieldsConsumer(state); - } + return new Lucene49DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION) { + 
@Override + void checkCanWrite(FieldInfo field) { + // allow writing all fields + } + }; } } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene49/TestLucene49DocValuesFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/TestLucene49DocValuesFormat.java similarity index 89% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene49/TestLucene49DocValuesFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/TestLucene49DocValuesFormat.java index c6bb58c81ff..5873f7ea10f 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene49/TestLucene49DocValuesFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/TestLucene49DocValuesFormat.java @@ -27,11 +27,6 @@ import org.junit.BeforeClass; public class TestLucene49DocValuesFormat extends BaseCompressingDocValuesFormatTestCase { private final Codec codec = new Lucene49RWCodec(); - @BeforeClass - public static void beforeClass() { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec - } - @Override protected Codec getCodec() { return codec; diff --git a/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java similarity index 97% rename from lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java rename to lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index ee9a19f94c4..b532bc587a2 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -73,11 +73,6 @@ import org.junit.BeforeClass; Verify we can read the pre-5.0 file format, do searches against it, and add documents to it. 
*/ -// note: add this if we make a 4.x impersonator -// TODO: don't use 4.x codec, its unrealistic since it means -// we won't even be running the actual code, only the impostor -// @SuppressCodecs("Lucene4x") -@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42", "Lucene45", "Lucene46", "Lucene49"}) public class TestBackwardsCompatibility extends LuceneTestCase { // Uncomment these cases & run them on an older Lucene version, @@ -167,8 +162,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase { } private void updateBinary(IndexWriter writer, String id, String f, String cf, long value) throws IOException { - writer.updateBinaryDocValue(new Term("id", id), f, TestBinaryDocValuesUpdates.toBytes(value)); - writer.updateBinaryDocValue(new Term("id", id), cf, TestBinaryDocValuesUpdates.toBytes(value*2)); + writer.updateBinaryDocValue(new Term("id", id), f, TestDocValuesUpdatesOnOldSegments.toBytes(value)); + writer.updateBinaryDocValue(new Term("id", id), cf, TestDocValuesUpdatesOnOldSegments.toBytes(value*2)); } /* // Creates an index with DocValues updates @@ -191,10 +186,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase { doc.add(new NumericDocValuesField("ndv1_c", i*2)); doc.add(new NumericDocValuesField("ndv2", i*3)); doc.add(new NumericDocValuesField("ndv2_c", i*6)); - doc.add(new BinaryDocValuesField("bdv1", TestBinaryDocValuesUpdates.toBytes(i))); - doc.add(new BinaryDocValuesField("bdv1_c", TestBinaryDocValuesUpdates.toBytes(i*2))); - doc.add(new BinaryDocValuesField("bdv2", TestBinaryDocValuesUpdates.toBytes(i*3))); - doc.add(new BinaryDocValuesField("bdv2_c", TestBinaryDocValuesUpdates.toBytes(i*6))); + doc.add(new BinaryDocValuesField("bdv1", TestDocValuesUpdatesOnOldSegments.toBytes(i))); + doc.add(new BinaryDocValuesField("bdv1_c", TestDocValuesUpdatesOnOldSegments.toBytes(i*2))); + doc.add(new BinaryDocValuesField("bdv2", TestDocValuesUpdatesOnOldSegments.toBytes(i*3))); + doc.add(new BinaryDocValuesField("bdv2_c", 
TestDocValuesUpdatesOnOldSegments.toBytes(i*6))); writer.addDocument(doc); if ((i+1) % 10 == 0) { writer.commit(); // flush every 10 docs @@ -281,7 +276,6 @@ public class TestBackwardsCompatibility extends LuceneTestCase { @BeforeClass public static void beforeClass() throws Exception { - assertFalse("test infra is broken!", LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE); List names = new ArrayList<>(oldNames.length + oldSingleSegmentNames.length); names.addAll(Arrays.asList(oldNames)); names.addAll(Arrays.asList(oldSingleSegmentNames)); @@ -1085,7 +1079,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { BinaryDocValues bdvf = r.getBinaryDocValues(f); BinaryDocValues bdvcf = r.getBinaryDocValues(cf); for (int i = 0; i < r.maxDoc(); i++) { - assertEquals(TestBinaryDocValuesUpdates.getValue(bdvcf, i), TestBinaryDocValuesUpdates.getValue(bdvf, i)*2); + assertEquals(TestDocValuesUpdatesOnOldSegments.getValue(bdvcf, i), TestDocValuesUpdatesOnOldSegments.getValue(bdvf, i)*2); } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestDocValuesUpdatesOnOldSegments.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestDocValuesUpdatesOnOldSegments.java new file mode 100644 index 00000000000..46f983b51ae --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestDocValuesUpdatesOnOldSegments.java @@ -0,0 +1,115 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene40.Lucene40RWCodec; +import org.apache.lucene.codecs.lucene41.Lucene41RWCodec; +import org.apache.lucene.codecs.lucene42.Lucene42RWCodec; +import org.apache.lucene.codecs.lucene45.Lucene45RWCodec; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; + + +public class TestDocValuesUpdatesOnOldSegments extends LuceneTestCase { + + static long getValue(BinaryDocValues bdv, int idx) { + BytesRef term = bdv.get(idx); + idx = term.offset; + byte b = term.bytes[idx++]; + long value = b & 0x7FL; + for (int shift = 7; (b & 0x80L) != 0; shift += 7) { + b = term.bytes[idx++]; + value |= (b & 0x7FL) << shift; + } + return value; + } + + // encodes a long into a BytesRef as VLong so that we get varying number of bytes when we update + static BytesRef toBytes(long value) { + BytesRef bytes = new BytesRef(10); // negative longs may take 10 bytes + while ((value & ~0x7FL) != 0L) { + bytes.bytes[bytes.length++] = (byte) ((value & 0x7FL) | 0x80L); + value >>>= 7; + } + bytes.bytes[bytes.length++] = (byte) value; + return bytes; + } + + public void testBinaryUpdates() throws Exception { + Codec[] oldCodecs = 
new Codec[] { new Lucene40RWCodec(), new Lucene41RWCodec(), new Lucene42RWCodec(), new Lucene45RWCodec() }; + Directory dir = newDirectory(); + + // create a segment with an old Codec + IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); + conf.setCodec(oldCodecs[random().nextInt(oldCodecs.length)]); + IndexWriter writer = new IndexWriter(dir, conf); + Document doc = new Document(); + doc.add(new StringField("id", "doc", Store.NO)); + doc.add(new BinaryDocValuesField("f", toBytes(5L))); + writer.addDocument(doc); + writer.close(); + + conf = newIndexWriterConfig(new MockAnalyzer(random())); + writer = new IndexWriter(dir, conf); + writer.updateBinaryDocValue(new Term("id", "doc"), "f", toBytes(4L)); + try { + writer.close(); + fail("should not have succeeded to update a segment written with an old Codec"); + } catch (UnsupportedOperationException e) { + writer.rollback(); + } + + dir.close(); + } + + public void testNumericUpdates() throws Exception { + Codec[] oldCodecs = new Codec[] { new Lucene40RWCodec(), new Lucene41RWCodec(), new Lucene42RWCodec(), new Lucene45RWCodec() }; + Directory dir = newDirectory(); + + // create a segment with an old Codec + IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); + conf.setCodec(oldCodecs[random().nextInt(oldCodecs.length)]); + IndexWriter writer = new IndexWriter(dir, conf); + Document doc = new Document(); + doc.add(new StringField("id", "doc", Store.NO)); + doc.add(new NumericDocValuesField("f", 5)); + writer.addDocument(doc); + writer.close(); + + conf = newIndexWriterConfig(new MockAnalyzer(random())); + writer = new IndexWriter(dir, conf); + writer.updateNumericDocValue(new Term("id", "doc"), "f", 4L); + try { + writer.close(); + fail("should not have succeeded to update a segment written with an old Codec"); + } catch (UnsupportedOperationException e) { + writer.rollback(); + } + + dir.close(); + } + +} diff --git 
a/lucene/core/src/test/org/apache/lucene/index/dvupdates.48.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/dvupdates.48.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/dvupdates.48.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/dvupdates.48.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.40.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/index.40.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.40.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/index.40.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.41.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.41.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.41.cfs.zip rename to 
lucene/backward-codecs/src/test/org/apache/lucene/index/index.41.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.41.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.41.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.41.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/index.41.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.42.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.42.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.42.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/index.42.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.42.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.42.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.42.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/index.42.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.45.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.45.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.45.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/index.45.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.45.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.45.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.45.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/index.45.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.461.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.461.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.461.cfs.zip rename 
to lucene/backward-codecs/src/test/org/apache/lucene/index/index.461.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.461.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.461.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.461.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/index.461.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.49.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.49.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.49.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/index.49.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/index.49.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.49.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/index.49.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/index.49.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/moreterms.40.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/moreterms.40.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/moreterms.40.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/moreterms.40.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.19.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.19.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.19.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.19.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.19.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.19.nocfs.zip similarity index 100% rename from 
lucene/core/src/test/org/apache/lucene/index/unsupported.19.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.19.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.20.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.20.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.20.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.20.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.20.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.20.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.20.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.20.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.21.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.21.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.21.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.21.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.21.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.21.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.21.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.21.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.22.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.22.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.22.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.22.cfs.zip diff --git 
a/lucene/core/src/test/org/apache/lucene/index/unsupported.22.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.22.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.22.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.22.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.23.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.23.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.23.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.23.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.23.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.23.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.23.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.23.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.24.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.24.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.24.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.24.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.24.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.24.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.24.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.24.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.29.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.29.cfs.zip similarity index 100% rename from 
lucene/core/src/test/org/apache/lucene/index/unsupported.29.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.29.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.29.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.29.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.29.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.29.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.30.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.30.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.30.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.30.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.30.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.30.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.30.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.30.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.31.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.31.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.31.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.31.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.31.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.31.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.31.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.31.nocfs.zip diff --git 
a/lucene/core/src/test/org/apache/lucene/index/unsupported.32.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.32.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.32.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.32.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.32.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.32.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.32.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.32.nocfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.34.cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.34.cfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.34.cfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.34.cfs.zip diff --git a/lucene/core/src/test/org/apache/lucene/index/unsupported.34.nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.34.nocfs.zip similarity index 100% rename from lucene/core/src/test/org/apache/lucene/index/unsupported.34.nocfs.zip rename to lucene/backward-codecs/src/test/org/apache/lucene/index/unsupported.34.nocfs.zip diff --git a/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java b/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java index 65157afcae7..dec2874aea0 100644 --- a/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java +++ b/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java @@ -34,7 +34,6 @@ import java.io.Reader; * Testcase for {@link SimpleNaiveBayesClassifier} */ // TODO : eventually 
remove this if / when fallback methods exist for all un-supportable codec methods (see LUCENE-4872) -@LuceneTestCase.SuppressCodecs("Lucene3x") public class SimpleNaiveBayesClassifierTest extends ClassificationTestBase { @Test diff --git a/lucene/common-build.xml b/lucene/common-build.xml index 14095ede940..96666844ae1 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -794,10 +794,9 @@ - + - + diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java index d8cf4692cbf..5ffcb6c5eea 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java @@ -23,7 +23,6 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.StoredFieldsWriter; -import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.SegmentInfo; @@ -32,14 +31,12 @@ import org.apache.lucene.store.IOContext; /** - * A {@link StoredFieldsFormat} that is very similar to - * {@link Lucene40StoredFieldsFormat} but compresses documents in chunks in + * A {@link StoredFieldsFormat} that compresses documents in chunks in * order to improve the compression ratio. *

* For a chunk size of chunkSize bytes, this {@link StoredFieldsFormat} * does not support documents larger than (2<sup>31</sup> - chunkSize) - bytes. In case this is a problem, you should use another format, such as - {@link Lucene40StoredFieldsFormat}. + * bytes.

* For optimal performance, you should use a {@link MergePolicy} that returns * segments that have the biggest byte size first. @@ -81,8 +78,6 @@ public class CompressingStoredFieldsFormat extends StoredFieldsFormat { *

* chunkSize is the minimum byte size of a chunk of documents. * A value of 1 can make sense if there is redundancy across - * fields. In that case, both performance and compression ratio should be - * better than with {@link Lucene40StoredFieldsFormat} with compressed * fields. *

* Higher values of chunkSize should improve the compression diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html index 51873599b30..7959cc0f464 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html @@ -21,363 +21,5 @@ Lucene 4.0 file format. - -

Apache Lucene - Index File Formats

- - -

Introduction

-
-

This document defines the index file formats used in this version of Lucene. -If you are using a different version of Lucene, please consult the copy of -docs/ that was distributed with -the version you are using.

-

Apache Lucene is written in Java, but several efforts are underway to write -versions of -Lucene in other programming languages. If these versions are to remain -compatible with Apache Lucene, then a language-independent definition of the -Lucene index format is required. This document thus attempts to provide a -complete and independent definition of the Apache Lucene file formats.

-

As Lucene evolves, this document should evolve. Versions of Lucene in -different programming languages should endeavor to agree on file formats, and -generate new versions of this document.

-
- -

Definitions

-
-

The fundamental concepts in Lucene are index, document, field and term.

-

An index contains a sequence of documents.

-
    -
  • A document is a sequence of fields.
  • -
  • A field is a named sequence of terms.
  • -
  • A term is a sequence of bytes.
  • -
-

The same sequence of bytes in two different fields is considered a different -term. Thus terms are represented as a pair: the string naming the field, and the -bytes within the field.

- -

Inverted Indexing

-

The index stores statistics about terms in order to make term-based search -more efficient. Lucene's index falls into the family of indexes known as an -inverted index. This is because it can list, for a term, the documents -that contain it. This is the inverse of the natural relationship, in which -documents list terms.

- -

Types of Fields

-

In Lucene, fields may be stored, in which case their text is stored -in the index literally, in a non-inverted manner. Fields that are inverted are -called indexed. A field may be both stored and indexed.

-

The text of a field may be tokenized into terms to be indexed, or the -text of a field may be used literally as a term to be indexed. Most fields are -tokenized, but sometimes it is useful for certain identifier fields to be -indexed literally.

-

See the {@link org.apache.lucene.document.Field Field} -java docs for more information on Fields.

- -

Segments

-

Lucene indexes may be composed of multiple sub-indexes, or segments. -Each segment is a fully independent index, which could be searched separately. -Indexes evolve by:

-
    -
  1. Creating new segments for newly added documents.
  2. -
  3. Merging existing segments.
  4. -
-

Searches may involve multiple segments and/or multiple indexes, each index -potentially composed of a set of segments.

- -

Document Numbers

-

Internally, Lucene refers to documents by an integer document number. -The first document added to an index is numbered zero, and each subsequent -document added gets a number one greater than the previous.

-

Note that a document's number may change, so caution should be taken when -storing these numbers outside of Lucene. In particular, numbers may change in -the following situations:

-
    -
  • -

    The numbers stored in each segment are unique only within the segment, and -must be converted before they can be used in a larger context. The standard -technique is to allocate each segment a range of values, based on the range of -numbers used in that segment. To convert a document number from a segment to an -external value, the segment's base document number is added. To convert -an external value back to a segment-specific value, the segment is identified -by the range that the external value is in, and the segment's base value is -subtracted. For example, two five-document segments might be combined, so that -the first segment has a base value of zero, and the second of five. Document -three from the second segment would have an external value of eight.

    -
  • -
  • -

    When documents are deleted, gaps are created in the numbering. These are -eventually removed as the index evolves through merging. Deleted documents are -dropped when segments are merged. A freshly-merged segment thus has no gaps in -its numbering.

    -
  • -
-
- -

Index Structure Overview

-
-

Each segment index maintains the following:

-
    -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment info}. - This contains metadata about a segment, such as the number of documents - and what files it uses. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Field names}. - This contains the set of field names used in the index. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat Stored Field values}. -This contains, for each document, a list of attribute-value pairs, where the attributes -are field names. These are used to store auxiliary information about the document, such as -its title, URL, or an identifier to access a database. The set of stored fields is what is -returned for each hit when searching. This is keyed by document number. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat Term dictionary}. -A dictionary containing all of the terms used in all of the -indexed fields of all of the documents. The dictionary also contains the number -of documents which contain the term, and pointers to the term's frequency and -proximity data. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat Term Frequency data}. -For each term in the dictionary, the numbers of all the -documents that contain that term, and the frequency of the term in that -document, unless frequencies are omitted (IndexOptions.DOCS_ONLY) -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat Term Proximity data}. -For each term in the dictionary, the positions that the -term occurs in each document. Note that this will not exist if all fields in -all documents omit position data. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40NormsFormat Normalization factors}. -For each field in each document, a value is stored -that is multiplied into the score for hits on that field. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vectors}. -For each field in each document, the term vector (sometimes -called document vector) may be stored. A term vector consists of term text and -term frequency. To add Term Vectors to your index see the -{@link org.apache.lucene.document.Field Field} constructors -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat Per-document values}. -Like stored values, these are also keyed by document -number, but are generally intended to be loaded into main memory for fast -access. Whereas stored values are generally intended for summary results from -searches, per-document values are useful for things like scoring factors. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted documents}. -An optional file indicating which documents are deleted. -
  • -
-

Details on each of these are provided in their linked pages.

-
- -

File Naming

-
-

All files belonging to a segment have the same name with varying extensions. -The extensions correspond to the different file formats described below. When -using the Compound File format (default in 1.4 and greater) these files (except -for the Segment info file, the Lock file, and Deleted documents file) are collapsed -into a single .cfs file (see below for details)

-

Typically, all segments in an index are stored in a single directory, -although this is not required.

-

As of version 2.1 (lock-less commits), file names are never re-used (there -is one exception, "segments.gen", see below). That is, when any file is saved -to the Directory it is given a never before used filename. This is achieved -using a simple generations approach. For example, the first segments file is -segments_1, then segments_2, etc. The generation is a sequential long integer -represented in alpha-numeric (base 36) form.

-
- -

Summary of File Extensions

-
-

The following table summarizes the names and extensions of the files in -Lucene:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameExtensionBrief Description
{@link org.apache.lucene.index.SegmentInfos Segments File}segments.gen, segments_NStores information about a commit point
Lock Filewrite.lockThe Write lock prevents multiple IndexWriters from writing to the same -file.
{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment Info}.siStores metadata about a segment
{@link org.apache.lucene.store.CompoundFileDirectory Compound File}.cfs, .cfeAn optional "virtual" file consisting of all the other index files for -systems that frequently run out of file handles.
{@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Fields}.fnmStores information about the fields
{@link org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat Field Index}.fdxContains pointers to field data
{@link org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat Field Data}.fdtThe stored fields for documents
{@link org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat Term Dictionary}.timThe term dictionary, stores term info
{@link org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat Term Index}.tipThe index into the Term Dictionary
{@link org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat Frequencies}.frqContains the list of docs which contain each term along with frequency
{@link org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat Positions}.prxStores position information about where a term occurs in the index
{@link org.apache.lucene.codecs.lucene40.Lucene40NormsFormat Norms}.nrm.cfs, .nrm.cfeEncodes length and boost factors for docs and fields
{@link org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat Per-Document Values}.dv.cfs, .dv.cfeEncodes additional scoring factors or other per-document information.
{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Index}.tvxStores offset into the document data file
{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Documents}.tvdContains information about each document that has term vectors
{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Fields}.tvfThe field level info about term vectors
{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted Documents}.delInfo about what documents are deleted
-
- -

Lock File

-The write lock, which is stored in the index directory by default, is named -"write.lock". If the lock directory is different from the index directory then -the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix -derived from the full path to the index directory. When this file is present, a -writer is currently modifying the index (adding or removing documents). This -lock file ensures that only one writer is modifying the index at a time.

- -

History

-

Compatibility notes are provided in this document, describing how file -formats have changed from prior versions:

-
    -
  • In version 2.1, the file format was changed to allow lock-less commits (ie, -no more commit lock). The change is fully backwards compatible: you can open a -pre-2.1 index for searching or adding/deleting of docs. When the new segments -file is saved (committed), it will be written in the new file format (meaning -no specific "upgrade" process is needed). But note that once a commit has -occurred, pre-2.1 Lucene will not be able to read the index.
  • -
  • In version 2.3, the file format was changed to allow segments to share a -single set of doc store (vectors & stored fields) files. This allows for -faster indexing in certain cases. The change is fully backwards compatible (in -the same way as the lock-less commits change in 2.1).
  • -
  • In version 2.4, Strings are now written as a true UTF-8 byte sequence, not -Java's modified UTF-8. See -LUCENE-510 for details.
  • -
  • In version 2.9, an optional opaque Map<String,String> CommitUserData -may be passed to IndexWriter's commit methods (and later retrieved), which is -recorded in the segments_N file. See -LUCENE-1382 for details. Also, -diagnostics were added to each segment written recording details about why it -was written (due to flush, merge; which OS/JRE was used; etc.). See issue -LUCENE-1654 for details.
  • -
  • In version 3.0, compressed fields are no longer written to the index (they -can still be read, but on merge the new segment will write them, uncompressed). -See issue LUCENE-1960 -for details.
  • -
  • In version 3.1, segments record the code version that created them. See -LUCENE-2720 for details. -Additionally segments track explicitly whether or not they have term vectors. -See LUCENE-2811 -for details.
  • -
  • In version 3.2, numeric fields are written natively to the stored fields -file; previously they were stored in text format only.
  • -
  • In version 3.4, fields can omit position data while still indexing term -frequencies.
  • -
  • In version 4.0, the format of the inverted index became extensible via -the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage -({@code DocValues}) was introduced. Normalization factors need no longer be a -single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. -Terms need not be unicode strings, they can be any byte sequence. Term offsets -can optionally be indexed into the postings lists. Payloads can be stored in the -term vectors.
  • -
- -

Limitations

-
-

Lucene uses a Java int to refer to -document numbers, and the index file format uses an Int32 -on-disk to store document numbers. This is a limitation -of both the index file format and the current implementation. Eventually these -should be replaced with either UInt64 values, or -better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.

-
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41StoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41StoredFieldsFormat.java index 49d97ead5f7..a00ec122ce0 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41StoredFieldsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41StoredFieldsFormat.java @@ -22,7 +22,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat; import org.apache.lucene.codecs.compressing.CompressingStoredFieldsIndexWriter; import org.apache.lucene.codecs.compressing.CompressionMode; -import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.packed.PackedInts; @@ -112,9 +111,7 @@ import org.apache.lucene.util.packed.PackedInts; * *

Known limitations

*

This {@link StoredFieldsFormat} does not support individual documents - * larger than (2<sup>31</sup> - 2<sup>14</sup>) bytes. In case this - * is a problem, you should use another format, such as - * {@link Lucene40StoredFieldsFormat}.

+ * larger than (2<sup>31</sup> - 2<sup>14</sup>) bytes.

* @lucene.experimental */ public final class Lucene41StoredFieldsFormat extends CompressingStoredFieldsFormat { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html index d429cb0fe93..abea0c2767e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html @@ -21,372 +21,5 @@ Lucene 4.1 file format. - -

Apache Lucene - Index File Formats

- - -

Introduction

-
-

This document defines the index file formats used in this version of Lucene. -If you are using a different version of Lucene, please consult the copy of -docs/ that was distributed with -the version you are using.

-

Apache Lucene is written in Java, but several efforts are underway to write -versions of -Lucene in other programming languages. If these versions are to remain -compatible with Apache Lucene, then a language-independent definition of the -Lucene index format is required. This document thus attempts to provide a -complete and independent definition of the Apache Lucene file formats.

-

As Lucene evolves, this document should evolve. Versions of Lucene in -different programming languages should endeavor to agree on file formats, and -generate new versions of this document.

-
- -

Definitions

-
-

The fundamental concepts in Lucene are index, document, field and term.

-

An index contains a sequence of documents.

-
    -
  • A document is a sequence of fields.
  • -
  • A field is a named sequence of terms.
  • -
  • A term is a sequence of bytes.
  • -
-

The same sequence of bytes in two different fields is considered a different -term. Thus terms are represented as a pair: the string naming the field, and the -bytes within the field.

- -

Inverted Indexing

-

The index stores statistics about terms in order to make term-based search -more efficient. Lucene's index falls into the family of indexes known as an -inverted index. This is because it can list, for a term, the documents -that contain it. This is the inverse of the natural relationship, in which -documents list terms.

- -

Types of Fields

-

In Lucene, fields may be stored, in which case their text is stored -in the index literally, in a non-inverted manner. Fields that are inverted are -called indexed. A field may be both stored and indexed.

-

The text of a field may be tokenized into terms to be indexed, or the -text of a field may be used literally as a term to be indexed. Most fields are -tokenized, but sometimes it is useful for certain identifier fields to be -indexed literally.

-

See the {@link org.apache.lucene.document.Field Field} -java docs for more information on Fields.

- -

Segments

-

Lucene indexes may be composed of multiple sub-indexes, or segments. -Each segment is a fully independent index, which could be searched separately. -Indexes evolve by:

-
    -
  1. Creating new segments for newly added documents.
  2. -
  3. Merging existing segments.
  4. -
-

Searches may involve multiple segments and/or multiple indexes, each index -potentially composed of a set of segments.

- -

Document Numbers

-

Internally, Lucene refers to documents by an integer document number. -The first document added to an index is numbered zero, and each subsequent -document added gets a number one greater than the previous.

-

Note that a document's number may change, so caution should be taken when -storing these numbers outside of Lucene. In particular, numbers may change in -the following situations:

-
    -
  • -

    The numbers stored in each segment are unique only within the segment, and -must be converted before they can be used in a larger context. The standard -technique is to allocate each segment a range of values, based on the range of -numbers used in that segment. To convert a document number from a segment to an -external value, the segment's base document number is added. To convert -an external value back to a segment-specific value, the segment is identified -by the range that the external value is in, and the segment's base value is -subtracted. For example two five document segments might be combined, so that -the first segment has a base value of zero, and the second of five. Document -three from the second segment would have an external value of eight.

    -
  • -
  • -

    When documents are deleted, gaps are created in the numbering. These are -eventually removed as the index evolves through merging. Deleted documents are -dropped when segments are merged. A freshly-merged segment thus has no gaps in -its numbering.

    -
  • -
-
- -

Index Structure Overview

-
-

Each segment index maintains the following:

-
    -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment info}. - This contains metadata about a segment, such as the number of documents, - and what files it uses. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Field names}. - This contains the set of field names used in the index. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Stored Field values}. -This contains, for each document, a list of attribute-value pairs, where the attributes -are field names. These are used to store auxiliary information about the document, such as -its title, url, or an identifier to access a database. The set of stored fields is what is -returned for each hit when searching. This is keyed by document number. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term dictionary}. -A dictionary containing all of the terms used in all of the -indexed fields of all of the documents. The dictionary also contains the number -of documents which contain the term, and pointers to the term's frequency and -proximity data. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Frequency data}. -For each term in the dictionary, the numbers of all the -documents that contain that term, and the frequency of the term in that -document, unless frequencies are omitted (IndexOptions.DOCS_ONLY) -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Proximity data}. -For each term in the dictionary, the positions that the -term occurs in each document. Note that this will not exist if all fields in -all documents omit position data. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40NormsFormat Normalization factors}. -For each field in each document, a value is stored -that is multiplied into the score for hits on that field. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vectors}. -For each field in each document, the term vector (sometimes -called document vector) may be stored. A term vector consists of term text and -term frequency. To add Term Vectors to your index see the -{@link org.apache.lucene.document.Field Field} constructors -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat Per-document values}. -Like stored values, these are also keyed by document -number, but are generally intended to be loaded into main memory for fast -access. Whereas stored values are generally intended for summary results from -searches, per-document values are useful for things like scoring factors. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted documents}. -An optional file indicating which documents are deleted. -
  • -
-

Details on each of these are provided in their linked pages.

-
- -

File Naming

-
-

All files belonging to a segment have the same name with varying extensions. -The extensions correspond to the different file formats described below. When -using the Compound File format (default in 1.4 and greater) these files (except -for the Segment info file, the Lock file, and Deleted documents file) are collapsed -into a single .cfs file (see below for details)

-

Typically, all segments in an index are stored in a single directory, -although this is not required.

-

As of version 2.1 (lock-less commits), file names are never re-used (there -is one exception, "segments.gen", see below). That is, when any file is saved -to the Directory it is given a never before used filename. This is achieved -using a simple generations approach. For example, the first segments file is -segments_1, then segments_2, etc. The generation is a sequential long integer -represented in alpha-numeric (base 36) form.

-
- -

Summary of File Extensions

-
-

The following table summarizes the names and extensions of the files in -Lucene:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameExtensionBrief Description
{@link org.apache.lucene.index.SegmentInfos Segments File}segments.gen, segments_NStores information about a commit point
Lock Filewrite.lockThe Write lock prevents multiple IndexWriters from writing to the same -file.
{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment Info}.siStores metadata about a segment
{@link org.apache.lucene.store.CompoundFileDirectory Compound File}.cfs, .cfeAn optional "virtual" file consisting of all the other index files for -systems that frequently run out of file handles.
{@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Fields}.fnmStores information about the fields
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Index}.fdxContains pointers to field data
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Data}.fdtThe stored fields for documents
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Dictionary}.timThe term dictionary, stores term info
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Index}.tipThe index into the Term Dictionary
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Frequencies}.docContains the list of docs which contain each term along with frequency
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Positions}.posStores position information about where a term occurs in the index
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Payloads}.payStores additional per-position metadata information such as character offsets and user payloads
{@link org.apache.lucene.codecs.lucene40.Lucene40NormsFormat Norms}.nrm.cfs, .nrm.cfeEncodes length and boost factors for docs and fields
{@link org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat Per-Document Values}.dv.cfs, .dv.cfeEncodes additional scoring factors or other per-document information.
{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Index}.tvxStores offset into the document data file
{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Documents}.tvdContains information about each document that has term vectors
{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Fields}.tvfThe field level info about term vectors
{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted Documents}.delInfo about what documents are deleted
-
- -

Lock File

-The write lock, which is stored in the index directory by default, is named -"write.lock". If the lock directory is different from the index directory then -the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix -derived from the full path to the index directory. When this file is present, a -writer is currently modifying the index (adding or removing documents). This -lock file ensures that only one writer is modifying the index at a time.

- -

History

-

Compatibility notes are provided in this document, describing how file -formats have changed from prior versions:

-
    -
  • In version 2.1, the file format was changed to allow lock-less commits (ie, -no more commit lock). The change is fully backwards compatible: you can open a -pre-2.1 index for searching or adding/deleting of docs. When the new segments -file is saved (committed), it will be written in the new file format (meaning -no specific "upgrade" process is needed). But note that once a commit has -occurred, pre-2.1 Lucene will not be able to read the index.
  • -
  • In version 2.3, the file format was changed to allow segments to share a -single set of doc store (vectors & stored fields) files. This allows for -faster indexing in certain cases. The change is fully backwards compatible (in -the same way as the lock-less commits change in 2.1).
  • -
  • In version 2.4, Strings are now written as a true UTF-8 byte sequence, not -Java's modified UTF-8. See -LUCENE-510 for details.
  • -
  • In version 2.9, an optional opaque Map<String,String> CommitUserData -may be passed to IndexWriter's commit methods (and later retrieved), which is -recorded in the segments_N file. See -LUCENE-1382 for details. Also, -diagnostics were added to each segment written recording details about why it -was written (due to flush, merge; which OS/JRE was used; etc.). See issue -LUCENE-1654 for details.
  • -
  • In version 3.0, compressed fields are no longer written to the index (they -can still be read, but on merge the new segment will write them, uncompressed). -See issue LUCENE-1960 -for details.
  • -
  • In version 3.1, segments record the code version that created them. See -LUCENE-2720 for details. -Additionally segments track explicitly whether or not they have term vectors. -See LUCENE-2811 -for details.
  • -
  • In version 3.2, numeric fields are written natively to the stored fields -file; previously they were stored in text format only.
  • -
  • In version 3.4, fields can omit position data while still indexing term -frequencies.
  • -
  • In version 4.0, the format of the inverted index became extensible via -the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage -({@code DocValues}) was introduced. Normalization factors need no longer be a -single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. -Terms need not be unicode strings, they can be any byte sequence. Term offsets -can optionally be indexed into the postings lists. Payloads can be stored in the -term vectors.
  • -
  • In version 4.1, the format of the postings list changed to use either -of FOR compression or variable-byte encoding, depending upon the frequency -of the term. Terms appearing only once were changed to inline directly into -the term dictionary. Stored fields are compressed by default.
  • -
- -

Limitations

-
-

Lucene uses a Java int to refer to -document numbers, and the index file format uses an Int32 -on-disk to store document numbers. This is a limitation -of both the index file format and the current implementation. Eventually these -should be replaced with either UInt64 values, or -better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.

-
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene410/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene410/package.html index 2282f7ac4da..7be42fa47ac 100755 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene410/package.html +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene410/package.html @@ -238,7 +238,7 @@ Lucene:

file. -{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment Info} +{@link org.apache.lucene.codecs.lucene46.Lucene46SegmentInfoFormat Segment Info} .si Stores metadata about a segment diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html index ae55e7a63de..48043b37609 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html @@ -21,375 +21,5 @@ Lucene 4.2 file format. - -

Apache Lucene - Index File Formats

- - -

Introduction

-
-

This document defines the index file formats used in this version of Lucene. -If you are using a different version of Lucene, please consult the copy of -docs/ that was distributed with -the version you are using.

-

Apache Lucene is written in Java, but several efforts are underway to write -versions of -Lucene in other programming languages. If these versions are to remain -compatible with Apache Lucene, then a language-independent definition of the -Lucene index format is required. This document thus attempts to provide a -complete and independent definition of the Apache Lucene file formats.

-

As Lucene evolves, this document should evolve. Versions of Lucene in -different programming languages should endeavor to agree on file formats, and -generate new versions of this document.

-
- -

Definitions

-
-

The fundamental concepts in Lucene are index, document, field and term.

-

An index contains a sequence of documents.

-
    -
  • A document is a sequence of fields.
  • -
  • A field is a named sequence of terms.
  • -
  • A term is a sequence of bytes.
  • -
-

The same sequence of bytes in two different fields is considered a different -term. Thus terms are represented as a pair: the string naming the field, and the -bytes within the field.

- -

Inverted Indexing

-

The index stores statistics about terms in order to make term-based search -more efficient. Lucene's index falls into the family of indexes known as an -inverted index. This is because it can list, for a term, the documents -that contain it. This is the inverse of the natural relationship, in which -documents list terms.

- -

Types of Fields

-

In Lucene, fields may be stored, in which case their text is stored -in the index literally, in a non-inverted manner. Fields that are inverted are -called indexed. A field may be both stored and indexed.

-

The text of a field may be tokenized into terms to be indexed, or the -text of a field may be used literally as a term to be indexed. Most fields are -tokenized, but sometimes it is useful for certain identifier fields to be -indexed literally.

-

See the {@link org.apache.lucene.document.Field Field} -java docs for more information on Fields.

- -

Segments

-

Lucene indexes may be composed of multiple sub-indexes, or segments. -Each segment is a fully independent index, which could be searched separately. -Indexes evolve by:

-
    -
  1. Creating new segments for newly added documents.
  2. -
  3. Merging existing segments.
  4. -
-

Searches may involve multiple segments and/or multiple indexes, each index -potentially composed of a set of segments.

- -

Document Numbers

-

Internally, Lucene refers to documents by an integer document number. -The first document added to an index is numbered zero, and each subsequent -document added gets a number one greater than the previous.

-

Note that a document's number may change, so caution should be taken when -storing these numbers outside of Lucene. In particular, numbers may change in -the following situations:

-
    -
  • -

    The numbers stored in each segment are unique only within the segment, and -must be converted before they can be used in a larger context. The standard -technique is to allocate each segment a range of values, based on the range of -numbers used in that segment. To convert a document number from a segment to an -external value, the segment's base document number is added. To convert -an external value back to a segment-specific value, the segment is identified -by the range that the external value is in, and the segment's base value is -subtracted. For example two five document segments might be combined, so that -the first segment has a base value of zero, and the second of five. Document -three from the second segment would have an external value of eight.

    -
  • -
  • -

    When documents are deleted, gaps are created in the numbering. These are -eventually removed as the index evolves through merging. Deleted documents are -dropped when segments are merged. A freshly-merged segment thus has no gaps in -its numbering.

    -
  • -
-
- -

Index Structure Overview

-
-

Each segment index maintains the following:

-
    -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment info}. - This contains metadata about a segment, such as the number of documents, - and what files it uses. -
  • -
  • -{@link org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat Field names}. - This contains the set of field names used in the index. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Stored Field values}. -This contains, for each document, a list of attribute-value pairs, where the attributes -are field names. These are used to store auxiliary information about the document, such as -its title, url, or an identifier to access a database. The set of stored fields is what is -returned for each hit when searching. This is keyed by document number. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term dictionary}. -A dictionary containing all of the terms used in all of the -indexed fields of all of the documents. The dictionary also contains the number -of documents which contain the term, and pointers to the term's frequency and -proximity data. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Frequency data}. -For each term in the dictionary, the numbers of all the -documents that contain that term, and the frequency of the term in that -document, unless frequencies are omitted (IndexOptions.DOCS_ONLY) -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Proximity data}. -For each term in the dictionary, the positions that the -term occurs in each document. Note that this will not exist if all fields in -all documents omit position data. -
  • -
  • -{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Normalization factors}. -For each field in each document, a value is stored -that is multiplied into the score for hits on that field. -
  • -
  • -{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vectors}. -For each field in each document, the term vector (sometimes -called document vector) may be stored. A term vector consists of term text and -term frequency. To add Term Vectors to your index see the -{@link org.apache.lucene.document.Field Field} constructors -
  • -
  • -{@link org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat Per-document values}. -Like stored values, these are also keyed by document -number, but are generally intended to be loaded into main memory for fast -access. Whereas stored values are generally intended for summary results from -searches, per-document values are useful for things like scoring factors. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted documents}. -An optional file indicating which documents are deleted. -
  • -
-

Details on each of these are provided in their linked pages.

-
- -

File Naming

-
-

All files belonging to a segment have the same name with varying extensions. -The extensions correspond to the different file formats described below. When -using the Compound File format (default in 1.4 and greater) these files (except -for the Segment info file, the Lock file, and Deleted documents file) are collapsed -into a single .cfs file (see below for details)

-

Typically, all segments in an index are stored in a single directory, -although this is not required.

-

As of version 2.1 (lock-less commits), file names are never re-used (there -is one exception, "segments.gen", see below). That is, when any file is saved -to the Directory it is given a never before used filename. This is achieved -using a simple generations approach. For example, the first segments file is -segments_1, then segments_2, etc. The generation is a sequential long integer -represented in alpha-numeric (base 36) form.

-
- -

Summary of File Extensions

-
-

The following table summarizes the names and extensions of the files in -Lucene:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameExtensionBrief Description
{@link org.apache.lucene.index.SegmentInfos Segments File}segments.gen, segments_NStores information about a commit point
Lock Filewrite.lockThe Write lock prevents multiple IndexWriters from writing to the same -file.
{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment Info}.siStores metadata about a segment
{@link org.apache.lucene.store.CompoundFileDirectory Compound File}.cfs, .cfeAn optional "virtual" file consisting of all the other index files for -systems that frequently run out of file handles.
{@link org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat Fields}.fnmStores information about the fields
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Index}.fdxContains pointers to field data
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Data}.fdtThe stored fields for documents
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Dictionary}.timThe term dictionary, stores term info
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Index}.tipThe index into the Term Dictionary
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Frequencies}.docContains the list of docs which contain each term along with frequency
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Positions}.posStores position information about where a term occurs in the index
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Payloads}.payStores additional per-position metadata information such as character offsets and user payloads
{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Norms}.nvd, .nvmEncodes length and boost factors for docs and fields
{@link org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat Per-Document Values}.dvd, .dvmEncodes additional scoring factors or other per-document information.
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Index}.tvxStores offset into the document data file
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Documents}.tvdContains information about each document that has term vectors
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Fields}.tvfThe field level info about term vectors
{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted Documents}.delInfo about what documents are deleted
-
- -

Lock File

-The write lock, which is stored in the index directory by default, is named -"write.lock". If the lock directory is different from the index directory then -the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix -derived from the full path to the index directory. When this file is present, a -writer is currently modifying the index (adding or removing documents). This -lock file ensures that only one writer is modifying the index at a time.

- -

History

-

Compatibility notes are provided in this document, describing how file -formats have changed from prior versions:

-
    -
  • In version 2.1, the file format was changed to allow lock-less commits (ie, -no more commit lock). The change is fully backwards compatible: you can open a -pre-2.1 index for searching or adding/deleting of docs. When the new segments -file is saved (committed), it will be written in the new file format (meaning -no specific "upgrade" process is needed). But note that once a commit has -occurred, pre-2.1 Lucene will not be able to read the index.
  • -
  • In version 2.3, the file format was changed to allow segments to share a -single set of doc store (vectors & stored fields) files. This allows for -faster indexing in certain cases. The change is fully backwards compatible (in -the same way as the lock-less commits change in 2.1).
  • -
  • In version 2.4, Strings are now written as a true UTF-8 byte sequence, not -Java's modified UTF-8. See -LUCENE-510 for details.
  • -
  • In version 2.9, an optional opaque Map<String,String> CommitUserData -may be passed to IndexWriter's commit methods (and later retrieved), which is -recorded in the segments_N file. See -LUCENE-1382 for details. Also, -diagnostics were added to each segment written recording details about why it -was written (due to flush, merge; which OS/JRE was used; etc.). See issue -LUCENE-1654 for details.
  • -
  • In version 3.0, compressed fields are no longer written to the index (they -can still be read, but on merge the new segment will write them, uncompressed). -See issue LUCENE-1960 -for details.
  • -
  • In version 3.1, segments record the code version that created them. See -LUCENE-2720 for details. -Additionally, segments track explicitly whether or not they have term vectors. -See LUCENE-2811 -for details.
  • -
  • In version 3.2, numeric fields are written natively to the stored fields -file; previously they were stored in text format only.
  • -
  • In version 3.4, fields can omit position data while still indexing term -frequencies.
  • -
  • In version 4.0, the format of the inverted index became extensible via -the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage -({@code DocValues}) was introduced. Normalization factors need no longer be a -single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. -Terms need not be unicode strings, they can be any byte sequence. Term offsets -can optionally be indexed into the postings lists. Payloads can be stored in the -term vectors.
  • -
  • In version 4.1, the format of the postings list changed to use either -of FOR compression or variable-byte encoding, depending upon the frequency -of the term. Terms appearing only once were changed to inline directly into -the term dictionary. Stored fields are compressed by default.
  • -
  • In version 4.2, term vectors are compressed by default. DocValues has -a new multi-valued type (SortedSet), that can be used for faceting/grouping/joining -on multi-valued fields.
  • -
- -

Limitations

-
-

Lucene uses a Java int to refer to -document numbers, and the index file format uses an Int32 -on-disk to store document numbers. This is a limitation -of both the index file format and the current implementation. Eventually these -should be replaced with either UInt64 values, or -better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.

-
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/package.html deleted file mode 100644 index 890ca6ca92c..00000000000 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/package.html +++ /dev/null @@ -1,396 +0,0 @@ - - - - - - - -Lucene 4.5 file format. - -

Apache Lucene - Index File Formats

- - -

Introduction

-
-

This document defines the index file formats used in this version of Lucene. -If you are using a different version of Lucene, please consult the copy of -docs/ that was distributed with -the version you are using.

-

Apache Lucene is written in Java, but several efforts are underway to write -versions of -Lucene in other programming languages. If these versions are to remain -compatible with Apache Lucene, then a language-independent definition of the -Lucene index format is required. This document thus attempts to provide a -complete and independent definition of the Apache Lucene file formats.

-

As Lucene evolves, this document should evolve. Versions of Lucene in -different programming languages should endeavor to agree on file formats, and -generate new versions of this document.

-
- -

Definitions

-
-

The fundamental concepts in Lucene are index, document, field and term.

-

An index contains a sequence of documents.

-
    -
  • A document is a sequence of fields.
  • -
  • A field is a named sequence of terms.
  • -
  • A term is a sequence of bytes.
  • -
-

The same sequence of bytes in two different fields is considered a different -term. Thus terms are represented as a pair: the string naming the field, and the -bytes within the field.

- -

Inverted Indexing

-

The index stores statistics about terms in order to make term-based search -more efficient. Lucene's index falls into the family of indexes known as an -inverted index. This is because it can list, for a term, the documents -that contain it. This is the inverse of the natural relationship, in which -documents list terms.

- -

Types of Fields

-

In Lucene, fields may be stored, in which case their text is stored -in the index literally, in a non-inverted manner. Fields that are inverted are -called indexed. A field may be both stored and indexed.

-

The text of a field may be tokenized into terms to be indexed, or the -text of a field may be used literally as a term to be indexed. Most fields are -tokenized, but sometimes it is useful for certain identifier fields to be -indexed literally.

-

See the {@link org.apache.lucene.document.Field Field} -java docs for more information on Fields.

- -

Segments

-

Lucene indexes may be composed of multiple sub-indexes, or segments. -Each segment is a fully independent index, which could be searched separately. -Indexes evolve by:

-
    -
  1. Creating new segments for newly added documents.
  2. -
  3. Merging existing segments.
  4. -
-

Searches may involve multiple segments and/or multiple indexes, each index -potentially composed of a set of segments.

- -

Document Numbers

-

Internally, Lucene refers to documents by an integer document number. -The first document added to an index is numbered zero, and each subsequent -document added gets a number one greater than the previous.

-

Note that a document's number may change, so caution should be taken when -storing these numbers outside of Lucene. In particular, numbers may change in -the following situations:

-
    -
  • -

    The numbers stored in each segment are unique only within the segment, and -must be converted before they can be used in a larger context. The standard -technique is to allocate each segment a range of values, based on the range of -numbers used in that segment. To convert a document number from a segment to an -external value, the segment's base document number is added. To convert -an external value back to a segment-specific value, the segment is identified -by the range that the external value is in, and the segment's base value is -subtracted. For example two five document segments might be combined, so that -the first segment has a base value of zero, and the second of five. Document -three from the second segment would have an external value of eight.

    -
  • -
  • -

    When documents are deleted, gaps are created in the numbering. These are -eventually removed as the index evolves through merging. Deleted documents are -dropped when segments are merged. A freshly-merged segment thus has no gaps in -its numbering.

    -
  • -
-
- -

Index Structure Overview

-
-

Each segment index maintains the following:

-
    -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment info}. - This contains metadata about a segment, such as the number of documents and - what files it uses. -
  • -
  • -{@link org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat Field names}. - This contains the set of field names used in the index. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Stored Field values}. -This contains, for each document, a list of attribute-value pairs, where the attributes -are field names. These are used to store auxiliary information about the document, such as -its title, url, or an identifier to access a database. The set of stored fields is what is -returned for each hit when searching. This is keyed by document number. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term dictionary}. -A dictionary containing all of the terms used in all of the -indexed fields of all of the documents. The dictionary also contains the number -of documents which contain the term, and pointers to the term's frequency and -proximity data. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Frequency data}. -For each term in the dictionary, the numbers of all the -documents that contain that term, and the frequency of the term in that -document, unless frequencies are omitted (IndexOptions.DOCS_ONLY) -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Proximity data}. -For each term in the dictionary, the positions that the -term occurs in each document. Note that this will not exist if all fields in -all documents omit position data. -
  • -
  • -{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Normalization factors}. -For each field in each document, a value is stored -that is multiplied into the score for hits on that field. -
  • -
  • -{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vectors}. -For each field in each document, the term vector (sometimes -called document vector) may be stored. A term vector consists of term text and -term frequency. To add Term Vectors to your index see the -{@link org.apache.lucene.document.Field Field} constructors -
  • -
  • -{@link org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat Per-document values}. -Like stored values, these are also keyed by document -number, but are generally intended to be loaded into main memory for fast -access. Whereas stored values are generally intended for summary results from -searches, per-document values are useful for things like scoring factors. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted documents}. -An optional file indicating which documents are deleted. -
  • -
-

Details on each of these are provided in their linked pages.

-
- -

File Naming

-
-

All files belonging to a segment have the same name with varying extensions. -The extensions correspond to the different file formats described below. When -using the Compound File format (default in 1.4 and greater) these files (except -for the Segment info file, the Lock file, and Deleted documents file) are collapsed -into a single .cfs file (see below for details)

-

Typically, all segments in an index are stored in a single directory, -although this is not required.

-

As of version 2.1 (lock-less commits), file names are never re-used (there -is one exception, "segments.gen", see below). That is, when any file is saved -to the Directory it is given a never before used filename. This is achieved -using a simple generations approach. For example, the first segments file is -segments_1, then segments_2, etc. The generation is a sequential long integer -represented in alpha-numeric (base 36) form.

-
- -

Summary of File Extensions

-
-

The following table summarizes the names and extensions of the files in -Lucene:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameExtensionBrief Description
{@link org.apache.lucene.index.SegmentInfos Segments File}segments.gen, segments_NStores information about a commit point
Lock Filewrite.lockThe Write lock prevents multiple IndexWriters from writing to the same -file.
{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment Info}.siStores metadata about a segment
{@link org.apache.lucene.store.CompoundFileDirectory Compound File}.cfs, .cfeAn optional "virtual" file consisting of all the other index files for -systems that frequently run out of file handles.
{@link org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat Fields}.fnmStores information about the fields
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Index}.fdxContains pointers to field data
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Data}.fdtThe stored fields for documents
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Dictionary}.timThe term dictionary, stores term info
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Index}.tipThe index into the Term Dictionary
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Frequencies}.docContains the list of docs which contain each term along with frequency
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Positions}.posStores position information about where a term occurs in the index
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Payloads}.payStores additional per-position metadata information such as character offsets and user payloads
{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Norms}.nvd, .nvmEncodes length and boost factors for docs and fields
{@link org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat Per-Document Values}.dvd, .dvmEncodes additional scoring factors or other per-document information.
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Index}.tvxStores offset into the document data file
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Documents}.tvdContains information about each document that has term vectors
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Fields}.tvfThe field level info about term vectors
{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted Documents}.delInfo about what documents are deleted
-
- -

Lock File

-The write lock, which is stored in the index directory by default, is named -"write.lock". If the lock directory is different from the index directory then -the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix -derived from the full path to the index directory. When this file is present, a -writer is currently modifying the index (adding or removing documents). This -lock file ensures that only one writer is modifying the index at a time.

- -

History

-

Compatibility notes are provided in this document, describing how file -formats have changed from prior versions:

-
    -
  • In version 2.1, the file format was changed to allow lock-less commits (ie, -no more commit lock). The change is fully backwards compatible: you can open a -pre-2.1 index for searching or adding/deleting of docs. When the new segments -file is saved (committed), it will be written in the new file format (meaning -no specific "upgrade" process is needed). But note that once a commit has -occurred, pre-2.1 Lucene will not be able to read the index.
  • -
  • In version 2.3, the file format was changed to allow segments to share a -single set of doc store (vectors & stored fields) files. This allows for -faster indexing in certain cases. The change is fully backwards compatible (in -the same way as the lock-less commits change in 2.1).
  • -
  • In version 2.4, Strings are now written as a true UTF-8 byte sequence, not -Java's modified UTF-8. See -LUCENE-510 for details.
  • -
  • In version 2.9, an optional opaque Map<String,String> CommitUserData -may be passed to IndexWriter's commit methods (and later retrieved), which is -recorded in the segments_N file. See -LUCENE-1382 for details. Also, -diagnostics were added to each segment written recording details about why it -was written (due to flush, merge; which OS/JRE was used; etc.). See issue -LUCENE-1654 for details.
  • -
  • In version 3.0, compressed fields are no longer written to the index (they -can still be read, but on merge the new segment will write them, uncompressed). -See issue LUCENE-1960 -for details.
  • -
  • In version 3.1, segments record the code version that created them. See -LUCENE-2720 for details. -Additionally, segments track explicitly whether or not they have term vectors. -See LUCENE-2811 -for details.
  • -
  • In version 3.2, numeric fields are written natively to the stored fields -file; previously they were stored in text format only.
  • -
  • In version 3.4, fields can omit position data while still indexing term -frequencies.
  • -
  • In version 4.0, the format of the inverted index became extensible via -the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage -({@code DocValues}) was introduced. Normalization factors need no longer be a -single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. -Terms need not be unicode strings, they can be any byte sequence. Term offsets -can optionally be indexed into the postings lists. Payloads can be stored in the -term vectors.
  • -
  • In version 4.1, the format of the postings list changed to use either -of FOR compression or variable-byte encoding, depending upon the frequency -of the term. Terms appearing only once were changed to inline directly into -the term dictionary. Stored fields are compressed by default.
  • -
  • In version 4.2, term vectors are compressed by default. DocValues has -a new multi-valued type (SortedSet), that can be used for faceting/grouping/joining -on multi-valued fields.
  • -
  • In version 4.5, DocValues were extended to explicitly represent missing values.
  • -
- -

Limitations

-
-

Lucene uses a Java int to refer to -document numbers, and the index file format uses an Int32 -on-disk to store document numbers. This is a limitation -of both the index file format and the current implementation. Eventually these -should be replaced with either UInt64 values, or -better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.

-
- - diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html index f3cb05c9223..8acd7aa10e6 100755 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html @@ -21,381 +21,5 @@ Lucene 4.6 file format. - -

Apache Lucene - Index File Formats

- - -

Introduction

-
-

This document defines the index file formats used in this version of Lucene. -If you are using a different version of Lucene, please consult the copy of -docs/ that was distributed with -the version you are using.

-

Apache Lucene is written in Java, but several efforts are underway to write -versions of -Lucene in other programming languages. If these versions are to remain -compatible with Apache Lucene, then a language-independent definition of the -Lucene index format is required. This document thus attempts to provide a -complete and independent definition of the Apache Lucene file formats.

-

As Lucene evolves, this document should evolve. Versions of Lucene in -different programming languages should endeavor to agree on file formats, and -generate new versions of this document.

-
- -

Definitions

-
-

The fundamental concepts in Lucene are index, document, field and term.

-

An index contains a sequence of documents.

-
    -
  • A document is a sequence of fields.
  • -
  • A field is a named sequence of terms.
  • -
  • A term is a sequence of bytes.
  • -
-

The same sequence of bytes in two different fields is considered a different -term. Thus terms are represented as a pair: the string naming the field, and the -bytes within the field.

- -

Inverted Indexing

-

The index stores statistics about terms in order to make term-based search -more efficient. Lucene's index falls into the family of indexes known as an -inverted index. This is because it can list, for a term, the documents -that contain it. This is the inverse of the natural relationship, in which -documents list terms.

- -

Types of Fields

-

In Lucene, fields may be stored, in which case their text is stored -in the index literally, in a non-inverted manner. Fields that are inverted are -called indexed. A field may be both stored and indexed.

-

The text of a field may be tokenized into terms to be indexed, or the -text of a field may be used literally as a term to be indexed. Most fields are -tokenized, but sometimes it is useful for certain identifier fields to be -indexed literally.

-

See the {@link org.apache.lucene.document.Field Field} -java docs for more information on Fields.

- -

Segments

-

Lucene indexes may be composed of multiple sub-indexes, or segments. -Each segment is a fully independent index, which could be searched separately. -Indexes evolve by:

-
    -
  1. Creating new segments for newly added documents.
  2. -
  3. Merging existing segments.
  4. -
-

Searches may involve multiple segments and/or multiple indexes, each index -potentially composed of a set of segments.

- -

Document Numbers

-

Internally, Lucene refers to documents by an integer document number. -The first document added to an index is numbered zero, and each subsequent -document added gets a number one greater than the previous.

-

Note that a document's number may change, so caution should be taken when -storing these numbers outside of Lucene. In particular, numbers may change in -the following situations:

-
    -
  • -

    The numbers stored in each segment are unique only within the segment, and -must be converted before they can be used in a larger context. The standard -technique is to allocate each segment a range of values, based on the range of -numbers used in that segment. To convert a document number from a segment to an -external value, the segment's base document number is added. To convert -an external value back to a segment-specific value, the segment is identified -by the range that the external value is in, and the segment's base value is -subtracted. For example two five document segments might be combined, so that -the first segment has a base value of zero, and the second of five. Document -three from the second segment would have an external value of eight.

    -
  • -
  • -

    When documents are deleted, gaps are created in the numbering. These are -eventually removed as the index evolves through merging. Deleted documents are -dropped when segments are merged. A freshly-merged segment thus has no gaps in -its numbering.

    -
  • -
-
- -

Index Structure Overview

-
-

Each segment index maintains the following:

-
    -
  • -{@link org.apache.lucene.codecs.lucene46.Lucene46SegmentInfoFormat Segment info}. - This contains metadata about a segment, such as the number of documents and - what files it uses. -
  • -
  • -{@link org.apache.lucene.codecs.lucene46.Lucene46FieldInfosFormat Field names}. - This contains the set of field names used in the index. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Stored Field values}. -This contains, for each document, a list of attribute-value pairs, where the attributes -are field names. These are used to store auxiliary information about the document, such as -its title, url, or an identifier to access a database. The set of stored fields is what is -returned for each hit when searching. This is keyed by document number. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term dictionary}. -A dictionary containing all of the terms used in all of the -indexed fields of all of the documents. The dictionary also contains the number -of documents which contain the term, and pointers to the term's frequency and -proximity data. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Frequency data}. -For each term in the dictionary, the numbers of all the -documents that contain that term, and the frequency of the term in that -document, unless frequencies are omitted (IndexOptions.DOCS_ONLY) -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Proximity data}. -For each term in the dictionary, the positions that the -term occurs in each document. Note that this will not exist if all fields in -all documents omit position data. -
  • -
  • -{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Normalization factors}. -For each field in each document, a value is stored -that is multiplied into the score for hits on that field. -
  • -
  • -{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vectors}. -For each field in each document, the term vector (sometimes -called document vector) may be stored. A term vector consists of term text and -term frequency. To add Term Vectors to your index see the -{@link org.apache.lucene.document.Field Field} constructors -
  • -
  • -{@link org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat Per-document values}. -Like stored values, these are also keyed by document -number, but are generally intended to be loaded into main memory for fast -access. Whereas stored values are generally intended for summary results from -searches, per-document values are useful for things like scoring factors. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted documents}. -An optional file indicating which documents are deleted. -
  • -
-

Details on each of these are provided in their linked pages.

-
- -

File Naming

-
-

All files belonging to a segment have the same name with varying extensions. -The extensions correspond to the different file formats described below. When -using the Compound File format (default in 1.4 and greater) these files (except -for the Segment info file, the Lock file, and Deleted documents file) are collapsed -into a single .cfs file (see below for details)

-

Typically, all segments in an index are stored in a single directory, -although this is not required.

-

As of version 2.1 (lock-less commits), file names are never re-used (there -is one exception, "segments.gen", see below). That is, when any file is saved -to the Directory it is given a never before used filename. This is achieved -using a simple generations approach. For example, the first segments file is -segments_1, then segments_2, etc. The generation is a sequential long integer -represented in alpha-numeric (base 36) form.

-
- -

Summary of File Extensions

-
-

The following table summarizes the names and extensions of the files in -Lucene:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameExtensionBrief Description
{@link org.apache.lucene.index.SegmentInfos Segments File}segments.gen, segments_NStores information about a commit point
Lock Filewrite.lockThe Write lock prevents multiple IndexWriters from writing to the same -file.
{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment Info}.siStores metadata about a segment
{@link org.apache.lucene.store.CompoundFileDirectory Compound File}.cfs, .cfeAn optional "virtual" file consisting of all the other index files for -systems that frequently run out of file handles.
{@link org.apache.lucene.codecs.lucene46.Lucene46FieldInfosFormat Fields}.fnmStores information about the fields
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Index}.fdxContains pointers to field data
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Data}.fdtThe stored fields for documents
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Dictionary}.timThe term dictionary, stores term info
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Index}.tipThe index into the Term Dictionary
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Frequencies}.docContains the list of docs which contain each term along with frequency
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Positions}.posStores position information about where a term occurs in the index
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Payloads}.payStores additional per-position metadata information such as character offsets and user payloads
{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Norms}.nvd, .nvmEncodes length and boost factors for docs and fields
{@link org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat Per-Document Values}.dvd, .dvmEncodes additional scoring factors or other per-document information.
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Index}.tvxStores offset into the document data file
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Documents}.tvdContains information about each document that has term vectors
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Fields}.tvfThe field level info about term vectors
{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted Documents}.delInfo about what documents are deleted
-
- -

Lock File

-The write lock, which is stored in the index directory by default, is named -"write.lock". If the lock directory is different from the index directory then -the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix -derived from the full path to the index directory. When this file is present, a -writer is currently modifying the index (adding or removing documents). This -lock file ensures that only one writer is modifying the index at a time.

- -

History

-

Compatibility notes are provided in this document, describing how file -formats have changed from prior versions:

-
    -
  • In version 2.1, the file format was changed to allow lock-less commits (ie, -no more commit lock). The change is fully backwards compatible: you can open a -pre-2.1 index for searching or adding/deleting of docs. When the new segments -file is saved (committed), it will be written in the new file format (meaning -no specific "upgrade" process is needed). But note that once a commit has -occurred, pre-2.1 Lucene will not be able to read the index.
  • -
  • In version 2.3, the file format was changed to allow segments to share a -single set of doc store (vectors & stored fields) files. This allows for -faster indexing in certain cases. The change is fully backwards compatible (in -the same way as the lock-less commits change in 2.1).
  • -
  • In version 2.4, Strings are now written as a true UTF-8 byte sequence, not -Java's modified UTF-8. See -LUCENE-510 for details.
  • -
  • In version 2.9, an optional opaque Map<String,String> CommitUserData -may be passed to IndexWriter's commit methods (and later retrieved), which is -recorded in the segments_N file. See -LUCENE-1382 for details. Also, -diagnostics were added to each segment written recording details about why it -was written (due to flush, merge; which OS/JRE was used; etc.). See issue -LUCENE-1654 for details.
  • -
  • In version 3.0, compressed fields are no longer written to the index (they -can still be read, but on merge the new segment will write them, uncompressed). -See issue LUCENE-1960 -for details.
  • -
  • In version 3.1, segments record the code version that created them. See -LUCENE-2720 for details. -Additionally segments track explicitly whether or not they have term vectors. -See LUCENE-2811 -for details.
  • -
  • In version 3.2, numeric fields are written natively to the stored fields -file; previously they were stored in text format only.
  • -
  • In version 3.4, fields can omit position data while still indexing term -frequencies.
  • -
  • In version 4.0, the format of the inverted index became extensible via -the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage -({@code DocValues}) was introduced. Normalization factors need no longer be a -single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. -Terms need not be unicode strings, they can be any byte sequence. Term offsets -can optionally be indexed into the postings lists. Payloads can be stored in the -term vectors.
  • -
  • In version 4.1, the format of the postings list changed to use either -of FOR compression or variable-byte encoding, depending upon the frequency -of the term. Terms appearing only once were changed to inline directly into -the term dictionary. Stored fields are compressed by default.
  • -
  • In version 4.2, term vectors are compressed by default. DocValues has -a new multi-valued type (SortedSet), that can be used for faceting/grouping/joining -on multi-valued fields.
  • -
  • In version 4.5, DocValues were extended to explicitly represent missing values.
  • -
  • In version 4.6, FieldInfos were extended to support per-field DocValues generation, to -allow updating NumericDocValues fields.
  • -
  • In version 4.8, checksum footers were added to the end of each index file -for improved data integrity. Specifically, the last 8 bytes of every index file -contain the zlib-crc32 checksum of the file.
  • -
- -

Limitations

-
-

Lucene uses a Java int to refer to -document numbers, and the index file format uses an Int32 -on-disk to store document numbers. This is a limitation -of both the index file format and the current implementation. Eventually these -should be replaced with either UInt64 values, or -better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.

-
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene49/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene49/package.html index f2d104e0e65..35c7c0941b1 100755 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene49/package.html +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene49/package.html @@ -21,384 +21,5 @@ Lucene 4.9 file format. - -

Apache Lucene - Index File Formats

- - -

Introduction

-
-

This document defines the index file formats used in this version of Lucene. -If you are using a different version of Lucene, please consult the copy of -docs/ that was distributed with -the version you are using.

-

Apache Lucene is written in Java, but several efforts are underway to write -versions of -Lucene in other programming languages. If these versions are to remain -compatible with Apache Lucene, then a language-independent definition of the -Lucene index format is required. This document thus attempts to provide a -complete and independent definition of the Apache Lucene file formats.

-

As Lucene evolves, this document should evolve. Versions of Lucene in -different programming languages should endeavor to agree on file formats, and -generate new versions of this document.

-
- -

Definitions

-
-

The fundamental concepts in Lucene are index, document, field and term.

-

An index contains a sequence of documents.

-
    -
  • A document is a sequence of fields.
  • -
  • A field is a named sequence of terms.
  • -
  • A term is a sequence of bytes.
  • -
-

The same sequence of bytes in two different fields is considered a different -term. Thus terms are represented as a pair: the string naming the field, and the -bytes within the field.

- -

Inverted Indexing

-

The index stores statistics about terms in order to make term-based search -more efficient. Lucene's index falls into the family of indexes known as an -inverted index. This is because it can list, for a term, the documents -that contain it. This is the inverse of the natural relationship, in which -documents list terms.

- -

Types of Fields

-

In Lucene, fields may be stored, in which case their text is stored -in the index literally, in a non-inverted manner. Fields that are inverted are -called indexed. A field may be both stored and indexed.

-

The text of a field may be tokenized into terms to be indexed, or the -text of a field may be used literally as a term to be indexed. Most fields are -tokenized, but sometimes it is useful for certain identifier fields to be -indexed literally.

-

See the {@link org.apache.lucene.document.Field Field} -java docs for more information on Fields.

- -

Segments

-

Lucene indexes may be composed of multiple sub-indexes, or segments. -Each segment is a fully independent index, which could be searched separately. -Indexes evolve by:

-
    -
  1. Creating new segments for newly added documents.
  2. -
  3. Merging existing segments.
  4. -
-

Searches may involve multiple segments and/or multiple indexes, each index -potentially composed of a set of segments.

- -

Document Numbers

-

Internally, Lucene refers to documents by an integer document number. -The first document added to an index is numbered zero, and each subsequent -document added gets a number one greater than the previous.

-

Note that a document's number may change, so caution should be taken when -storing these numbers outside of Lucene. In particular, numbers may change in -the following situations:

-
    -
  • -

    The numbers stored in each segment are unique only within the segment, and -must be converted before they can be used in a larger context. The standard -technique is to allocate each segment a range of values, based on the range of -numbers used in that segment. To convert a document number from a segment to an -external value, the segment's base document number is added. To convert -an external value back to a segment-specific value, the segment is identified -by the range that the external value is in, and the segment's base value is -subtracted. For example two five document segments might be combined, so that -the first segment has a base value of zero, and the second of five. Document -three from the second segment would have an external value of eight.

    -
  • -
  • -

    When documents are deleted, gaps are created in the numbering. These are -eventually removed as the index evolves through merging. Deleted documents are -dropped when segments are merged. A freshly-merged segment thus has no gaps in -its numbering.

    -
  • -
-
- -

Index Structure Overview

-
-

Each segment index maintains the following:

-
    -
  • -{@link org.apache.lucene.codecs.lucene46.Lucene46SegmentInfoFormat Segment info}. - This contains metadata about a segment, such as the number of documents - and what files it uses. -
  • -
  • -{@link org.apache.lucene.codecs.lucene46.Lucene46FieldInfosFormat Field names}. - This contains the set of field names used in the index. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Stored Field values}. -This contains, for each document, a list of attribute-value pairs, where the attributes -are field names. These are used to store auxiliary information about the document, such as -its title, url, or an identifier to access a database. The set of stored fields are what is -returned for each hit when searching. This is keyed by document number. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term dictionary}. -A dictionary containing all of the terms used in all of the -indexed fields of all of the documents. The dictionary also contains the number -of documents which contain the term, and pointers to the term's frequency and -proximity data. -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Frequency data}. -For each term in the dictionary, the numbers of all the -documents that contain that term, and the frequency of the term in that -document, unless frequencies are omitted (IndexOptions.DOCS_ONLY) -
  • -
  • -{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Proximity data}. -For each term in the dictionary, the positions that the -term occurs in each document. Note that this will not exist if all fields in -all documents omit position data. -
  • -
  • -{@link org.apache.lucene.codecs.lucene49.Lucene49NormsFormat Normalization factors}. -For each field in each document, a value is stored -that is multiplied into the score for hits on that field. -
  • -
  • -{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vectors}. -For each field in each document, the term vector (sometimes -called document vector) may be stored. A term vector consists of term text and -term frequency. To add Term Vectors to your index see the -{@link org.apache.lucene.document.Field Field} constructors -
  • -
  • -{@link org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat Per-document values}. -Like stored values, these are also keyed by document -number, but are generally intended to be loaded into main memory for fast -access. Whereas stored values are generally intended for summary results from -searches, per-document values are useful for things like scoring factors. -
  • -
  • -{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted documents}. -An optional file indicating which documents are deleted. -
  • -
-

Details on each of these are provided in their linked pages.

-
- -

File Naming

-
-

All files belonging to a segment have the same name with varying extensions. -The extensions correspond to the different file formats described below. When -using the Compound File format (default in 1.4 and greater) these files (except -for the Segment info file, the Lock file, and Deleted documents file) are collapsed -into a single .cfs file (see below for details)

-

Typically, all segments in an index are stored in a single directory, -although this is not required.

-

As of version 2.1 (lock-less commits), file names are never re-used (there -is one exception, "segments.gen", see below). That is, when any file is saved -to the Directory it is given a never before used filename. This is achieved -using a simple generations approach. For example, the first segments file is -segments_1, then segments_2, etc. The generation is a sequential long integer -represented in alpha-numeric (base 36) form.

-
- -

Summary of File Extensions

-
-

The following table summarizes the names and extensions of the files in -Lucene:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameExtensionBrief Description
{@link org.apache.lucene.index.SegmentInfos Segments File}segments.gen, segments_NStores information about a commit point
Lock Filewrite.lockThe Write lock prevents multiple IndexWriters from writing to the same -file.
{@link org.apache.lucene.codecs.lucene46.Lucene46SegmentInfoFormat Segment Info}.siStores metadata about a segment
{@link org.apache.lucene.store.CompoundFileDirectory Compound File}.cfs, .cfeAn optional "virtual" file consisting of all the other index files for -systems that frequently run out of file handles.
{@link org.apache.lucene.codecs.lucene46.Lucene46FieldInfosFormat Fields}.fnmStores information about the fields
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Index}.fdxContains pointers to field data
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Data}.fdtThe stored fields for documents
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Dictionary}.timThe term dictionary, stores term info
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Index}.tipThe index into the Term Dictionary
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Frequencies}.docContains the list of docs which contain each term along with frequency
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Positions}.posStores position information about where a term occurs in the index
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Payloads}.payStores additional per-position metadata information such as character offsets and user payloads
{@link org.apache.lucene.codecs.lucene49.Lucene49NormsFormat Norms}.nvd, .nvmEncodes length and boost factors for docs and fields
{@link org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat Per-Document Values}.dvd, .dvmEncodes additional scoring factors or other per-document information.
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Index}.tvxStores offset into the document data file
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Documents}.tvdContains information about each document that has term vectors
{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Fields}.tvfThe field level info about term vectors
{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted Documents}.delInfo about what documents are deleted
-
- -

Lock File

-The write lock, which is stored in the index directory by default, is named -"write.lock". If the lock directory is different from the index directory then -the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix -derived from the full path to the index directory. When this file is present, a -writer is currently modifying the index (adding or removing documents). This -lock file ensures that only one writer is modifying the index at a time.

- -

History

-

Compatibility notes are provided in this document, describing how file -formats have changed from prior versions:

-
    -
  • In version 2.1, the file format was changed to allow lock-less commits (ie, -no more commit lock). The change is fully backwards compatible: you can open a -pre-2.1 index for searching or adding/deleting of docs. When the new segments -file is saved (committed), it will be written in the new file format (meaning -no specific "upgrade" process is needed). But note that once a commit has -occurred, pre-2.1 Lucene will not be able to read the index.
  • -
  • In version 2.3, the file format was changed to allow segments to share a -single set of doc store (vectors & stored fields) files. This allows for -faster indexing in certain cases. The change is fully backwards compatible (in -the same way as the lock-less commits change in 2.1).
  • -
  • In version 2.4, Strings are now written as a true UTF-8 byte sequence, not -Java's modified UTF-8. See -LUCENE-510 for details.
  • -
  • In version 2.9, an optional opaque Map<String,String> CommitUserData -may be passed to IndexWriter's commit methods (and later retrieved), which is -recorded in the segments_N file. See -LUCENE-1382 for details. Also, -diagnostics were added to each segment written recording details about why it -was written (due to flush, merge; which OS/JRE was used; etc.). See issue -LUCENE-1654 for details.
  • -
  • In version 3.0, compressed fields are no longer written to the index (they -can still be read, but on merge the new segment will write them, uncompressed). -See issue LUCENE-1960 -for details.
  • -
  • In version 3.1, segments record the code version that created them. See -LUCENE-2720 for details. -Additionally segments track explicitly whether or not they have term vectors. -See LUCENE-2811 -for details.
  • -
  • In version 3.2, numeric fields are written natively to the stored fields -file; previously they were stored in text format only.
  • -
  • In version 3.4, fields can omit position data while still indexing term -frequencies.
  • -
  • In version 4.0, the format of the inverted index became extensible via -the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage -({@code DocValues}) was introduced. Normalization factors need no longer be a -single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. -Terms need not be unicode strings, they can be any byte sequence. Term offsets -can optionally be indexed into the postings lists. Payloads can be stored in the -term vectors.
  • -
  • In version 4.1, the format of the postings list changed to use either -of FOR compression or variable-byte encoding, depending upon the frequency -of the term. Terms appearing only once were changed to inline directly into -the term dictionary. Stored fields are compressed by default.
  • -
  • In version 4.2, term vectors are compressed by default. DocValues has -a new multi-valued type (SortedSet), that can be used for faceting/grouping/joining -on multi-valued fields.
  • -
  • In version 4.5, DocValues were extended to explicitly represent missing values.
  • -
  • In version 4.6, FieldInfos were extended to support per-field DocValues generation, to -allow updating NumericDocValues fields.
  • -
  • In version 4.8, checksum footers were added to the end of each index file -for improved data integrity. Specifically, the last 8 bytes of every index file -contain the zlib-crc32 checksum of the file.
  • -
  • In version 4.9, DocValues has a new multi-valued numeric type (SortedNumeric) -that is suitable for faceting/sorting/analytics. -
  • -
- -

Limitations

-
-

Lucene uses a Java int to refer to -document numbers, and the index file format uses an Int32 -on-disk to store document numbers. This is a limitation -of both the index file format and the current implementation. Eventually these -should be replaced with either UInt64 values, or -better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.

-
diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec index 890fffae4f2..11a1ea920b4 100644 --- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -13,10 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -org.apache.lucene.codecs.lucene40.Lucene40Codec -org.apache.lucene.codecs.lucene41.Lucene41Codec -org.apache.lucene.codecs.lucene42.Lucene42Codec -org.apache.lucene.codecs.lucene45.Lucene45Codec -org.apache.lucene.codecs.lucene46.Lucene46Codec -org.apache.lucene.codecs.lucene49.Lucene49Codec org.apache.lucene.codecs.lucene410.Lucene410Codec diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat index 0dbc7e81060..8cc6f70d0f4 100644 --- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat +++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat @@ -13,7 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat -org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat -org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat org.apache.lucene.codecs.lucene410.Lucene410DocValuesFormat diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat index 023d9c9e1a6..95e92675165 100644 --- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat +++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -13,5 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41StoredFieldsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41StoredFieldsFormat.java index cf296f50167..485ed37ff34 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41StoredFieldsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41StoredFieldsFormat.java @@ -18,18 +18,12 @@ package org.apache.lucene.codecs.lucene41; */ import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene410.Lucene410Codec; import org.apache.lucene.index.BaseStoredFieldsFormatTestCase; -import org.junit.BeforeClass; public class TestLucene41StoredFieldsFormat extends BaseStoredFieldsFormatTestCase { - - @BeforeClass - public static void beforeClass() { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec - } - @Override protected Codec getCodec() { - return new Lucene41RWCodec(); + return new Lucene410Codec(); } } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene49/TestLucene49NormsFormat.java 
b/lucene/core/src/test/org/apache/lucene/codecs/lucene49/TestLucene49NormsFormat.java index b2f0d90b170..f84d61a1882 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene49/TestLucene49NormsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene49/TestLucene49NormsFormat.java @@ -18,19 +18,14 @@ package org.apache.lucene.codecs.lucene49; */ import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene410.Lucene410Codec; import org.apache.lucene.index.BaseNormsFormatTestCase; -import org.junit.BeforeClass; /** * Tests Lucene49NormsFormat */ public class TestLucene49NormsFormat extends BaseNormsFormatTestCase { - private final Codec codec = new Lucene49RWCodec(); - - @BeforeClass - public static void beforeClass() { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec - } + private final Codec codec = new Lucene410Codec(); @Override protected Codec getCodec() { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestBinaryDocValuesUpdates.java b/lucene/core/src/test/org/apache/lucene/index/TestBinaryDocValuesUpdates.java index 7df11e1f258..38d23e50dc7 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestBinaryDocValuesUpdates.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestBinaryDocValuesUpdates.java @@ -9,13 +9,8 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat; -import org.apache.lucene.codecs.lucene40.Lucene40RWCodec; -import org.apache.lucene.codecs.lucene41.Lucene41RWCodec; -import org.apache.lucene.codecs.lucene42.Lucene42RWCodec; -import org.apache.lucene.codecs.lucene45.Lucene45RWCodec; import org.apache.lucene.codecs.lucene410.Lucene410Codec; import 
org.apache.lucene.codecs.lucene410.Lucene410DocValuesFormat; import org.apache.lucene.document.BinaryDocValuesField; @@ -32,7 +27,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.TestUtil; import org.junit.Test; @@ -55,8 +49,6 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks; * limitations under the License. */ -@SuppressCodecs({"Lucene40","Lucene41","Lucene42","Lucene45"}) -@SuppressWarnings("resource") public class TestBinaryDocValuesUpdates extends LuceneTestCase { static long getValue(BinaryDocValues bdv, int idx) { @@ -861,38 +853,6 @@ public class TestBinaryDocValuesUpdates extends LuceneTestCase { dir.close(); } - public void testUpdateOldSegments() throws Exception { - Codec[] oldCodecs = new Codec[] { new Lucene40RWCodec(), new Lucene41RWCodec(), new Lucene42RWCodec(), new Lucene45RWCodec() }; - Directory dir = newDirectory(); - - boolean oldValue = OLD_FORMAT_IMPERSONATION_IS_ACTIVE; - // create a segment with an old Codec - IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); - conf.setCodec(oldCodecs[random().nextInt(oldCodecs.length)]); - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; - IndexWriter writer = new IndexWriter(dir, conf); - Document doc = new Document(); - doc.add(new StringField("id", "doc", Store.NO)); - doc.add(new BinaryDocValuesField("f", toBytes(5L))); - writer.addDocument(doc); - writer.close(); - - conf = newIndexWriterConfig(new MockAnalyzer(random())); - writer = new IndexWriter(dir, conf); - writer.updateBinaryDocValue(new Term("id", "doc"), "f", toBytes(4L)); - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false; - try { - writer.close(); - fail("should not have succeeded to update a segment written with an old Codec"); - } catch (UnsupportedOperationException e) { - writer.rollback(); - } 
finally { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = oldValue; - } - - dir.close(); - } - public void testStressMultiThreading() throws Exception { final Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java index 3c8f89224d5..e0a0a889628 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java @@ -27,9 +27,6 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; -import org.apache.lucene.codecs.lucene40.Lucene40RWCodec; -import org.apache.lucene.codecs.lucene41.Lucene41RWCodec; -import org.apache.lucene.codecs.lucene42.Lucene42RWCodec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.NumericDocValuesField; @@ -850,29 +847,4 @@ public class TestCodecs extends LuceneTestCase { dir.close(); } - public void testDisableImpersonation() throws Exception { - Codec[] oldCodecs = new Codec[] { new Lucene40RWCodec(), new Lucene41RWCodec(), new Lucene42RWCodec() }; - Directory dir = newDirectory(); - IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); - conf.setCodec(oldCodecs[random().nextInt(oldCodecs.length)]); - IndexWriter writer = new IndexWriter(dir, conf); - - Document doc = new Document(); - doc.add(new StringField("f", "bar", Store.YES)); - doc.add(new NumericDocValuesField("n", 18L)); - - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false; - try { - writer.addDocument(doc); - writer.close(); - fail("should not have succeeded to impersonate an old format!"); - } catch (UnsupportedOperationException e) { - writer.rollback(); - } finally { - OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; - 
} - - dir.close(); - } - } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java index 8111447a879..027c1ac5f70 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java @@ -378,7 +378,6 @@ public class TestDocValuesIndexing extends LuceneTestCase { } public void testTooLargeTermSortedSetBytes() throws IOException { - assumeTrue("codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index fa229d8ed17..3316372e55a 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -1037,14 +1037,10 @@ public class TestIndexWriter extends LuceneTestCase { doc.add(new BinaryDocValuesField("binarydv", new BytesRef("500"))); doc.add(new NumericDocValuesField("numericdv", 500)); doc.add(new SortedDocValuesField("sorteddv", new BytesRef("500"))); - if (defaultCodecSupportsSortedSet()) { - doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("one"))); - doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("two"))); - } - if (defaultCodecSupportsSortedNumeric()) { - doc.add(new SortedNumericDocValuesField("sortednumericdv", 4)); - doc.add(new SortedNumericDocValuesField("sortednumericdv", 3)); - } + doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("one"))); + doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("two"))); + doc.add(new SortedNumericDocValuesField("sortednumericdv", 4)); + doc.add(new SortedNumericDocValuesField("sortednumericdv", 3)); w.addDocument(doc); doc = new Document(); 
doc.add(newStringField(random, "id", "501", Field.Store.NO)); @@ -1052,14 +1048,10 @@ public class TestIndexWriter extends LuceneTestCase { doc.add(new BinaryDocValuesField("binarydv", new BytesRef("501"))); doc.add(new NumericDocValuesField("numericdv", 501)); doc.add(new SortedDocValuesField("sorteddv", new BytesRef("501"))); - if (defaultCodecSupportsSortedSet()) { - doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("two"))); - doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("three"))); - } - if (defaultCodecSupportsSortedNumeric()) { - doc.add(new SortedNumericDocValuesField("sortednumericdv", 6)); - doc.add(new SortedNumericDocValuesField("sortednumericdv", 1)); - } + doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("two"))); + doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("three"))); + doc.add(new SortedNumericDocValuesField("sortednumericdv", 6)); + doc.add(new SortedNumericDocValuesField("sortednumericdv", 1)); w.addDocument(doc); w.deleteDocuments(new Term("id", "500")); w.close(); @@ -1108,9 +1100,7 @@ public class TestIndexWriter extends LuceneTestCase { doc.add(binaryDVField); doc.add(numericDVField); doc.add(sortedDVField); - if (defaultCodecSupportsSortedSet()) { - doc.add(sortedSetDVField); - } + doc.add(sortedSetDVField); for(int i=0;i<100;i++) { idField.setStringValue(Integer.toString(i)); binaryDVField.setBytesValue(new BytesRef(idField.stringValue())); @@ -2893,11 +2883,7 @@ public class TestIndexWriter extends LuceneTestCase { assertNotNull(id1); String id2 = sis.info(0).info.getId(); - if (defaultCodecSupportsSegmentIds()) { - assertNotNull(id2); - } else { - assertNull(id2); - } + assertNotNull(id2); // Make sure CheckIndex includes id output: ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); @@ -2912,12 +2898,7 @@ public class TestIndexWriter extends LuceneTestCase { // Commit id is always stored: assertTrue("missing id=" + id1 + " in:\n" + s, s.contains("id=" + id1)); 
- // Per-segment id may or may not be stored depending on the codec: - if (defaultCodecSupportsSegmentIds()) { - assertTrue("missing id=" + id2 + " in:\n" + s, s.contains("id=" + id2)); - } else { - assertTrue("missing id=null in:\n" + s, s.contains("id=null")); - } + assertTrue("missing id=" + id2 + " in:\n" + s, s.contains("id=" + id2)); d.close(); Set ids = new HashSet<>(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java index c923f1c0c79..a8fb84ed753 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java @@ -156,14 +156,10 @@ public class TestIndexWriterExceptions extends LuceneTestCase { doc.add(new NumericDocValuesField("numericdv", 5)); doc.add(new BinaryDocValuesField("binarydv", new BytesRef("hello"))); doc.add(new SortedDocValuesField("sorteddv", new BytesRef("world"))); - if (defaultCodecSupportsSortedSet()) { - doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("hellllo"))); - doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("again"))); - } - if (defaultCodecSupportsSortedNumeric()) { - doc.add(new SortedNumericDocValuesField("sortednumericdv", 10)); - doc.add(new SortedNumericDocValuesField("sortednumericdv", 5)); - } + doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("hellllo"))); + doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("again"))); + doc.add(new SortedNumericDocValuesField("sortednumericdv", 10)); + doc.add(new SortedNumericDocValuesField("sortednumericdv", 5)); doc.add(newField(r, "content7", "aaa bbb ccc ddd", DocCopyIterator.custom4)); @@ -2054,10 +2050,9 @@ public class TestIndexWriterExceptions extends LuceneTestCase { shouldFail.set(true); boolean doClose = false; try { - boolean defaultCodecSupportsFieldUpdates = 
defaultCodecSupportsFieldUpdates(); for(int i=0;i" : br.utf8ToString())); if (idx == hitCount-1) { break; @@ -228,20 +222,8 @@ public class TestSortRandom extends LuceneTestCase { for(int hitIDX=0;hitIDX primaryExtensions) throws IOException { diff --git a/lucene/demo/src/test/org/apache/lucene/demo/facet/TestDistanceFacetsExample.java b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestDistanceFacetsExample.java index 0ab2dc3b7fa..3eb20c7af5d 100644 --- a/lucene/demo/src/test/org/apache/lucene/demo/facet/TestDistanceFacetsExample.java +++ b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestDistanceFacetsExample.java @@ -20,10 +20,8 @@ package org.apache.lucene.demo.facet; import org.apache.lucene.facet.FacetResult; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.LuceneTestCase; -@SuppressCodecs("Lucene3x") public class TestDistanceFacetsExample extends LuceneTestCase { public void testSimple() throws Exception { diff --git a/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java index 6b780dfac0e..f48d7065f80 100644 --- a/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java +++ b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java @@ -21,11 +21,9 @@ import java.util.List; import org.apache.lucene.facet.FacetResult; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; -@SuppressCodecs("Lucene3x") public class TestRangeFacetsExample extends LuceneTestCase { @Test diff --git a/lucene/demo/src/test/org/apache/lucene/demo/facet/TestSimpleSortedSetFacetsExample.java b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestSimpleSortedSetFacetsExample.java index a629d563b2e..e4db6651a8a 100644 --- 
a/lucene/demo/src/test/org/apache/lucene/demo/facet/TestSimpleSortedSetFacetsExample.java +++ b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestSimpleSortedSetFacetsExample.java @@ -21,13 +21,11 @@ import java.util.List; import org.apache.lucene.facet.FacetResult; import org.apache.lucene.facet.taxonomy.FacetLabel; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; // We require sorted set DVs: -@SuppressCodecs({"Lucene40", "Lucene41"}) public class TestSimpleSortedSetFacetsExample extends LuceneTestCase { @Test diff --git a/lucene/expressions/src/test/org/apache/lucene/expressions/TestExpressionValueSource.java b/lucene/expressions/src/test/org/apache/lucene/expressions/TestExpressionValueSource.java index 20a500a7f4e..5ba626860fc 100644 --- a/lucene/expressions/src/test/org/apache/lucene/expressions/TestExpressionValueSource.java +++ b/lucene/expressions/src/test/org/apache/lucene/expressions/TestExpressionValueSource.java @@ -35,9 +35,7 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.SortField; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; -@SuppressCodecs("Lucene3x") public class TestExpressionValueSource extends LuceneTestCase { DirectoryReader reader; Directory dir; diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java index bbfd1615b0d..f2b34bb4eba 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java @@ -401,8 +401,6 @@ public class TestDrillSideways extends FacetTestCase { public void testRandom() throws Exception { - boolean canUseDV = defaultCodecSupportsSortedSet(); - while (aChance == 0.0) { aChance = random().nextDouble(); } @@ 
-490,7 +488,7 @@ public class TestDrillSideways extends FacetTestCase { config.setMultiValued("dim"+i, true); } - boolean doUseDV = canUseDV && random().nextBoolean(); + boolean doUseDV = random().nextBoolean(); for(Doc rawDoc : docs) { Document doc = new Document(); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java index edb4d064d1a..7f00aadfb19 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java @@ -824,7 +824,6 @@ public class TestRangeFacetCounts extends FacetTestCase { // LUCENE-5178 public void testMissingValues() throws Exception { - assumeTrue("codec does not support docsWithField", defaultCodecSupportsDocsWithField()); Directory d = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), d); Document doc = new Document(); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java index ad026ceace1..b9c29730fca 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java @@ -49,7 +49,6 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { // randomly uses SortedSetDV public void testBasic() throws Exception { - assumeTrue("Test requires SortedSetDV support", defaultCodecSupportsSortedSet()); Directory dir = newDirectory(); FacetsConfig config = new FacetsConfig(); @@ -99,7 +98,6 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { // LUCENE-5090 @SuppressWarnings("unused") public void testStaleState() throws Exception { - assumeTrue("Test requires SortedSetDV support", defaultCodecSupportsSortedSet()); 
Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); @@ -142,7 +140,6 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { // LUCENE-5333 public void testSparseFacets() throws Exception { - assumeTrue("Test requires SortedSetDV support", defaultCodecSupportsSortedSet()); Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); @@ -196,7 +193,6 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { } public void testSomeSegmentsMissing() throws Exception { - assumeTrue("Test requires SortedSetDV support", defaultCodecSupportsSortedSet()); Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); @@ -236,7 +232,6 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { } public void testSlowCompositeReaderWrapper() throws Exception { - assumeTrue("Test requires SortedSetDV support", defaultCodecSupportsSortedSet()); Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); @@ -272,7 +267,6 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { public void testRandom() throws Exception { - assumeTrue("Test requires SortedSetDV support", defaultCodecSupportsSortedSet()); String[] tokens = getRandomTokens(10); Directory indexDir = newDirectory(); Directory taxoDir = newDirectory(); diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java index 1579751edb9..367402e52a8 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java @@ -52,13 +52,11 @@ import org.apache.lucene.search.grouping.term.TermDistinctValuesCollector; import 
org.apache.lucene.search.grouping.term.TermFirstPassGroupingCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueStr; -@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) // we need missing support... i think? public class DistinctValuesCollectorTest extends AbstractGroupingTestCase { private final static NullComparator nullComparator = new NullComparator(); diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java index 277c59429b8..4ffc728b071 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java @@ -48,12 +48,9 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.grouping.term.TermGroupFacetCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.TestUtil; -// Need SSDV, ord=-1 for missing: -@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) public class GroupFacetCollectorTest extends AbstractGroupingTestCase { public void testSimple() throws Exception { diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java index 06691f7b47a..ebaafb1d220 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java @@ -41,7 +41,6 @@ import 
org.apache.lucene.search.grouping.term.TermSecondPassGroupingCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueStr; @@ -55,7 +54,6 @@ import java.util.*; // - test ties // - test compound sort -@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) // we need missing support... i think? public class TestGrouping extends LuceneTestCase { public void testBasic() throws Exception { diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java index 2bd6e28b50e..8642d3c4397 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java @@ -33,9 +33,7 @@ import org.apache.lucene.search.grouping.GroupDocs; import org.apache.lucene.search.grouping.TopGroups; import org.apache.lucene.store.Directory; import org.apache.lucene.util.*; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; -@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) // needs working "missing" public class TestBlockJoin extends LuceneTestCase { // One resume... 
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java index cd842c170e7..fbd65b430fa 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java @@ -68,11 +68,9 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.TestUtil; import org.junit.Test; -@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) // we need SortedSet, docsWithField public class TestJoinUtil extends LuceneTestCase { public void testSimple() throws Exception { diff --git a/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java b/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java index dafc32a9534..0a438026956 100644 --- a/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java +++ b/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java @@ -182,14 +182,10 @@ public abstract class SorterTestBase extends LuceneTestCase { doc.add(norms); doc.add(new BinaryDocValuesField(BINARY_DV_FIELD, new BytesRef(Integer.toString(id)))); doc.add(new SortedDocValuesField(SORTED_DV_FIELD, new BytesRef(Integer.toString(id)))); - if (defaultCodecSupportsSortedSet()) { - doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id)))); - doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id + 1)))); - } - if (defaultCodecSupportsSortedNumeric()) { - doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id)); - doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id + 1)); - } + doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new 
BytesRef(Integer.toString(id)))); + doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id + 1)))); + doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id)); + doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id + 1)); doc.add(new Field(TERM_VECTORS_FIELD, Integer.toString(id), TERM_VECTORS_TYPE)); return doc; } @@ -381,7 +377,6 @@ public abstract class SorterTestBase extends LuceneTestCase { @Test public void testSortedSetDocValuesField() throws Exception { - assumeTrue("default codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); SortedSetDocValues dv = reader.getSortedSetDocValues(SORTED_SET_DV_FIELD); int maxDoc = reader.maxDoc(); for (int i = 0; i < maxDoc; i++) { @@ -397,7 +392,6 @@ public abstract class SorterTestBase extends LuceneTestCase { @Test public void testSortedNumericDocValuesField() throws Exception { - assumeTrue("default codec does not support SORTED_NUMERIC", defaultCodecSupportsSortedNumeric()); SortedNumericDocValues dv = reader.getSortedNumericDocValues(SORTED_NUMERIC_DV_FIELD); int maxDoc = reader.maxDoc(); for (int i = 0; i < maxDoc; i++) { diff --git a/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java b/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java index 5b752eedc34..490eb28da02 100644 --- a/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java +++ b/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java @@ -135,13 +135,11 @@ public class TestSortingMergePolicy extends LuceneTestCase { iw1.w.addDocument(doc); iw2.w.addDocument(doc); - if (defaultCodecSupportsFieldUpdates()) { - // update NDV of docs belonging to one term (covers many documents) - final long value = random().nextLong(); - final String term = RandomPicks.randomFrom(random(), terms); - iw1.w.updateNumericDocValue(new Term("s", term), "ndv", value); - 
iw2.w.updateNumericDocValue(new Term("s", term), "ndv", value); - } + // update NDV of docs belonging to one term (covers many documents) + final long value = random().nextLong(); + final String term = RandomPicks.randomFrom(random(), terms); + iw1.w.updateNumericDocValue(new Term("s", term), "ndv", value); + iw2.w.updateNumericDocValue(new Term("s", term), "ndv", value); iw1.forceMerge(1); iw2.forceMerge(1); diff --git a/lucene/misc/src/test/org/apache/lucene/uninverting/TestFieldCache.java b/lucene/misc/src/test/org/apache/lucene/uninverting/TestFieldCache.java index 2f5ceca29d1..cb9cf2c035e 100644 --- a/lucene/misc/src/test/org/apache/lucene/uninverting/TestFieldCache.java +++ b/lucene/misc/src/test/org/apache/lucene/uninverting/TestFieldCache.java @@ -430,10 +430,8 @@ public class TestFieldCache extends LuceneTestCase { doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value"))); doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value"))); doc.add(new NumericDocValuesField("numeric", 42)); - if (defaultCodecSupportsSortedSet()) { - doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1"))); - doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2"))); - } + doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1"))); + doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2"))); iw.addDocument(doc); DirectoryReader ir = iw.getReader(); iw.close(); @@ -525,37 +523,35 @@ public class TestFieldCache extends LuceneTestCase { assertTrue(bits.get(0)); // SortedSet type: can be retrieved via getDocTermOrds() - if (defaultCodecSupportsSortedSet()) { - try { - FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.NUMERIC_UTILS_INT_PARSER, false); - fail(); - } catch (IllegalStateException expected) {} + try { + FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.NUMERIC_UTILS_INT_PARSER, false); + fail(); + } catch 
(IllegalStateException expected) {} - try { - FieldCache.DEFAULT.getTerms(ar, "sortedset", true); - fail(); - } catch (IllegalStateException expected) {} + try { + FieldCache.DEFAULT.getTerms(ar, "sortedset", true); + fail(); + } catch (IllegalStateException expected) {} - try { - FieldCache.DEFAULT.getTermsIndex(ar, "sortedset"); - fail(); - } catch (IllegalStateException expected) {} - - try { - new DocTermOrds(ar, null, "sortedset"); - fail(); - } catch (IllegalStateException expected) {} + try { + FieldCache.DEFAULT.getTermsIndex(ar, "sortedset"); + fail(); + } catch (IllegalStateException expected) {} - sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset", null); - sortedSet.setDocument(0); - assertEquals(0, sortedSet.nextOrd()); - assertEquals(1, sortedSet.nextOrd()); - assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd()); - assertEquals(2, sortedSet.getValueCount()); + try { + new DocTermOrds(ar, null, "sortedset"); + fail(); + } catch (IllegalStateException expected) {} - bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset"); - assertTrue(bits.get(0)); - } + sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset", null); + sortedSet.setDocument(0); + assertEquals(0, sortedSet.nextOrd()); + assertEquals(1, sortedSet.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd()); + assertEquals(2, sortedSet.getValueCount()); + + bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset"); + assertTrue(bits.get(0)); ir.close(); dir.close(); diff --git a/lucene/misc/src/test/org/apache/lucene/uninverting/TestFieldCacheVsDocValues.java b/lucene/misc/src/test/org/apache/lucene/uninverting/TestFieldCacheVsDocValues.java index af67fb8fa1f..42f0b4576c9 100644 --- a/lucene/misc/src/test/org/apache/lucene/uninverting/TestFieldCacheVsDocValues.java +++ b/lucene/misc/src/test/org/apache/lucene/uninverting/TestFieldCacheVsDocValues.java @@ -25,7 +25,6 @@ import java.util.List; import 
org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -100,7 +99,6 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase { } public void testSortedSetFixedLengthVsUninvertedField() throws Exception { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { int fixedLength = TestUtil.nextInt(random(), 1, 10); @@ -109,7 +107,6 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase { } public void testSortedSetVariableLengthVsUninvertedField() throws Exception { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { doTestSortedSetVsUninvertedField(1, 10); @@ -211,6 +208,8 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase { d.close(); } + private static final int LARGE_BINARY_FIELD_LENGTH = (1 << 15) - 2; + // TODO: get this out of here and into the deprecated codecs (4.0, 4.2) public void testHugeBinaryValueLimit() throws Exception { // We only test DVFormats that have a limit @@ -226,7 +225,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase { // Sometimes make all values fixed length since some // codecs have different code paths for this: numDocs = TestUtil.nextInt(random(), 10, 20); - fixedLength = Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH; + fixedLength = LARGE_BINARY_FIELD_LENGTH; } else { numDocs = TestUtil.nextInt(random(), 100, 200); } @@ -243,9 +242,9 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase { if (doFixed) { numBytes = fixedLength; } else if (docID == 0 || random().nextInt(5) == 3) { - numBytes = 
Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH; + numBytes = LARGE_BINARY_FIELD_LENGTH; } else { - numBytes = TestUtil.nextInt(random(), 1, Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH); + numBytes = TestUtil.nextInt(random(), 1, LARGE_BINARY_FIELD_LENGTH); } totalBytes += numBytes; if (totalBytes > 5 * 1024*1024) { @@ -402,7 +401,6 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase { } private void doTestMissingVsFieldCache(LongProducer longs) throws Exception { - assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf); diff --git a/lucene/module-build.xml b/lucene/module-build.xml index df0e9d7ecae..cc5ca70fc52 100644 --- a/lucene/module-build.xml +++ b/lucene/module-build.xml @@ -433,6 +433,28 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestSortedSetFieldSource.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestSortedSetFieldSource.java index c016aa65084..09fc6ae7621 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/function/TestSortedSetFieldSource.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestSortedSetFieldSource.java @@ -29,9 +29,7 @@ import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; -@SuppressCodecs({"Lucene40", "Lucene41"}) // avoid codecs that don't support sortedset public class TestSortedSetFieldSource extends LuceneTestCase { public void testSimple() throws Exception { Directory dir = newDirectory(); diff --git 
a/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java index bc6527aa2b4..523d02cb50b 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java @@ -41,11 +41,9 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; /** */ -@SuppressCodecs("Lucene3x") // binary terms public class TestAnalyzingQueryParser extends LuceneTestCase { private final static String FIELD = "field"; diff --git a/lucene/replicator/src/test/org/apache/lucene/replicator/ReplicatorTestCase.java b/lucene/replicator/src/test/org/apache/lucene/replicator/ReplicatorTestCase.java index 4c613c76276..aca90b7d347 100644 --- a/lucene/replicator/src/test/org/apache/lucene/replicator/ReplicatorTestCase.java +++ b/lucene/replicator/src/test/org/apache/lucene/replicator/ReplicatorTestCase.java @@ -22,7 +22,6 @@ import java.util.Random; import org.apache.http.conn.HttpClientConnectionManager; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.eclipse.jetty.server.Connector; import org.eclipse.jetty.server.Handler; import org.eclipse.jetty.server.Server; @@ -35,7 +34,6 @@ import org.eclipse.jetty.util.ssl.SslContextFactory; import org.eclipse.jetty.util.thread.QueuedThreadPool; import org.junit.AfterClass; -@SuppressCodecs("Lucene3x") public abstract class ReplicatorTestCase extends LuceneTestCase { private static HttpClientConnectionManager clientConnectionManager; diff --git 
a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/package.html b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/package.html deleted file mode 100644 index c83302cf5b7..00000000000 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/package.html +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - -Support for testing {@link org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat}. - - diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java index beac7da45db..0c638360dff 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java @@ -78,13 +78,13 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes doc.add(new BinaryDocValuesField("bdv", new BytesRef(TestUtil.randomSimpleString(random())))); doc.add(new SortedDocValuesField("sdv", new BytesRef(TestUtil.randomSimpleString(random(), 2)))); } - if (defaultCodecSupportsSortedSet()) { + if (codecSupportsSortedSet()) { final int numValues = random().nextInt(5); for (int i = 0; i < numValues; ++i) { doc.add(new SortedSetDocValuesField("ssdv", new BytesRef(TestUtil.randomSimpleString(random(), 2)))); } } - if (defaultCodecSupportsSortedNumeric()) { + if (codecSupportsSortedNumeric()) { final int numValues = random().nextInt(5); for (int i = 0; i < numValues; ++i) { doc.add(new SortedNumericDocValuesField("sndv", TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE))); @@ -711,7 +711,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes iwriter.close(); SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field"); - if (defaultCodecSupportsDocsWithField()) { + if (codecSupportsDocsWithField()) { assertEquals(-1, dv.getOrd(0)); 
assertEquals(0, dv.getValueCount()); } else { @@ -768,7 +768,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv"); BytesRef scratch = dv.lookupOrd(dv.getOrd(0)); assertEquals(new BytesRef("hello world 2"), scratch); - if (defaultCodecSupportsDocsWithField()) { + if (codecSupportsDocsWithField()) { assertEquals(-1, dv.getOrd(1)); } scratch = dv.get(1); @@ -1056,7 +1056,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes public void testRandomSortedBytes() throws IOException { Directory dir = newDirectory(); IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random())); - if (!defaultCodecSupportsDocsWithField()) { + if (!codecSupportsDocsWithField()) { // if the codec doesnt support missing, we expect missing to be mapped to byte[] // by the impersonator, but we have to give it a chance to merge them to this cfg.setMergePolicy(newLogMergePolicy()); @@ -1085,14 +1085,14 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes doc.add(newTextField("id", "noValue", Field.Store.YES)); w.addDocument(doc); } - if (!defaultCodecSupportsDocsWithField()) { + if (!codecSupportsDocsWithField()) { BytesRef bytesRef = new BytesRef(); hash.add(bytesRef); // add empty value for the gaps } if (rarely()) { w.commit(); } - if (!defaultCodecSupportsDocsWithField()) { + if (!codecSupportsDocsWithField()) { // if the codec doesnt support missing, we expect missing to be mapped to byte[] // by the impersonator, but we have to give it a chance to merge them to this w.forceMerge(1); @@ -1472,7 +1472,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetOneValue() throws IOException { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); Directory 
directory = newDirectory(); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory); @@ -1497,7 +1497,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetTwoFields() throws IOException { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); Directory directory = newDirectory(); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory); @@ -1532,7 +1532,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetTwoDocumentsMerged() throws IOException { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); @@ -1574,7 +1574,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetTwoValues() throws IOException { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); Directory directory = newDirectory(); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory); @@ -1604,7 +1604,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetTwoValuesUnordered() throws IOException { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); Directory directory = newDirectory(); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory); @@ -1634,7 +1634,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } 
public void testSortedSetThreeValuesTwoDocs() throws IOException { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); @@ -1683,7 +1683,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetTwoDocumentsLastMissing() throws IOException { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); @@ -1715,7 +1715,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetTwoDocumentsLastMissingMerge() throws IOException { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); @@ -1749,7 +1749,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetTwoDocumentsFirstMissing() throws IOException { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); @@ -1782,7 +1782,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetTwoDocumentsFirstMissingMerge() 
throws IOException { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); @@ -1816,7 +1816,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetMergeAwayAllValues() throws IOException { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); @@ -1845,7 +1845,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetTermsEnum() throws IOException { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); @@ -2006,7 +2006,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetFixedLengthVsStoredFields() throws Exception { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { int fixedLength = TestUtil.nextInt(random(), 1, 10); @@ -2015,7 +2015,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedNumericsSingleValuedVsStoredFields() throws Exception { - assumeTrue("Codec does not support SORTED_NUMERIC", 
defaultCodecSupportsSortedNumeric()); + assumeTrue("Codec does not support SORTED_NUMERIC", codecSupportsSortedNumeric()); int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { doTestSortedNumericsVsStoredFields( @@ -2036,7 +2036,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedNumericsSingleValuedMissingVsStoredFields() throws Exception { - assumeTrue("Codec does not support SORTED_NUMERIC", defaultCodecSupportsSortedNumeric()); + assumeTrue("Codec does not support SORTED_NUMERIC", codecSupportsSortedNumeric()); int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { doTestSortedNumericsVsStoredFields( @@ -2057,7 +2057,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedNumericsMultipleValuesVsStoredFields() throws Exception { - assumeTrue("Codec does not support SORTED_NUMERIC", defaultCodecSupportsSortedNumeric()); + assumeTrue("Codec does not support SORTED_NUMERIC", codecSupportsSortedNumeric()); int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { doTestSortedNumericsVsStoredFields( @@ -2078,7 +2078,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetVariableLengthVsStoredFields() throws Exception { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { doTestSortedSetVsStoredFields(atLeast(300), 1, 10, 16); @@ -2086,7 +2086,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetFixedLengthSingleValuedVsStoredFields() throws Exception { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", 
codecSupportsSortedSet()); int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { int fixedLength = TestUtil.nextInt(random(), 1, 10); @@ -2095,7 +2095,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedSetVariableLengthSingleValuedVsStoredFields() throws Exception { - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { doTestSortedSetVsStoredFields(atLeast(300), 1, 10, 1); @@ -2137,7 +2137,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testTwoNumbersOneMissing() throws IOException { - assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(null); conf.setMergePolicy(newLogMergePolicy()); @@ -2166,7 +2166,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testTwoNumbersOneMissingWithMerging() throws IOException { - assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(null); conf.setMergePolicy(newLogMergePolicy()); @@ -2196,7 +2196,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testThreeNumbersOneMissingWithMerging() throws IOException { - assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField()); Directory directory = 
newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(null); conf.setMergePolicy(newLogMergePolicy()); @@ -2232,7 +2232,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testTwoBytesOneMissing() throws IOException { - assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(null); conf.setMergePolicy(newLogMergePolicy()); @@ -2263,7 +2263,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testTwoBytesOneMissingWithMerging() throws IOException { - assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(null); conf.setMergePolicy(newLogMergePolicy()); @@ -2295,7 +2295,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testThreeBytesOneMissingWithMerging() throws IOException { - assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(null); conf.setMergePolicy(newLogMergePolicy()); @@ -2424,9 +2424,9 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes /** Tests dv against stored fields with threads (all types + missing) */ public void testThreads2() throws Exception { - assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); - assumeTrue("Codec does not 
support SORTED_NUMERIC", defaultCodecSupportsSortedNumeric()); + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_NUMERIC", codecSupportsSortedNumeric()); Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf); @@ -2596,9 +2596,9 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testThreads3() throws Exception { - assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField()); - assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet()); - assumeTrue("Codec does not support SORTED_NUMERIC", defaultCodecSupportsSortedNumeric()); + assumeTrue("Codec does not support getDocsWithField", codecSupportsDocsWithField()); + assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet()); + assumeTrue("Codec does not support SORTED_NUMERIC", codecSupportsSortedNumeric()); Directory dir = newFSDirectory(createTempDir()); IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf); @@ -2700,7 +2700,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testOneSortedNumber() throws IOException { - assumeTrue("Codec does not support SORTED_NUMERIC", defaultCodecSupportsSortedNumeric()); + assumeTrue("Codec does not support SORTED_NUMERIC", codecSupportsSortedNumeric()); Directory directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), directory); Document doc = new Document(); @@ -2721,7 +2721,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testOneSortedNumberOneMissing() throws IOException 
{ - assumeTrue("Codec does not support SORTED_NUMERIC", defaultCodecSupportsSortedNumeric()); + assumeTrue("Codec does not support SORTED_NUMERIC", codecSupportsSortedNumeric()); Directory directory = newDirectory(); IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null)); Document doc = new Document(); @@ -2750,7 +2750,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testTwoSortedNumber() throws IOException { - assumeTrue("Codec does not support SORTED_NUMERIC", defaultCodecSupportsSortedNumeric()); + assumeTrue("Codec does not support SORTED_NUMERIC", codecSupportsSortedNumeric()); Directory directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), directory); Document doc = new Document(); @@ -2773,7 +2773,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testTwoSortedNumberOneMissing() throws IOException { - assumeTrue("Codec does not support SORTED_NUMERIC", defaultCodecSupportsSortedNumeric()); + assumeTrue("Codec does not support SORTED_NUMERIC", codecSupportsSortedNumeric()); Directory directory = newDirectory(); IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null)); Document doc = new Document(); @@ -2804,7 +2804,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedNumberMerge() throws IOException { - assumeTrue("Codec does not support SORTED_NUMERIC", defaultCodecSupportsSortedNumeric()); + assumeTrue("Codec does not support SORTED_NUMERIC", codecSupportsSortedNumeric()); Directory directory = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); iwc.setMergePolicy(newLogMergePolicy()); @@ -2835,7 +2835,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes } public void testSortedNumberMergeAwayAllValues() throws IOException { - assumeTrue("Codec does not support SORTED_NUMERIC", 
defaultCodecSupportsSortedNumeric()); + assumeTrue("Codec does not support SORTED_NUMERIC", codecSupportsSortedNumeric()); Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); @@ -2867,4 +2867,20 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes protected boolean codecAcceptsHugeBinaryValues(String field) { return true; } + + /** Returns true if the codec "supports" docsWithField + * (other codecs return MatchAllBits, because you couldn't write missing values before) */ + protected boolean codecSupportsDocsWithField() { + return true; + } + + /** Returns true if the codec supports SORTED_SET docvalues */ + protected boolean codecSupportsSortedSet() { + return true; + } + + /** Returns true if the codec supports SORTED_NUMERIC docvalues */ + protected boolean codecSupportsSortedNumeric() { + return true; + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index fbff1590773..a332cb6853b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -466,15 +466,6 @@ public abstract class LuceneTestCase extends Assert { // Fields initialized in class or instance rules. // ----------------------------------------------------------------- - /** - * When {@code true}, Codecs for old Lucene version will support writing - * indexes in that format. Defaults to {@code false}, can be disabled by - * specific tests on demand. - * - * @lucene.internal - */ - public static boolean OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false; - // ----------------------------------------------------------------- // Class level (suite) rules. 
@@ -1682,61 +1673,6 @@ public abstract class LuceneTestCase extends Assert { throw new IOException("Cannot find resource: " + name); } } - - /** Returns true if the default codec supports single valued docvalues with missing values */ - public static boolean defaultCodecSupportsMissingDocValues() { - String name = Codec.getDefault().getName(); - if (name.equals("Lucene3x") || - name.equals("Lucene40") || name.equals("Appending") || - name.equals("Lucene41") || - name.equals("Lucene42")) { - return false; - } - return true; - } - - /** Returns true if the default codec supports SORTED_SET docvalues */ - public static boolean defaultCodecSupportsSortedSet() { - String name = Codec.getDefault().getName(); - if (name.equals("Lucene40") || name.equals("Lucene41")) { - return false; - } - return true; - } - - /** Returns true if the default codec supports SORTED_NUMERIC docvalues */ - public static boolean defaultCodecSupportsSortedNumeric() { - String name = Codec.getDefault().getName(); - if (name.equals("Lucene40") || name.equals("Lucene41") || name.equals("Lucene42") || name.equals("Lucene45") || name.equals("Lucene46")) { - return false; - } - return true; - } - - /** Returns true if the codec "supports" docsWithField - * (other codecs return MatchAllBits, because you couldnt write missing values before) */ - public static boolean defaultCodecSupportsDocsWithField() { - String name = Codec.getDefault().getName(); - if (name.equals("Lucene40") || name.equals("Lucene41") || name.equals("Lucene42")) { - return false; - } - return true; - } - - /** Returns true if the codec "supports" field updates. */ - public static boolean defaultCodecSupportsFieldUpdates() { - String name = Codec.getDefault().getName(); - if (name.equals("Lucene40") || name.equals("Lucene41") || name.equals("Lucene42") || name.equals("Lucene45")) { - return false; - } - return true; - } - - /** Returns true if the codec "supports" writing segment and commit ids. 
*/ - public static boolean defaultCodecSupportsSegmentIds() { - SegmentInfoFormat siFormat = Codec.getDefault().segmentInfoFormat(); - return siFormat instanceof SimpleTextSegmentInfoFormat || siFormat instanceof Lucene46SegmentInfoFormat; - } public void assertReaderEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException { assertReaderStatisticsEquals(info, leftReader, rightReader); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java index efc53f84a32..6a3d4dae101 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java @@ -34,14 +34,7 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.asserting.AssertingCodec; import org.apache.lucene.codecs.cheapbastard.CheapBastardCodec; import org.apache.lucene.codecs.compressing.CompressingCodec; -import org.apache.lucene.codecs.lucene40.Lucene40RWCodec; -import org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat; -import org.apache.lucene.codecs.lucene41.Lucene41RWCodec; import org.apache.lucene.codecs.lucene410.Lucene410Codec; -import org.apache.lucene.codecs.lucene42.Lucene42RWCodec; -import org.apache.lucene.codecs.lucene45.Lucene45RWCodec; -import org.apache.lucene.codecs.lucene46.Lucene46RWCodec; -import org.apache.lucene.codecs.lucene49.Lucene49RWCodec; import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat; import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.index.RandomCodec; @@ -155,61 +148,9 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule { avoidCodecs.addAll(Arrays.asList(a.value())); } - // set back to default - LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false; - savedCodec = 
Codec.getDefault(); int randomVal = random.nextInt(11); - if ("Lucene40".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && - "random".equals(TEST_POSTINGSFORMAT) && - "random".equals(TEST_DOCVALUESFORMAT) && - randomVal == 0 && - !shouldAvoidCodec("Lucene40"))) { - codec = Codec.forName("Lucene40"); - LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; - assert codec instanceof Lucene40RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; - assert (PostingsFormat.forName("Lucene40") instanceof Lucene40RWPostingsFormat) : "fix your classpath to have tests-framework.jar before lucene-core.jar"; - } else if ("Lucene41".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && - "random".equals(TEST_POSTINGSFORMAT) && - "random".equals(TEST_DOCVALUESFORMAT) && - randomVal == 1 && - !shouldAvoidCodec("Lucene41"))) { - codec = Codec.forName("Lucene41"); - LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; - assert codec instanceof Lucene41RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; - } else if ("Lucene42".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && - "random".equals(TEST_POSTINGSFORMAT) && - "random".equals(TEST_DOCVALUESFORMAT) && - randomVal == 2 && - !shouldAvoidCodec("Lucene42"))) { - codec = Codec.forName("Lucene42"); - LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; - assert codec instanceof Lucene42RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; - } else if ("Lucene45".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && - "random".equals(TEST_POSTINGSFORMAT) && - "random".equals(TEST_DOCVALUESFORMAT) && - randomVal == 3 && - !shouldAvoidCodec("Lucene45"))) { - codec = Codec.forName("Lucene45"); - LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; - assert codec instanceof Lucene45RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; - } else if ("Lucene46".equals(TEST_CODEC) || 
("random".equals(TEST_CODEC) && - "random".equals(TEST_POSTINGSFORMAT) && - "random".equals(TEST_DOCVALUESFORMAT) && - randomVal == 4 && - !shouldAvoidCodec("Lucene46"))) { - codec = Codec.forName("Lucene46"); - LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; - assert codec instanceof Lucene46RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; - } else if ("Lucene49".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && - "random".equals(TEST_POSTINGSFORMAT) && - "random".equals(TEST_DOCVALUESFORMAT) && - randomVal == 5 && - !shouldAvoidCodec("Lucene49"))) { - codec = Codec.forName("Lucene49"); - LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; - assert codec instanceof Lucene49RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; - } else if (("random".equals(TEST_POSTINGSFORMAT) == false) || ("random".equals(TEST_DOCVALUESFORMAT) == false)) { + if (("random".equals(TEST_POSTINGSFORMAT) == false) || ("random".equals(TEST_DOCVALUESFORMAT) == false)) { // the user wired postings or DV: this is messy // refactor into RandomCodec.... 
diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec index 2f5cc9212d1..282f5dd20c8 100644 --- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -19,9 +19,3 @@ org.apache.lucene.codecs.compressing.FastCompressingCodec org.apache.lucene.codecs.compressing.FastDecompressionCompressingCodec org.apache.lucene.codecs.compressing.HighCompressionCompressingCodec org.apache.lucene.codecs.compressing.dummy.DummyCompressingCodec -org.apache.lucene.codecs.lucene40.Lucene40RWCodec -org.apache.lucene.codecs.lucene41.Lucene41RWCodec -org.apache.lucene.codecs.lucene42.Lucene42RWCodec -org.apache.lucene.codecs.lucene45.Lucene45RWCodec -org.apache.lucene.codecs.lucene46.Lucene46RWCodec -org.apache.lucene.codecs.lucene49.Lucene49RWCodec diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat index 2086be1f0e7..d1798334486 100644 --- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat +++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat @@ -14,4 +14,3 @@ # limitations under the License. 
org.apache.lucene.codecs.asserting.AssertingDocValuesFormat -org.apache.lucene.codecs.lucene42.Lucene42RWDocValuesFormat diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat index f85f32d7550..ed5309c7e03 100644 --- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat +++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -21,4 +21,3 @@ org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapFixedInterval org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapDocFreqInterval org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings org.apache.lucene.codecs.asserting.AssertingPostingsFormat -org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat diff --git a/solr/common-build.xml b/solr/common-build.xml index 8a33d968014..7c2a145d706 100644 --- a/solr/common-build.xml +++ b/solr/common-build.xml @@ -88,6 +88,7 @@ + @@ -156,7 +157,7 @@ @@ -233,7 +234,7 @@ - + @@ -299,6 +300,7 @@ + diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java index 27c695dd6f6..f3d45752dca 100644 --- a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java +++ b/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java @@ -22,9 +22,6 @@ import java.io.FileOutputStream; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.TestUtil; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.SolrTestCaseJ4; import org.junit.BeforeClass; @@ -35,7 +32,6 @@ import com.ibm.icu.util.ULocale; /** * 
Tests {@link ICUCollationField} with docValues. */ -@SuppressCodecs({"Lucene40", "Lucene41"}) public class TestICUCollationFieldDocValues extends SolrTestCaseJ4 { @BeforeClass diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java index a73dd5d021c..6d8c0863e6a 100644 --- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java @@ -46,7 +46,6 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.lucene.util.Constants; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase.Slow; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery.ORDER; @@ -90,7 +89,6 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Conseque @ThreadLeakLingering(linger = 0) @ThreadLeakZombies(Consequence.CONTINUE) @ThreadLeakScope(Scope.NONE) -@SuppressCodecs({"Lucene3x", "Lucene40"}) @SuppressSSL // SSL does not work with this test for currently unknown reasons @Slow public class MorphlineGoLiveMiniMRTest extends AbstractFullDistribZkTestBase { diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java index 4c11b4c45f5..be5626ccdaa 100644 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java +++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java @@ -22,7 +22,6 @@ import java.util.Iterator; import org.apache.lucene.util.Constants; import 
org.apache.lucene.util.LuceneTestCase.Slow; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.request.QueryRequest; @@ -48,7 +47,6 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Conseque @ThreadLeakLingering(linger = 0) @ThreadLeakZombies(Consequence.CONTINUE) @ThreadLeakScope(Scope.NONE) -@SuppressCodecs({"Lucene3x", "Lucene40"}) @Slow public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase { diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java index 095fb1df302..816e827a1d5 100644 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java +++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java @@ -30,7 +30,6 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; import org.apache.lucene.util.Constants; import org.apache.lucene.util.LuceneTestCase.Slow; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; @@ -54,7 +53,6 @@ import com.google.common.io.Files; @ThreadLeakLingering(linger = 0) @ThreadLeakZombies(Consequence.CONTINUE) @ThreadLeakScope(Scope.NONE) -@SuppressCodecs({"Lucene3x", "Lucene40"}) @Slow public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase { diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java index dbea3234590..69865a6bec6 
100644 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java +++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java @@ -21,7 +21,6 @@ import java.util.Iterator; import org.apache.lucene.util.Constants; import org.apache.lucene.util.LuceneTestCase.Slow; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; @@ -42,7 +41,6 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Conseque @ThreadLeakLingering(linger = 0) @ThreadLeakZombies(Consequence.CONTINUE) @ThreadLeakScope(Scope.NONE) -@SuppressCodecs({"Lucene3x", "Lucene40"}) @Slow public class SolrMorphlineZkTest extends AbstractSolrMorphlineZkTestBase { diff --git a/solr/core/src/test/org/apache/solr/CursorPagingTest.java b/solr/core/src/test/org/apache/solr/CursorPagingTest.java index aa711e4e967..3b7c8a2c08d 100644 --- a/solr/core/src/test/org/apache/solr/CursorPagingTest.java +++ b/solr/core/src/test/org/apache/solr/CursorPagingTest.java @@ -125,7 +125,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 { SolrParams params = null; final String intsort = "int" + (random().nextBoolean() ? "" : "_dv"); - final String intmissingsort = defaultCodecSupportsMissingDocValues() ? intsort : "int"; + final String intmissingsort = intsort; // trivial base case: ensure cursorMark against an empty index doesn't blow up cursorMark = CURSOR_MARK_START; @@ -633,26 +633,15 @@ public class CursorPagingTest extends SolrTestCaseJ4 { *

*
    *
  • _version_ is removed
  • - *
  • - * *_dv_last, *_dv_first and *_dv - * fields are removed if the codec doesn't support missing DocValues - *
  • *
- * @see #defaultCodecSupportsMissingDocValues */ public static List pruneAndDeterministicallySort(Collection raw) { - final boolean prune_dv = ! defaultCodecSupportsMissingDocValues(); - ArrayList names = new ArrayList<>(37); for (String f : raw) { if (f.equals("_version_")) { continue; } - if (prune_dv && (f.endsWith("_dv_last") || f.endsWith("_dv_first")) - || f.endsWith("_dv")) { - continue; - } names.add(f); } diff --git a/solr/core/src/test/org/apache/solr/DistributedIntervalFacetingTest.java b/solr/core/src/test/org/apache/solr/DistributedIntervalFacetingTest.java index 04a6048d17b..275e9945e6b 100644 --- a/solr/core/src/test/org/apache/solr/DistributedIntervalFacetingTest.java +++ b/solr/core/src/test/org/apache/solr/DistributedIntervalFacetingTest.java @@ -4,7 +4,6 @@ import java.util.Arrays; import java.util.Comparator; import java.util.List; -import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.response.IntervalFacet.Count; @@ -29,7 +28,6 @@ import org.junit.BeforeClass; * limitations under the License. 
*/ @Slow -@LuceneTestCase.SuppressCodecs({"Lucene40", "Lucene41", "Lucene42", "Lucene43"}) public class DistributedIntervalFacetingTest extends BaseDistributedSearchTestCase { diff --git a/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java b/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java index 1e75ab24e31..135f0e905d1 100644 --- a/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java +++ b/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java @@ -25,7 +25,6 @@ import java.util.Random; import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.LuceneTestCase.Slow; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.SchemaField; @@ -38,7 +37,6 @@ import org.junit.Test; * to the indexed facet results as if it were just another faceting method. */ @Slow -@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) public class TestRandomDVFaceting extends SolrTestCaseJ4 { @BeforeClass diff --git a/solr/core/src/test/org/apache/solr/analytics/AbstractAnalyticsStatsTest.java b/solr/core/src/test/org/apache/solr/analytics/AbstractAnalyticsStatsTest.java index e59758d08bb..2089a79c6c1 100644 --- a/solr/core/src/test/org/apache/solr/analytics/AbstractAnalyticsStatsTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/AbstractAnalyticsStatsTest.java @@ -36,7 +36,6 @@ import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import org.apache.commons.lang.StringUtils; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.IOUtils; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.analytics.util.MedianCalculator; @@ -48,7 +47,6 @@ import org.xml.sax.SAXException; import com.google.common.collect.ObjectArrays; -@SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Appending","Asserting"}) public class 
AbstractAnalyticsStatsTest extends SolrTestCaseJ4 { protected static final String[] BASEPARMS = new String[]{ "q", "*:*", "indent", "true", "olap", "true", "rows", "0" }; diff --git a/solr/core/src/test/org/apache/solr/analytics/NoFacetTest.java b/solr/core/src/test/org/apache/solr/analytics/NoFacetTest.java index a18e9d627d9..f217150c22b 100644 --- a/solr/core/src/test/org/apache/solr/analytics/NoFacetTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/NoFacetTest.java @@ -21,11 +21,9 @@ package org.apache.solr.analytics; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.junit.BeforeClass; import org.junit.Test; -@SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Appending","Asserting"}) public class NoFacetTest extends AbstractAnalyticsStatsTest { static String fileName = "/analytics/requestFiles/noFacets.txt"; diff --git a/solr/core/src/test/org/apache/solr/analytics/expression/ExpressionTest.java b/solr/core/src/test/org/apache/solr/analytics/expression/ExpressionTest.java index 5819254b429..5cf8be110d2 100644 --- a/solr/core/src/test/org/apache/solr/analytics/expression/ExpressionTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/expression/ExpressionTest.java @@ -20,7 +20,6 @@ package org.apache.solr.analytics.expression; import com.google.common.collect.ObjectArrays; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.analytics.AbstractAnalyticsStatsTest; import org.apache.solr.request.SolrQueryRequest; @@ -34,7 +33,6 @@ import java.io.InputStream; import java.util.ArrayList; import java.util.Scanner; -@SuppressCodecs({"Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Appending", "Asserting"}) public class ExpressionTest extends AbstractAnalyticsStatsTest { private static final String fileName = "/analytics/requestFiles/expressions.txt"; diff --git 
a/solr/core/src/test/org/apache/solr/analytics/facet/AbstractAnalyticsFacetTest.java b/solr/core/src/test/org/apache/solr/analytics/facet/AbstractAnalyticsFacetTest.java index 7abd79a3ad9..3d3ca3f0949 100644 --- a/solr/core/src/test/org/apache/solr/analytics/facet/AbstractAnalyticsFacetTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/facet/AbstractAnalyticsFacetTest.java @@ -31,7 +31,6 @@ import java.util.List; import java.util.Scanner; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.analytics.util.MedianCalculator; import org.apache.solr.analytics.util.PercentileCalculator; @@ -52,7 +51,6 @@ import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; -@SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Appending","Asserting"}) public class AbstractAnalyticsFacetTest extends SolrTestCaseJ4 { protected static final HashMap defaults = new HashMap<>(); diff --git a/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetExtrasTest.java b/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetExtrasTest.java index 8377ccd4822..09e63fbc43e 100644 --- a/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetExtrasTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetExtrasTest.java @@ -23,12 +23,10 @@ import java.util.Collection; import java.util.Collections; import java.util.List; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; -@SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Appending","Asserting"}) public class FieldFacetExtrasTest extends AbstractAnalyticsFacetTest { static String fileName = "/analytics/requestFiles/fieldFacetExtras.txt"; diff --git a/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetTest.java 
b/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetTest.java index 12cfe37cb6a..2eab53a2423 100644 --- a/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetTest.java @@ -23,13 +23,11 @@ import java.util.Collection; import java.util.Collections; import java.util.List; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; -@SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Appending","Asserting"}) public class FieldFacetTest extends AbstractAnalyticsFacetTest{ static String fileName = "/analytics/requestFiles/fieldFacets.txt"; diff --git a/solr/core/src/test/org/apache/solr/analytics/facet/QueryFacetTest.java b/solr/core/src/test/org/apache/solr/analytics/facet/QueryFacetTest.java index 4516c956b94..8c5787d1932 100644 --- a/solr/core/src/test/org/apache/solr/analytics/facet/QueryFacetTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/facet/QueryFacetTest.java @@ -20,11 +20,9 @@ package org.apache.solr.analytics.facet; import java.util.ArrayList; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.junit.BeforeClass; import org.junit.Test; -@SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Appending","Asserting"}) public class QueryFacetTest extends AbstractAnalyticsFacetTest { static String fileName = "/analytics/requestFiles/queryFacets.txt"; diff --git a/solr/core/src/test/org/apache/solr/analytics/facet/RangeFacetTest.java b/solr/core/src/test/org/apache/solr/analytics/facet/RangeFacetTest.java index d7477df2aed..c6e7494875d 100644 --- a/solr/core/src/test/org/apache/solr/analytics/facet/RangeFacetTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/facet/RangeFacetTest.java @@ -20,12 +20,10 @@ package org.apache.solr.analytics.facet; import java.util.ArrayList; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; 
import org.junit.BeforeClass; import org.junit.Test; -@SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Appending","Asserting"}) public class RangeFacetTest extends AbstractAnalyticsFacetTest { static String fileName = "/analytics/requestFiles/rangeFacets.txt"; diff --git a/solr/core/src/test/org/apache/solr/analytics/util/valuesource/FunctionTest.java b/solr/core/src/test/org/apache/solr/analytics/util/valuesource/FunctionTest.java index 29b97d3f1a8..6a91401843f 100644 --- a/solr/core/src/test/org/apache/solr/analytics/util/valuesource/FunctionTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/util/valuesource/FunctionTest.java @@ -18,13 +18,11 @@ package org.apache.solr.analytics.util.valuesource; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.analytics.AbstractAnalyticsStatsTest; import org.apache.solr.analytics.facet.AbstractAnalyticsFacetTest; import org.junit.BeforeClass; import org.junit.Test; -@SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Appending","Asserting"}) public class FunctionTest extends AbstractAnalyticsStatsTest { static String fileName = "/analytics/requestFiles/functions.txt"; diff --git a/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java b/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java index 9c95441cd1e..fffcf6d5225 100644 --- a/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java @@ -149,7 +149,7 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase { QueryResponse rsp = null; final String intsort = "int" + (random().nextBoolean() ? "" : "_dv"); - final String intmissingsort = defaultCodecSupportsMissingDocValues() ? 
intsort : "int"; + final String intmissingsort = intsort; // trivial base case: ensure cursorMark against an empty index doesn't blow up cursorMark = CURSOR_MARK_START; diff --git a/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java b/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java index ec41fe1a47a..157ca6af749 100644 --- a/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java +++ b/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java @@ -17,7 +17,6 @@ package org.apache.solr.highlight; * limitations under the License. */ -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.handler.component.HighlightComponent; import org.apache.solr.schema.IndexSchema; diff --git a/solr/core/src/test/org/apache/solr/request/TestIntervalFaceting.java b/solr/core/src/test/org/apache/solr/request/TestIntervalFaceting.java index 266f95d317e..508c0ceabf6 100644 --- a/solr/core/src/test/org/apache/solr/request/TestIntervalFaceting.java +++ b/solr/core/src/test/org/apache/solr/request/TestIntervalFaceting.java @@ -21,7 +21,6 @@ import java.util.Arrays; import java.util.Comparator; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LuceneTestCase; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.ModifiableSolrParams; @@ -36,7 +35,6 @@ import org.apache.solr.util.RefCounted; import org.junit.BeforeClass; import org.junit.Test; -@LuceneTestCase.SuppressCodecs({"Lucene40", "Lucene41", "Lucene42", "Lucene43"}) public class TestIntervalFaceting extends SolrTestCaseJ4 { @BeforeClass diff --git a/solr/core/src/test/org/apache/solr/response/TestSortingResponseWriter.java b/solr/core/src/test/org/apache/solr/response/TestSortingResponseWriter.java index 6f306fdeaf6..d18684da639 100644 --- 
a/solr/core/src/test/org/apache/solr/response/TestSortingResponseWriter.java +++ b/solr/core/src/test/org/apache/solr/response/TestSortingResponseWriter.java @@ -19,9 +19,7 @@ package org.apache.solr.response; import org.apache.solr.SolrTestCaseJ4; import org.junit.*; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; -@SuppressCodecs({"Lucene3x", "Lucene40","Lucene41","Lucene42","Lucene45"}) public class TestSortingResponseWriter extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { diff --git a/solr/core/src/test/org/apache/solr/schema/DocValuesMissingTest.java b/solr/core/src/test/org/apache/solr/schema/DocValuesMissingTest.java index 0483e8c139d..eba83b0eaee 100644 --- a/solr/core/src/test/org/apache/solr/schema/DocValuesMissingTest.java +++ b/solr/core/src/test/org/apache/solr/schema/DocValuesMissingTest.java @@ -17,14 +17,12 @@ package org.apache.solr.schema; * limitations under the License. */ -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.SolrTestCaseJ4; import org.junit.BeforeClass; /** * Tests things like sorting on docvalues with missing values */ -@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) // old formats cannot represent missing values public class DocValuesMissingTest extends SolrTestCaseJ4 { @BeforeClass diff --git a/solr/core/src/test/org/apache/solr/schema/DocValuesMultiTest.java b/solr/core/src/test/org/apache/solr/schema/DocValuesMultiTest.java index 95d7c4b4a36..6386b3b34ee 100644 --- a/solr/core/src/test/org/apache/solr/schema/DocValuesMultiTest.java +++ b/solr/core/src/test/org/apache/solr/schema/DocValuesMultiTest.java @@ -21,7 +21,6 @@ import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.FieldInfo.DocValuesType; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.SolrTestCaseJ4; import 
org.apache.solr.core.SolrCore; import org.apache.solr.search.SolrIndexSearcher; @@ -30,7 +29,6 @@ import org.junit.BeforeClass; import java.io.IOException; -@SuppressCodecs({"Lucene40", "Lucene41"}) public class DocValuesMultiTest extends SolrTestCaseJ4 { @BeforeClass diff --git a/solr/core/src/test/org/apache/solr/schema/TestCollationFieldDocValues.java b/solr/core/src/test/org/apache/solr/schema/TestCollationFieldDocValues.java index 2bc5cfc32b8..6e2927ac59b 100644 --- a/solr/core/src/test/org/apache/solr/schema/TestCollationFieldDocValues.java +++ b/solr/core/src/test/org/apache/solr/schema/TestCollationFieldDocValues.java @@ -23,9 +23,6 @@ import java.text.Collator; import java.text.RuleBasedCollator; import java.util.Locale; -import org.apache.lucene.util.TestUtil; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; - import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.solr.SolrTestCaseJ4; @@ -34,7 +31,6 @@ import org.junit.BeforeClass; /** * Tests {@link CollationField} with docvalues */ -@SuppressCodecs({"Lucene40", "Lucene41"}) public class TestCollationFieldDocValues extends SolrTestCaseJ4 { @BeforeClass diff --git a/solr/core/src/test/org/apache/solr/search/TestFieldSortValues.java b/solr/core/src/test/org/apache/solr/search/TestFieldSortValues.java index e234ff7dc3e..eb57c9e918f 100644 --- a/solr/core/src/test/org/apache/solr/search/TestFieldSortValues.java +++ b/solr/core/src/test/org/apache/solr/search/TestFieldSortValues.java @@ -17,7 +17,6 @@ package org.apache.solr.search; * limitations under the License. 
*/ -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.SolrTestCaseJ4; import org.junit.BeforeClass; @@ -25,7 +24,6 @@ import org.junit.BeforeClass; /** * Test QueryComponent.doFieldSortValues */ -@SuppressCodecs({"Lucene3x"}) public class TestFieldSortValues extends SolrTestCaseJ4 { @BeforeClass diff --git a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java index 1e684c07d8d..27dfe5b8069 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java +++ b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java @@ -17,15 +17,12 @@ package org.apache.solr.search; * limitations under the License. */ -import org.apache.lucene.util.LuceneTestCase; import org.apache.solr.SolrTestCaseJ4; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; //Unlike TestSolr4Spatial, not parameterized / not generic. -//We exclude Codecs that don't support DocValues (though not sure if this list is quite right) -@LuceneTestCase.SuppressCodecs({"Lucene3x", "Appending", "Lucene40", "Lucene41"}) public class TestSolr4Spatial2 extends SolrTestCaseJ4 { @BeforeClass