From 78e445ce579a90d50e1050695517a20bb484d6b9 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Wed, 15 Oct 2014 16:58:30 +0000 Subject: [PATCH] LUCENE-6006: remove unnecessary FieldInfo.normType git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1632120 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 + .../lucene/codecs/UndeadNormsProducer.java | 113 +++++++++++++++ .../lucene40/Lucene40FieldInfosFormat.java | 13 +- .../codecs/lucene40/Lucene40NormsFormat.java | 7 +- .../codecs/lucene40/Lucene40NormsReader.java | 6 + .../lucene42/Lucene42FieldInfosFormat.java | 15 +- .../codecs/lucene42/Lucene42NormsFormat.java | 7 +- .../lucene42/Lucene42NormsProducer.java | 6 + .../lucene46/Lucene46FieldInfosFormat.java | 17 ++- .../codecs/lucene49/Lucene49NormsFormat.java | 7 +- .../lucene49/Lucene49NormsProducer.java | 8 +- .../org/apache/lucene/codecs/package.html | 25 ++++ .../lucene40/Lucene40RWFieldInfosFormat.java | 2 +- .../lucene40/TestLucene40FieldInfoFormat.java | 2 +- .../lucene40/TestLucene40NormsFormat.java | 107 +++++++++++++++ .../codecs/lucene40/index.40.undeadnorms.zip | Bin 0 -> 2356 bytes .../lucene42/Lucene42RWFieldInfosFormat.java | 2 +- .../lucene42/TestLucene42NormsFormat.java | 108 ++++++++++++++- .../codecs/lucene42/index.42.undeadnorms.zip | Bin 0 -> 2419 bytes .../lucene46/TestLucene46UndeadNorms.java | 129 ++++++++++++++++++ .../codecs/lucene46/index.46.undeadnorms.zip | Bin 0 -> 1613 bytes .../lucene49/TestLucene49NormsFormat.java | 107 +++++++++++++++ .../codecs/lucene49/index.49.undeadnorms.zip | Bin 0 -> 1899 bytes .../index/TestBackwardsCompatibility.java | 3 +- .../simpletext/SimpleTextDocValuesWriter.java | 3 +- .../SimpleTextFieldInfosFormat.java | 14 +- .../lucene50/Lucene50FieldInfosFormat.java | 13 +- .../org/apache/lucene/index/CheckIndex.java | 8 +- .../lucene/index/DefaultIndexingChain.java | 27 ++-- .../org/apache/lucene/index/FieldInfo.java | 37 +---- .../org/apache/lucene/index/FieldInfos.java | 22 
++- .../lucene/index/memory/MemoryIndex.java | 2 +- .../lucene/uninverting/UninvertingReader.java | 12 +- .../asserting/AssertingNormsFormat.java | 2 +- .../index/BaseFieldInfoFormatTestCase.java | 6 - .../lucene/index/BaseNormsFormatTestCase.java | 59 +++++++- .../index/BasePostingsFormatTestCase.java | 4 +- .../java/org/apache/solr/search/Insanity.java | 2 +- 38 files changed, 766 insertions(+), 133 deletions(-) create mode 100644 lucene/backward-codecs/src/java/org/apache/lucene/codecs/UndeadNormsProducer.java create mode 100644 lucene/backward-codecs/src/java/org/apache/lucene/codecs/package.html create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/index.40.undeadnorms.zip create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/index.42.undeadnorms.zip create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/TestLucene46UndeadNorms.java create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/index.46.undeadnorms.zip create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/index.49.undeadnorms.zip diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 98ac77f99e9..9e08556773e 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -160,6 +160,10 @@ API Changes their jflex impl directly. (Ryan Ernst) +* LUCENE-6006: Removed FieldInfo.normType since it's redundant: it + will be DocValuesType.NUMERIC if the field is indexed and does not omit + norms, else null. 
(Robert Muir, Mike McCandless) + Bug Fixes * LUCENE-5650: Enforce read-only access to any path outside the temporary diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/UndeadNormsProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/UndeadNormsProducer.java new file mode 100644 index 00000000000..fa5b4f7cff8 --- /dev/null +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/UndeadNormsProducer.java @@ -0,0 +1,113 @@ +package org.apache.lucene.codecs; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Collections; +import java.util.Map; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.util.Accountable; + +/** + * Used only for backwards compatibility corner case, to provide + * re-animated norms when all fields are undead. + * + * @lucene.internal */ +public class UndeadNormsProducer extends NormsProducer { + + /** Used to bring undead norms back to life. 
*/ + public final static String LEGACY_UNDEAD_NORMS_KEY = UndeadNormsProducer.class.getSimpleName() + ".undeadnorms"; + + /** Use this instance */ + public final static NormsProducer INSTANCE = new UndeadNormsProducer(); + + private UndeadNormsProducer() { + } + + /** Returns true if all fields that have norms are marked as having undead norms. */ + public static boolean isUndeadArmy(FieldInfos fieldInfos) { + + boolean everythingIsUndead = true; + for(FieldInfo fieldInfo : fieldInfos) { + if (fieldInfo.hasNorms()) { + String isUndead = fieldInfo.getAttribute(LEGACY_UNDEAD_NORMS_KEY); + if (isUndead != null) { + assert "true".equals(isUndead); + } else { + everythingIsUndead = false; + } + } + } + + return everythingIsUndead; + } + + /** Returns true if this field has undead norms. */ + public static boolean isUndead(FieldInfo fieldInfo) { + String isUndead = fieldInfo.getAttribute(LEGACY_UNDEAD_NORMS_KEY); + if (isUndead != null) { + // Bring undead norms back to life; this is set by the legacy FieldInfos formats (Lucene40, Lucene42, Lucene46), to emulate pre-5.0 undead norms + assert "true".equals(isUndead); + return true; + } else { + return false; + } + } + + /** Call this to note that the field with these attributes has undead norms. 
*/ + public static void setUndead(Map attributes) { + attributes.put(LEGACY_UNDEAD_NORMS_KEY, "true"); + } + + @Override + public NumericDocValues getNorms(FieldInfo field) throws IOException { + return DocValues.emptyNumeric(); + } + + @Override + public void close() { + } + + @Override + public long ramBytesUsed() { + return 0; + } + + @Override + public Iterable getChildResources() { + return Collections.emptyList(); + } + + @Override + public void checkIntegrity() throws IOException { + } + + @Override + public NormsProducer getMergeInstance() throws IOException { + return this; + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } +} diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java index f4a1e326469..146542fb8e7 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java @@ -23,13 +23,14 @@ import java.util.Map; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.UndeadNormsProducer; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.FieldInfo.DocValuesType; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -91,7 +92,7 @@ public class Lucene40FieldInfosFormat extends FieldInfosFormat { byte val = 
input.readByte(); final LegacyDocValuesType oldValuesType = getDocValuesType((byte) (val & 0x0F)); final LegacyDocValuesType oldNormsType = getDocValuesType((byte) ((val >>> 4) & 0x0F)); - final Map attributes = input.readStringStringMap();; + final Map attributes = input.readStringStringMap(); if (oldValuesType.mapping != null) { attributes.put(LEGACY_DV_TYPE_KEY, oldValuesType.name()); } @@ -101,8 +102,12 @@ public class Lucene40FieldInfosFormat extends FieldInfosFormat { } attributes.put(LEGACY_NORM_TYPE_KEY, oldNormsType.name()); } + if (isIndexed && omitNorms == false && oldNormsType.mapping == null) { + // Undead norms! Lucene40NormsReader will check this and bring norms back from the dead: + UndeadNormsProducer.setUndead(attributes); + } infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, - omitNorms, storePayloads, indexOptions, oldValuesType.mapping, oldNormsType.mapping, -1, Collections.unmodifiableMap(attributes)); + omitNorms, storePayloads, indexOptions, oldValuesType.mapping, -1, Collections.unmodifiableMap(attributes)); } CodecUtil.checkEOF(input); diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java index 85c0931ce03..7a84fbbd0f3 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java @@ -22,6 +22,7 @@ import java.io.IOException; import org.apache.lucene.codecs.NormsConsumer; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.NormsProducer; +import org.apache.lucene.codecs.UndeadNormsProducer; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; @@ -46,6 +47,10 @@ public class Lucene40NormsFormat extends NormsFormat { String 
filename = IndexFileNames.segmentFileName(state.segmentInfo.name, "nrm", Lucene40CompoundFormat.COMPOUND_FILE_EXTENSION); - return new Lucene40NormsReader(state, filename); + if (UndeadNormsProducer.isUndeadArmy(state.fieldInfos)) { + return UndeadNormsProducer.INSTANCE; + } else { + return new Lucene40NormsReader(state, filename); + } } } diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java index e52491204d7..490c43a4645 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java @@ -21,6 +21,8 @@ import java.io.IOException; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.NormsProducer; +import org.apache.lucene.codecs.UndeadNormsProducer; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReadState; @@ -45,6 +47,10 @@ final class Lucene40NormsReader extends NormsProducer { @Override public NumericDocValues getNorms(FieldInfo field) throws IOException { + if (UndeadNormsProducer.isUndead(field)) { + // Bring undead norms back to life; this is set in Lucene40FieldInfosFormat, to emulate pre-5.0 undead norms + return DocValues.emptyNumeric(); + } return impl.getNumeric(field); } diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java index 6ebfd86ac20..8c33abf6460 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java @@ -23,13 
+23,14 @@ import java.util.Map; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.UndeadNormsProducer; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.FieldInfo.DocValuesType; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -41,7 +42,7 @@ import org.apache.lucene.util.IOUtils; */ @Deprecated public class Lucene42FieldInfosFormat extends FieldInfosFormat { - + /** Sole constructor. */ public Lucene42FieldInfosFormat() { } @@ -86,8 +87,14 @@ public class Lucene42FieldInfosFormat extends FieldInfosFormat { final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F)); final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F)); final Map attributes = input.readStringStringMap(); + + if (isIndexed && omitNorms == false && normsType == null) { + // Undead norms! 
Lucene42NormsProducer will check this and bring norms back from the dead: + UndeadNormsProducer.setUndead(attributes); + } + infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, - omitNorms, storePayloads, indexOptions, docValuesType, normsType, -1, Collections.unmodifiableMap(attributes)); + omitNorms, storePayloads, indexOptions, docValuesType, -1, Collections.unmodifiableMap(attributes)); } CodecUtil.checkEOF(input); diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java index b021eb58463..c2123d141d2 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java @@ -22,6 +22,7 @@ import java.io.IOException; import org.apache.lucene.codecs.NormsConsumer; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.NormsProducer; +import org.apache.lucene.codecs.UndeadNormsProducer; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.packed.PackedInts; @@ -62,7 +63,11 @@ public class Lucene42NormsFormat extends NormsFormat { @Override public final NormsProducer normsProducer(SegmentReadState state) throws IOException { - return new Lucene42NormsProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION); + if (UndeadNormsProducer.isUndeadArmy(state.fieldInfos)) { + return UndeadNormsProducer.INSTANCE; + } else { + return new Lucene42NormsProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION); + } } static final String DATA_CODEC = "Lucene41NormsData"; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsProducer.java 
b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsProducer.java index 8635f0628b6..7048b1d42b8 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsProducer.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsProducer.java @@ -21,6 +21,8 @@ import java.io.IOException; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.NormsProducer; +import org.apache.lucene.codecs.UndeadNormsProducer; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReadState; @@ -45,6 +47,10 @@ final class Lucene42NormsProducer extends NormsProducer { @Override public NumericDocValues getNorms(FieldInfo field) throws IOException { + if (UndeadNormsProducer.isUndead(field)) { + // Bring undead norms back to life; this is set in Lucene42FieldInfosFormat, to emulate pre-5.0 undead norms + return DocValues.emptyNumeric(); + } return impl.getNumeric(field); } diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java index 9318d6d4212..36858a9956a 100755 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java @@ -23,13 +23,14 @@ import java.util.Map; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.UndeadNormsProducer; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import 
org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.FieldInfo.DocValuesType; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -42,7 +43,7 @@ import org.apache.lucene.store.IndexOutput; */ @Deprecated public final class Lucene46FieldInfosFormat extends FieldInfosFormat { - + /** Sole constructor. */ public Lucene46FieldInfosFormat() { } @@ -88,8 +89,14 @@ public final class Lucene46FieldInfosFormat extends FieldInfosFormat { final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F)); final long dvGen = input.readLong(); final Map attributes = input.readStringStringMap(); + + if (isIndexed && omitNorms == false && normsType == null) { + // Undead norms! Lucene42NormsProducer will check this and bring norms back from the dead: + UndeadNormsProducer.setUndead(attributes); + } + infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, - omitNorms, storePayloads, indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(attributes)); + omitNorms, storePayloads, indexOptions, docValuesType, dvGen, Collections.unmodifiableMap(attributes)); } if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) { @@ -148,7 +155,7 @@ public final class Lucene46FieldInfosFormat extends FieldInfosFormat { // pack the DV types in one byte final byte dv = docValuesByte(fi.getDocValuesType()); - final byte nrm = docValuesByte(fi.getNormType()); + final byte nrm = docValuesByte(fi.hasNorms() ? 
DocValuesType.NUMERIC : null); assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0; byte val = (byte) (0xff & ((nrm << 4) | dv)); output.writeByte(val); diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsFormat.java index 0f4f512c87e..f9b5fa75967 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsFormat.java @@ -22,6 +22,7 @@ import java.io.IOException; import org.apache.lucene.codecs.NormsConsumer; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.NormsProducer; +import org.apache.lucene.codecs.UndeadNormsProducer; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; @@ -42,7 +43,11 @@ public class Lucene49NormsFormat extends NormsFormat { @Override public final NormsProducer normsProducer(SegmentReadState state) throws IOException { - return new Lucene49NormsProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION); + if (UndeadNormsProducer.isUndeadArmy(state.fieldInfos)) { + return UndeadNormsProducer.INSTANCE; + } else { + return new Lucene49NormsProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION); + } } static final String DATA_CODEC = "Lucene49NormsData"; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java index 01fd39c5222..aab84587407 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java @@ -25,7 +25,9 @@ import java.util.concurrent.atomic.AtomicLong; import 
org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.NormsProducer; +import org.apache.lucene.codecs.UndeadNormsProducer; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; @@ -40,8 +42,8 @@ import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.BlockPackedReader; import org.apache.lucene.util.packed.PackedInts; -import static org.apache.lucene.codecs.lucene49.Lucene49NormsFormat.VERSION_START; import static org.apache.lucene.codecs.lucene49.Lucene49NormsFormat.VERSION_CURRENT; +import static org.apache.lucene.codecs.lucene49.Lucene49NormsFormat.VERSION_START; /** * Reader for 4.9 norms @@ -153,6 +155,10 @@ final class Lucene49NormsProducer extends NormsProducer { @Override public synchronized NumericDocValues getNorms(FieldInfo field) throws IOException { + if (UndeadNormsProducer.isUndead(field)) { + // Bring undead norms back to life; this is set in Lucene46FieldInfosFormat, to emulate pre-5.0 undead norms + return DocValues.emptyNumeric(); + } NumericDocValues instance = instances.get(field.name); if (instance == null) { instance = loadNorms(field); diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/package.html new file mode 100644 index 00000000000..6defdbe769e --- /dev/null +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/package.html @@ -0,0 +1,25 @@ + + + + + + + +Common APIs for use by backwards compatibility codecs. 
+ + diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWFieldInfosFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWFieldInfosFormat.java index e9fd7cdfb49..ea671b85659 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWFieldInfosFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40RWFieldInfosFormat.java @@ -75,7 +75,7 @@ public final class Lucene40RWFieldInfosFormat extends Lucene40FieldInfosFormat { // pack the DV types in one byte final byte dv = docValuesByte(fi.getDocValuesType(), fi.getAttribute(LEGACY_DV_TYPE_KEY)); - final byte nrm = docValuesByte(fi.getNormType(), fi.getAttribute(LEGACY_NORM_TYPE_KEY)); + final byte nrm = docValuesByte(fi.hasNorms() ? DocValuesType.NUMERIC : null, fi.getAttribute(LEGACY_NORM_TYPE_KEY)); assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0; byte val = (byte) (0xff & ((nrm << 4) | dv)); output.writeByte(val); diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40FieldInfoFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40FieldInfoFormat.java index 01feb0551cc..7ff112c7809 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40FieldInfoFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40FieldInfoFormat.java @@ -64,7 +64,7 @@ public class TestLucene40FieldInfoFormat extends BaseFieldInfoFormatTestCase { } } - if (fi.getNormType() != null) { + if (fi.hasNorms()) { fi.putAttribute(Lucene40FieldInfosFormat.LEGACY_NORM_TYPE_KEY, LegacyDocValuesType.FIXED_INTS_8.name()); } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java index 51b72558072..72c01dad171 100644 --- 
a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java @@ -17,8 +17,18 @@ package org.apache.lucene.codecs.lucene40; * limitations under the License. */ +import java.io.InputStream; +import java.nio.file.Path; + import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.BaseNormsFormatTestCase; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.TestUtil; /** Tests Lucene40's norms format */ public class TestLucene40NormsFormat extends BaseNormsFormatTestCase { @@ -28,4 +38,101 @@ public class TestLucene40NormsFormat extends BaseNormsFormatTestCase { protected Codec getCodec() { return codec; } + + /** Copy this back to /l/400/lucene/CreateUndeadNorms.java, then: + * - ant clean + * - pushd analysis/common; ant jar; popd + * - pushd core; ant jar; popd + * - javac -cp build/analysis/common/lucene-analyzers-common-4.0-SNAPSHOT.jar:build/core/lucene-core-4.0-SNAPSHOT.jar CreateUndeadNorms.java + * - java -cp .:build/analysis/common/lucene-analyzers-common-4.0-SNAPSHOT.jar:build/core/lucene-core-4.0-SNAPSHOT.jar CreateUndeadNorms + * - cd /tmp/undeadnorms ; zip index.40.undeadnorms.zip * + +import java.io.File; +import java.io.IOException; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; 
+import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.Version; + +public class CreateUndeadNorms { + public static void main(String[] args) throws Exception { + File file = new File("/tmp/undeadnorms"); + if (file.exists()) { + throw new RuntimeException("please remove /tmp/undeadnorms first"); + } + Directory dir = FSDirectory.open(new File("/tmp/undeadnorms")); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_40, new WhitespaceAnalyzer(Version.LUCENE_40))); + Document doc = new Document(); + doc.add(new StringField("id", "0", Field.Store.NO)); + w.addDocument(doc); + doc = new Document(); + doc.add(new StringField("id", "1", Field.Store.NO)); + Field content = new TextField("content", "some content", Field.Store.NO); + content.setTokenStream(new TokenStream() { + @Override + public boolean incrementToken() throws IOException { + throw new IOException("brains brains!"); + } + }); + + doc.add(content); + try { + w.addDocument(doc); + throw new RuntimeException("didn't hit exception"); + } catch (IOException ioe) { + // perfect + } + w.close(); + dir.close(); + } +} +*/ + + /** + * LUCENE-6006: Test undead norms. + * ..... 
+ * C C / + * /< / + * ___ __________/_#__=o + * /(- /(\_\________ \ + * \ ) \ )_ \o \ + * /|\ /|\ |' | + * | _| + * /o __\ + * / ' | + * / / | + * /_/\______| + * ( _( < + * \ \ \ + * \ \ | + * \____\____\ + * ____\_\__\_\ + * /` /` o\ + * |___ |_______| + * + */ + public void testReadUndeadNorms() throws Exception { + InputStream resource = TestLucene40NormsFormat.class.getResourceAsStream("index.40.undeadnorms.zip"); + assertNotNull(resource); + Path path = createTempDir("undeadnorms"); + TestUtil.unzip(resource, path); + Directory dir = FSDirectory.open(path); + IndexReader r = DirectoryReader.open(dir); + NumericDocValues undeadNorms = MultiDocValues.getNormValues(r, "content"); + assertNotNull(undeadNorms); + assertEquals(2, r.maxDoc()); + assertEquals(0, undeadNorms.get(0)); + assertEquals(0, undeadNorms.get(1)); + dir.close(); + r.close(); + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/index.40.undeadnorms.zip b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/index.40.undeadnorms.zip new file mode 100644 index 0000000000000000000000000000000000000000..d2d6a97bf8a0305ed3d7f1062fcf271f2f6be9d2 GIT binary patch literal 2356 zcmWIWW@Zs#U|`^2c;D;ix|>rbT^7ib2aCus#2dsL>ZPRSgobc3Fn^k98^SQlHl(zI zn}Lz#1v3K!nE3tw|9|`6uPqg5ookB z5VJ9WjMqy`DZyv>=Kb@O^&SS#nlo#9R+2!>r1&Yb9!;A$YjV(=NfDDy01ZL4V0GS( z(`rD&jgT#C@uRFtGUe%+%y*OqiDAkR=#lq_BU7in0FQ8@l^7%s0B|^ZFDX`0_X?EN0fUX^{sr z4$PY~KYY@x@Ixm!jHWW8IcmX)7_4Ge|f96~SIEj_uia-|v@*nedszW6Mou)v?IpT0&0l}buhT*`M&mTzvQy_V!!+wyiR9ARD3sW`P-bm>d6j7g8S1pl9MJ~46e^hEi2Z@>T1v9AgLUOqcaYqiDM zE9W$B@40b_*Qv*Hq2l8JzWTb~+RslqHeNq>PUw)3Q=!n~*c+_6?dRO3b8o*}W_9zc zmsm`}w_iV|+2-FDj8@6n8Kc6As9h;d)Uu4gZ;;(GUFJFZ%>CThQY`)sBs zc0$V{KD_2H6xjQ@W#W_zHFop=`Q|=+P{!>P#${`{LbgNg1>5r(XNqQhU;k{1$*mry zDc52IRBPsTEb3;esCB%4VeZRk^Al@XP!p4sc1V>DFsxjGm<( zl6`vVsd@Np1edC890@HKfd(=%$uT368)_jf0W=r_wlsohM3K!3DYDUuYjo34%VcEJ z;=!gtN@(KDLM?ug%{l-x3s^D`XBKK9i)@w}FqwmjG*FRCoLQ)4BeGe$!DeBUl=zIr 
zRSF^-tHA`02v{KpbS=0{#Ahrbeo@OiWMijd7z-}_h%*+oltMPv3|N%|lMRwx-5_<4;GPOh&PBg)JsXt2@T<7VD?&I8v??m72FJr zEH9WD7{J8u|NsBn|9)-xXpY}Z)x?!2Re!qj@bL6FoMBlc$rylQyy3DmMRlOjnn29P z05V=LEu{pX;hXo*Q`UPJJZsLZ=~+nErw!{ejS#mIna zP3?X6E-j!Hy2#d45VYncHfv^1pA>ZvXb;do3s{cupxLxPPIhk?(3Tn`oAPoA+Vli! z(-V+a9?hISE&dDxi;vGtO`gVtX*mvAf)Pdv`*)}q>+ijxyI;e6ql-STPocwC=|@3f zF|(#ki#(WdVBVbh;ge>CA3DKdG!^21E}$D2z_I9jI_#P=kPX6u;9!pTDNRnzOEobB zhImSTGCl{|U(XSjgeecmFD}W-8yS;45F$fV4|%Er#Y$;HjX%f~Mu zC?qT*Dkd%=DJ3lw{xb9$Ui+?2>k zQ7lgk8PH?i-j?%eDA1l5tag=T5{{ZzPaeO2BP%cTC$w+PH? zsy26;CcJf8ii-6U|KFBt3nqE*YM+~$Hlwlj*GhT$u(=^&g_BnI25zg}JXQLLr^Paf zM}~iTvukc z=k4crEq(Eyzdx1hW9ye0q;62?7Fp_6znWtTgS&&k2a#vq3%tGD4wlBPTC!^QhP@qY z51r0c^09ns9-pM{cQg5bu}_!IZo5s}O8(WoZCw-k>ru_)JsFGh3ub&=p|E39TF`_? zKR7#lUOth$yyHUj=~^Mh7si(Ro-1ts-|~zfH7%DsGLZt6Dy~4x1q{04)b!lcyprO0 zLww-}O2p>^=ggWndscYRnFfY44re$H`Fv!Pv0Hj_W#vjWw*X5w4YuaR+~>>D<0?}? zRe~F68K{WnL9$RUJv9%XmEb~|jU%DuA}|FqGRZOHD&r-921CG>Mi7lC@mV1yK3Zvy zZW?Nlj%->y*fdB1Pn=n(pRtIDKrI-NjSXP}M+B@C2D%npOcG}-Y8i-Z>;Vj8!6hPb#-bKo$i^lBTLQpj zgXG$hOkhDqoUy2d5wfvQ!N%%=iYDTWLd~4WMk%l|pk!6z%tOt4$mUH%G7pjyahb@< S2Fg_2Kv)g*&Lf~c1_l6&M4rbT^7ib2aCus#2dsL>ZPRSgobc3Fz;Vv8v??m72FJr zEH9WD7{J8u|NsBn|9)-xXpY}Z)x?!2Re!qj@bL6FoMBlc$rylQyh5#$Oc~JVM?lQR z05V=LIV}~R;hXo*Q`UPqXMXgA_*v5*O`ADcDsD>Tq^Kve!h@#Fj5@=>@K~75y(R~L4)W!B%=;zi)8vJZRxjLQ+*RXPXBhHFc1jqN)T|)JL}M-o*3v+e zsVPpnsasl49ba%#+~H}z(^FZ;wrLKm=}xSr45+S7?!7cslM(1*CL~uE6Lj@6Ojl2e zm~?`H<(osM;`~cbuIvdBHr{^b${dmNd$#;Znjj(^oW|v>aJXKBq4NaW60t^6)+gfA zQ=Tmq-~CklsoJy`OB*%$R#>*}m?AoT+0xW71E~$_&!10S@@%SskT>jms>;$OsVPsNe3TGm_$b=d^N1b5qh4hqtCC}W5{df>zYt|JLY7x~%!J9qD+qT{-C9~IX*?mPHVZ{9gYg;s^g z%s10omxw)WTAQ7al9p=1Fq5;zXYve(w&q5DkL@gG?GB6EHZEjbF{6|5S%x7`dBv}t zJKPS)9C{fTR<7Up0GKocfPu{lNgl-FB2l5M?tlP0~H_AmH<-^azpGuYdvKTnn0 ze{1*0J?1Y0!%Wx7{(5)R{GrDA&Xmw&4=mPSj7*nZ=C}K;sE5F!kS^{S9iJ!M;7i_g z@ID7yQ=yjEu58sGlhyZDowHY-6^Tc?%5bT zfkkfF(mAJ28noI;>CD>ZKl}2Xm5XnR+*+$u`Ez0UuLL>9-RU(euN?hIy4eUAJb^&W1q`0z)b!lcyprO0LwsQb 
z%0cG>=ggWn8<=O#95`^`1Owxl05j#_J&f)CnMGM~t2!k^_;misb4g5S*u3uz6I!HX z3aCnO18oEq5ZPaV;jr^-@xELPIzim_N+23jyKM3T_5Q zmKV$n3}E8-|NsB(f4{bTG{pB$g| zHnc4ZabT5Q&{U=|%gbotG^1;dPfuTb!mV(~r!C;8!-R#Z0lx7Io6Oiv)b=-QlHBA~ z=E{iXiPtl(=@c?DFic`X@QwthcFQ@oAs4o*&P6 zpVnvdGBh+yU1q>y^rxL^!h{@$EWwCChq^+~w79B`(RG1&Nu6<0Gafa}O78HEEX<6G zN@HEjSY1>X=b079sjR%|B@f6C2@OpACugTS>d&8)4Z@-#Gp9sG+`M6F zv2#ad#m^tCEUhiBE=P~3sw`cSn)39?#NL?wADFb|l$jOwxm{Bfk>14Z>R#Yk7nsF$ zBm3yQO3js`3GN0=hbKCUc1V^?D6Kx{ah>Ueat??r=MB=gb>1o-%u8W#bFF33GdRJ_Q)- zOyIRi_@N;;bARgp;B2%Ab7wlh@eP>b6oHY(3Q2XvnfT(%w12PPVFMAj%VC;N!x!zR11%MZxqyLI zoSL4SnpaXBZ-_7CK-u+N;G9|WW&<-QD0iM=U{shT*fR4GuSJd5V5Ae%Q4$vjAk!Dk|_gn?}0JD`ck$pn{qtZbmjX9q%MVDK2Rf_MO8 CGi49} literal 0 HcmV?d00001 diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index 6950bec9379..240c7c9bfab 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.lang.reflect.Modifier; -import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -121,7 +120,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { private Path getIndexDir() { String path = System.getProperty("tests.bwcdir"); - assumeTrue("backcompat creation tests must be run with -Dtests,bwcdir=/path/to/write/indexes", path != null); + assumeTrue("backcompat creation tests must be run with -Dtests.bwcdir=/path/to/write/indexes", path != null); return Paths.get(path); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java index 
f3607d18fde..8642bf8b918 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java @@ -71,8 +71,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer { @Override public void addNumericField(FieldInfo field, Iterable values) throws IOException { assert fieldSeen(field.name); - assert (field.getDocValuesType() == FieldInfo.DocValuesType.NUMERIC || - field.getNormType() == FieldInfo.DocValuesType.NUMERIC); + assert field.getDocValuesType() == FieldInfo.DocValuesType.NUMERIC || field.hasNorms(); writeFieldEntry(field, FieldInfo.DocValuesType.NUMERIC); // first pass to find min/max diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java index ccb826172b2..a6967eddc71 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java @@ -59,7 +59,6 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat { static final BytesRef STORETVOFF = new BytesRef(" term vector offsets "); static final BytesRef PAYLOADS = new BytesRef(" payloads "); static final BytesRef NORMS = new BytesRef(" norms "); - static final BytesRef NORMS_TYPE = new BytesRef(" norms type "); static final BytesRef DOCVALUES = new BytesRef(" doc values "); static final BytesRef DOCVALUES_GEN = new BytesRef(" doc values gen "); static final BytesRef INDEXOPTIONS = new BytesRef(" index options "); @@ -115,11 +114,6 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat { assert StringHelper.startsWith(scratch.get(), NORMS); boolean omitNorms = !Boolean.parseBoolean(readString(NORMS.length, scratch)); - SimpleTextUtil.readLine(input, scratch); - assert 
StringHelper.startsWith(scratch.get(), NORMS_TYPE); - String nrmType = readString(NORMS_TYPE.length, scratch); - final DocValuesType normsType = docValuesType(nrmType); - SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), DOCVALUES); String dvType = readString(DOCVALUES.length, scratch); @@ -146,7 +140,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat { } infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, - omitNorms, storePayloads, indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(atts)); + omitNorms, storePayloads, indexOptions, docValuesType, dvGen, Collections.unmodifiableMap(atts)); } SimpleTextUtil.checkFooter(input); @@ -217,11 +211,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat { SimpleTextUtil.write(out, NORMS); SimpleTextUtil.write(out, Boolean.toString(!fi.omitsNorms()), scratch); SimpleTextUtil.writeNewline(out); - - SimpleTextUtil.write(out, NORMS_TYPE); - SimpleTextUtil.write(out, getDocValuesType(fi.getNormType()), scratch); - SimpleTextUtil.writeNewline(out); - + SimpleTextUtil.write(out, DOCVALUES); SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch); SimpleTextUtil.writeNewline(out); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java index 3dbe4877618..e2d26b467c8 100755 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java @@ -102,7 +102,7 @@ import org.apache.lucene.store.IndexOutput; * @lucene.experimental */ public final class Lucene50FieldInfosFormat extends FieldInfosFormat { - + /** Sole constructor. 
*/ public Lucene50FieldInfosFormat() { } @@ -149,12 +149,11 @@ public final class Lucene50FieldInfosFormat extends FieldInfosFormat { // DV Types are packed in one byte byte val = input.readByte(); final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F)); - final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F)); final long dvGen = input.readLong(); final Map attributes = input.readStringStringMap(); try { infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, - indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(attributes)); + indexOptions, docValuesType, dvGen, Collections.unmodifiableMap(attributes)); infos[i].checkConsistency(); } catch (IllegalStateException e) { throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e); @@ -215,12 +214,10 @@ public final class Lucene50FieldInfosFormat extends FieldInfosFormat { output.writeVInt(fi.number); output.writeByte(bits); - // pack the DV types in one byte + // pack the DV type and hasNorms in one byte final byte dv = docValuesByte(fi.getDocValuesType()); - final byte nrm = docValuesByte(fi.getNormType()); - assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0; - byte val = (byte) (0xff & ((nrm << 4) | dv)); - output.writeByte(val); + assert (dv & (~0xF)) == 0; + output.writeByte(dv); output.writeLong(fi.getDocValuesGen()); output.writeStringStringMap(fi.attributes()); } diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 5292459595e..f7e73f76ee4 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -1790,12 +1790,8 @@ public class CheckIndex implements Closeable { } private static void checkNorms(FieldInfo fi, LeafReader reader, PrintStream infoStream) throws 
IOException { - switch(fi.getNormType()) { - case NUMERIC: - checkNumericDocValues(fi.name, reader, reader.getNormValues(fi.name), new Bits.MatchAllBits(reader.maxDoc())); - break; - default: - throw new AssertionError("wtf: " + fi.getNormType()); + if (fi.hasNorms()) { + checkNumericDocValues(fi.name, reader, reader.getNormValues(fi.name), new Bits.MatchAllBits(reader.maxDoc())); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java index 1dd666b8819..64220d2cca9 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java +++ b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java @@ -183,14 +183,10 @@ final class DefaultIndexingChain extends DocConsumer { // we must check the final value of omitNorms for the fieldinfo: it could have // changed for this field since the first time we added it. - if (fi.omitsNorms() == false) { - if (perField.norms != null) { - perField.norms.finish(state.segmentInfo.getDocCount()); - perField.norms.flush(state, normsConsumer); - assert fi.getNormType() == DocValuesType.NUMERIC; - } else if (fi.isIndexed()) { - assert fi.getNormType() == null: "got " + fi.getNormType() + "; field=" + fi.name; - } + if (fi.omitsNorms() == false && fi.isIndexed()) { + assert perField.norms != null: "field=" + fi.name; + perField.norms.finish(state.segmentInfo.getDocCount()); + perField.norms.flush(state, normsConsumer); } } } @@ -535,6 +531,11 @@ final class DefaultIndexingChain extends DocConsumer { void setInvertState() { invertState = new FieldInvertState(fieldInfo.name); termsHashPerField = termsHash.addField(invertState, fieldInfo); + if (fieldInfo.omitsNorms() == false) { + assert norms == null; + // Even if no documents actually succeed in setting a norm, we still write norms for this segment: + norms = new NormValuesWriter(fieldInfo, docState.docWriter.bytesUsed); + } } @Override @@ -543,14 +544,8 
@@ final class DefaultIndexingChain extends DocConsumer { } public void finish() throws IOException { - if (fieldInfo.omitsNorms() == false) { - if (norms == null) { - fieldInfo.setNormValueType(FieldInfo.DocValuesType.NUMERIC); - norms = new NormValuesWriter(fieldInfo, docState.docWriter.bytesUsed); - } - if (invertState.length != 0) { - norms.addValue(docState.docID, similarity.computeNorm(invertState)); - } + if (fieldInfo.omitsNorms() == false && invertState.length != 0) { + norms.addValue(docState.docID, similarity.computeNorm(invertState)); } termsHashPerField.finish(); diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java index 8209d8aa374..315cde45560 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java @@ -40,15 +40,15 @@ public final class FieldInfo { // True if any document indexed term vectors private boolean storeTermVector; - private DocValuesType normType; private boolean omitNorms; // omit norms associated with indexed fields + private IndexOptions indexOptions; private boolean storePayloads; // whether this field stores payloads together with term positions private Map attributes; private long dvGen; - + /** * Controls how much information is stored in the postings lists. * @lucene.experimental @@ -120,12 +120,12 @@ public final class FieldInfo { } /** - * Sole Constructor. + * Sole constructor. 
* * @lucene.experimental */ public FieldInfo(String name, boolean indexed, int number, boolean storeTermVector, boolean omitNorms, - boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normsType, + boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues, long dvGen, Map attributes) { this.name = name; this.indexed = indexed; @@ -136,13 +136,11 @@ public final class FieldInfo { this.storePayloads = storePayloads; this.omitNorms = omitNorms; this.indexOptions = indexOptions; - this.normType = !omitNorms ? normsType : null; } else { // for non-indexed fields, leave defaults this.storeTermVector = false; this.storePayloads = false; this.omitNorms = false; this.indexOptions = null; - this.normType = null; } this.dvGen = dvGen; this.attributes = attributes; @@ -158,11 +156,6 @@ public final class FieldInfo { if (indexOptions == null) { throw new IllegalStateException("indexed field '" + name + "' must have index options"); } - if (omitNorms) { - if (normType != null) { - throw new IllegalStateException("indexed field '" + name + "' cannot both omit norms and have norms"); - } - } // Cannot store payloads unless positions are indexed: if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0 && storePayloads) { throw new IllegalStateException("indexed field '" + name + "' cannot have payloads without positions"); @@ -177,12 +170,8 @@ public final class FieldInfo { if (omitNorms) { throw new IllegalStateException("non-indexed field '" + name + "' cannot omit norms"); } - if (normType != null) { - throw new IllegalStateException("non-indexed field '" + name + "' cannot have norms"); - } if (indexOptions != null) { throw new IllegalStateException("non-indexed field '" + name + "' cannot have index options"); - } } @@ -206,7 +195,6 @@ public final class FieldInfo { this.storePayloads |= storePayloads; if (this.omitNorms != omitNorms) { this.omitNorms = true; // if one require omitNorms at least once, it 
remains off for life - this.normType = null; } if (this.indexOptions != indexOptions) { if (this.indexOptions == null) { @@ -265,13 +253,6 @@ public final class FieldInfo { return dvGen; } - /** - * Returns {@link DocValuesType} of the norm. this may be null if the field has no norms. - */ - public DocValuesType getNormType() { - return normType; - } - void setStoreTermVectors() { storeTermVector = true; assert checkConsistency(); @@ -284,14 +265,6 @@ public final class FieldInfo { assert checkConsistency(); } - void setNormValueType(DocValuesType type) { - if (normType != null && normType != type) { - throw new IllegalArgumentException("cannot change Norm type from " + normType + " to " + type + " for field \"" + name + "\""); - } - normType = type; - assert checkConsistency(); - } - /** * Returns true if norms are explicitly omitted for this field */ @@ -303,7 +276,7 @@ public final class FieldInfo { * Returns true if this field actually has any norms. */ public boolean hasNorms() { - return normType != null; + return indexed && omitNorms == false; } /** diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java index a803f5d7380..35f42191dba 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java @@ -276,10 +276,8 @@ public class FieldInfos implements Iterable { } /** NOTE: this method does not carry over termVector - * booleans nor docValuesType; the indexer chain - * (TermVectorsConsumerPerField, DocFieldProcessor) must - * set these fields when they succeed in consuming - * the document */ + * the indexer chain must set these fields when they + * succeed in consuming the document */ public FieldInfo addOrUpdate(String name, IndexableFieldType fieldType) { // TODO: really, indexer shouldn't even call this // method (it's only called from DocFieldProcessor); @@ -288,12 +286,12 @@ public class 
FieldInfos implements Iterable { // be updated by maybe FreqProxTermsWriterPerField: return addOrUpdateInternal(name, -1, fieldType.indexed(), false, fieldType.omitNorms(), false, - fieldType.indexOptions(), fieldType.docValueType(), null); + fieldType.indexOptions(), fieldType.docValueType()); } private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed, boolean storeTermVector, - boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normType) { + boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues) { FieldInfo fi = fieldInfo(name); if (fi == null) { // This field wasn't yet added to this in-RAM @@ -302,7 +300,7 @@ public class FieldInfos implements Iterable { // before then we'll get the same name and number, // else we'll allocate a new one: final int fieldNumber = globalFieldNumbers.addOrGet(name, preferredFieldNumber, docValues); - fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType, -1, null); + fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, -1, null); assert !byName.containsKey(fi.name); assert globalFieldNumbers.containsConsistent(Integer.valueOf(fi.number), fi.name, fi.getDocValuesType()); byName.put(fi.name, fi); @@ -319,26 +317,22 @@ public class FieldInfos implements Iterable { globalFieldNumbers.setDocValuesType(fi.number, name, docValues); } } - - if (!fi.omitsNorms() && normType != null) { - fi.setNormValueType(normType); - } } return fi; } - + public FieldInfo add(FieldInfo fi) { // IMPORTANT - reuse the field number if possible for consistent field numbers across segments return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed(), fi.hasVectors(), fi.omitsNorms(), fi.hasPayloads(), - fi.getIndexOptions(), fi.getDocValuesType(), fi.getNormType()); + fi.getIndexOptions(), 
fi.getDocValuesType()); } public FieldInfo fieldInfo(String fieldName) { return byName.get(fieldName); } - final FieldInfos finish() { + FieldInfos finish() { return new FieldInfos(byName.values().toArray(new FieldInfo[byName.size()])); } } diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 9a84c76b137..bd8568005fd 100644 --- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -445,7 +445,7 @@ public class MemoryIndex { if (!fieldInfos.containsKey(fieldName)) { fieldInfos.put(fieldName, - new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, null, -1, null)); + new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? 
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, -1, null)); } TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class); diff --git a/lucene/misc/src/java/org/apache/lucene/uninverting/UninvertingReader.java b/lucene/misc/src/java/org/apache/lucene/uninverting/UninvertingReader.java index 50a60af6133..45c88a7b809 100644 --- a/lucene/misc/src/java/org/apache/lucene/uninverting/UninvertingReader.java +++ b/lucene/misc/src/java/org/apache/lucene/uninverting/UninvertingReader.java @@ -21,22 +21,22 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Map; +import org.apache.lucene.document.BinaryDocValuesField; // javadocs +import org.apache.lucene.document.DoubleField; // javadocs +import org.apache.lucene.document.FloatField; // javadocs import org.apache.lucene.document.IntField; // javadocs import org.apache.lucene.document.LongField; // javadocs -import org.apache.lucene.document.FloatField; // javadocs -import org.apache.lucene.document.DoubleField; // javadocs -import org.apache.lucene.document.BinaryDocValuesField; // javadocs import org.apache.lucene.document.NumericDocValuesField; // javadocs import org.apache.lucene.document.SortedDocValuesField; // javadocs import org.apache.lucene.document.SortedSetDocValuesField; // javadocs import org.apache.lucene.document.StringField; // javadocs -import org.apache.lucene.index.FilterLeafReader; -import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FilterDirectoryReader; +import org.apache.lucene.index.FilterLeafReader; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.NumericDocValues; 
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; @@ -213,7 +213,7 @@ public class UninvertingReader extends FilterLeafReader { } } filteredInfos.add(new FieldInfo(fi.name, fi.isIndexed(), fi.number, fi.hasVectors(), fi.omitsNorms(), - fi.hasPayloads(), fi.getIndexOptions(), type, fi.getNormType(), -1, null)); + fi.hasPayloads(), fi.getIndexOptions(), type, -1, null)); } fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()])); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java index 4b3f09356d6..a7cca084df7 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java @@ -93,7 +93,7 @@ public class AssertingNormsFormat extends NormsFormat { @Override public NumericDocValues getNorms(FieldInfo field) throws IOException { - assert field.getNormType() == FieldInfo.DocValuesType.NUMERIC; + assert field.hasNorms(); NumericDocValues values = in.getNorms(field); assert values != null; return new AssertingLeafReader.AssertingNumericDocValues(values, maxDoc); diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java index 853506b1f10..5f71598d3b2 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java @@ -89,11 +89,6 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes fi.setStorePayloads(); } } - if (fi.isIndexed() && !fi.omitsNorms()) { - if (random().nextBoolean()) { - 
fi.setNormValueType(DocValuesType.NUMERIC); - } - } addAttributes(fi); } FieldInfos infos = builder.finish(); @@ -165,7 +160,6 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes assertEquals(expected.name, actual.name); assertEquals(expected.getDocValuesType(), actual.getDocValuesType()); assertEquals(expected.getIndexOptions(), actual.getIndexOptions()); - assertEquals(expected.getNormType(), actual.getNormType()); assertEquals(expected.hasDocValues(), actual.hasDocValues()); assertEquals(expected.hasNorms(), actual.hasNorms()); assertEquals(expected.hasPayloads(), actual.hasPayloads()); diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java index 2b314af9c1b..9eaeeef7f67 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java @@ -18,6 +18,8 @@ package org.apache.lucene.index; */ import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Random; import org.apache.lucene.analysis.Analyzer; @@ -128,7 +130,6 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas public void testAllZeros() throws Exception { int iterations = atLeast(1); - final Random r = random(); for (int i = 0; i < iterations; i++) { doTestNormsVersusStoredFields(new LongProducer() { @Override @@ -264,4 +265,60 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas } // TODO: test thread safety (e.g. across different fields) explicitly here + + /** + * LUCENE-6006: Tests undead norms. + * ..... 
+ * C C / + * /< / + * ___ __________/_#__=o + * /(- /(\_\________ \ + * \ ) \ )_ \o \ + * /|\ /|\ |' | + * | _| + * /o __\ + * / ' | + * / / | + * /_/\______| + * ( _( < + * \ \ \ + * \ \ | + * \____\____\ + * ____\_\__\_\ + * /` /` o\ + * |___ |_______| + * + */ + public void testUndeadNorms() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + int numDocs = atLeast(1000); + List toDelete = new ArrayList<>(); + for(int i=0;i postings = new TreeMap<>(); @@ -702,7 +701,6 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest doPayloads, indexOptions, null, - DocValuesType.NUMERIC, -1, null); } diff --git a/solr/core/src/java/org/apache/solr/search/Insanity.java b/solr/core/src/java/org/apache/solr/search/Insanity.java index 815edbad051..9340718915f 100644 --- a/solr/core/src/java/org/apache/solr/search/Insanity.java +++ b/solr/core/src/java/org/apache/solr/search/Insanity.java @@ -65,7 +65,7 @@ public class Insanity { for (FieldInfo fi : in.getFieldInfos()) { if (fi.name.equals(insaneField)) { filteredInfos.add(new FieldInfo(fi.name, fi.isIndexed(), fi.number, fi.hasVectors(), fi.omitsNorms(), - fi.hasPayloads(), fi.getIndexOptions(), null, fi.getNormType(), -1, null)); + fi.hasPayloads(), fi.getIndexOptions(), null, -1, null)); } else { filteredInfos.add(fi); }