From 1539c093f4d20c5f53317484536ac379fd6779c6 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 22 Jan 2013 23:16:26 +0000 Subject: [PATCH] add sophisticated(TM) back-compat fieldinfos flux capacitor git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1437231 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/lucene40/Lucene40Codec.java | 7 +- .../lucene40/Lucene40DocValuesFormat.java | 71 ++++++++++++++ .../lucene40/Lucene40FieldInfosReader.java | 92 +++++++++---------- .../codecs/lucene40/Lucene40NormsFormat.java | 69 ++++++++++++++ .../lucene/codecs/lucene41/Lucene41Codec.java | 10 +- .../lucene40/Lucene40FieldInfosWriter.java | 62 ++----------- 6 files changed, 199 insertions(+), 112 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java index 5b16fadd6b7..c9a44e763e8 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java @@ -87,17 +87,16 @@ public class Lucene40Codec extends Codec { return infosFormat; } - // nocommit need a read-only Lucene40DVFormat / read-write in the impersonator - private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Disk"); + //nocommit need a read-only Lucene40DocValuesFormat / read-write in the impersonator + private final DocValuesFormat defaultDVFormat = new Lucene40DocValuesFormat(); @Override public final DocValuesFormat docValuesFormat() { - // nocommit return defaultDVFormat; } // nocommit need a read-only Lucene40NormsFormat / read-write in the impersonator - private final NormsFormat normsFormat = new Lucene42NormsFormat(); + private final NormsFormat normsFormat = new Lucene40NormsFormat(); @Override public final NormsFormat normsFormat() { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java new file mode 100644 index 00000000000..de07f8004ea --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java @@ -0,0 +1,71 @@ +package org.apache.lucene.codecs.lucene40; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType; +import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.util.BytesRef; + +// nocommit: still a lie, but allows javadocs @links to work +// nocommit: make read-only and move to impersonator +public class Lucene40DocValuesFormat extends Lucene42DocValuesFormat { + + @Override + public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + final DocValuesConsumer delegate = super.fieldsConsumer(state); + return new DocValuesConsumer() { + + @Override + public void addNumericField(FieldInfo field, Iterable values) throws IOException { + // hack: here we would examine the numerics and simulate in the impersonator the best we can + // e.g. if they are all in byte/int range write fixed, otherwise write packed or whatever + field.putAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.VAR_INTS.name()); + delegate.addNumericField(field, values); + } + + @Override + public void addBinaryField(FieldInfo field, Iterable values) throws IOException { + field.putAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.BYTES_VAR_STRAIGHT.name()); + delegate.addBinaryField(field, values); + } + + @Override + public void addSortedField(FieldInfo field, Iterable values, Iterable docToOrd) throws IOException { + field.putAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.BYTES_VAR_SORTED.name()); + delegate.addSortedField(field, values, docToOrd); + } + + @Override + public void close() throws IOException { + delegate.close(); + } + }; + } + + @Override + public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { + return super.fieldsProducer(state); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java index 80507fe8661..9c12f12ac4a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java @@ -91,11 +91,20 @@ public class Lucene40FieldInfosReader extends FieldInfosReader { } // DV Types are packed in one byte byte val = input.readByte(); - final DocValuesType docValuesType = getDocValuesTypeFake((byte) (val & 0x0F)); - final DocValuesType normsType = getDocValuesTypeFake((byte) ((val >>> 4) & 0x0F)); - final Map attributes = input.readStringStringMap(); + final LegacyDocValuesType oldValuesType = getDocValuesType((byte) (val & 0x0F)); + final LegacyDocValuesType oldNormsType = getDocValuesType((byte) ((val >>> 4) & 0x0F)); + final Map attributes = input.readStringStringMap();; + if (oldValuesType.mapping != null) { + attributes.put(LEGACY_DV_TYPE_KEY, oldValuesType.name()); + } + if (oldNormsType.mapping != null) { + if (oldNormsType.mapping != DocValuesType.NUMERIC) { + throw new CorruptIndexException("invalid norm type: " + oldNormsType); + } + attributes.put(LEGACY_NORM_TYPE_KEY, oldNormsType.name()); + } infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, - omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes)); + omitNorms, storePayloads, indexOptions, oldValuesType.mapping, oldNormsType.mapping, Collections.unmodifiableMap(attributes)); } if (input.getFilePointer() != input.length()) { @@ -113,53 +122,34 @@ public class Lucene40FieldInfosReader extends FieldInfosReader { } } - // nocommit: this is not actually how 4.0 was encoded - private static DocValuesType getDocValuesTypeFake(byte b) { - if (b == 0) { - return null; - } else if (b == 1) { - return DocValuesType.NUMERIC; - } else if (b == 2) { - return DocValuesType.BINARY; - } else if (b == 3) { - return DocValuesType.SORTED; - } else { - throw new AssertionError(); + static final String LEGACY_DV_TYPE_KEY = Lucene40FieldInfosReader.class.getSimpleName() + ".dvtype"; + static final String LEGACY_NORM_TYPE_KEY = Lucene40FieldInfosReader.class.getSimpleName() + ".normtype"; + + // mapping of 4.0 types -> 4.2 types + static enum LegacyDocValuesType { + NONE(null), + VAR_INTS(DocValuesType.NUMERIC), + FLOAT_32(DocValuesType.NUMERIC), + FLOAT_64(DocValuesType.NUMERIC), + BYTES_FIXED_STRAIGHT(DocValuesType.BINARY), + BYTES_FIXED_DEREF(DocValuesType.BINARY), + BYTES_VAR_STRAIGHT(DocValuesType.BINARY), + BYTES_VAR_DEREF(DocValuesType.BINARY), + FIXED_INTS_16(DocValuesType.NUMERIC), + FIXED_INTS_32(DocValuesType.NUMERIC), + FIXED_INTS_64(DocValuesType.NUMERIC), + FIXED_INTS_8(DocValuesType.NUMERIC), + BYTES_FIXED_SORTED(DocValuesType.SORTED), + BYTES_VAR_SORTED(DocValuesType.SORTED); + + final DocValuesType mapping; + LegacyDocValuesType(DocValuesType mapping) { + this.mapping = mapping; } } -/* - private static DocValues.Type getDocValuesType(final byte b) { - switch(b) { - case 0: - return null; - case 1: - return DocValues.Type.VAR_INTS; - case 2: - return DocValues.Type.FLOAT_32; - case 3: - return DocValues.Type.FLOAT_64; - case 4: - return DocValues.Type.BYTES_FIXED_STRAIGHT; - case 5: - return DocValues.Type.BYTES_FIXED_DEREF; - case 6: - return DocValues.Type.BYTES_VAR_STRAIGHT; - case 7: - return DocValues.Type.BYTES_VAR_DEREF; - case 8: - return DocValues.Type.FIXED_INTS_16; - case 9: - return DocValues.Type.FIXED_INTS_32; - case 10: - return DocValues.Type.FIXED_INTS_64; - case 11: - return DocValues.Type.FIXED_INTS_8; - case 12: - return DocValues.Type.BYTES_FIXED_SORTED; - case 13: - return DocValues.Type.BYTES_VAR_SORTED; - default: - throw new IllegalStateException("unhandled indexValues type " + b); - } - }*/ + + // decodes a 4.0 type + private static LegacyDocValuesType getDocValuesType(byte b) { + return LegacyDocValuesType.values()[b]; + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java new file mode 100644 index 00000000000..2301ba3bce9 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java @@ -0,0 +1,69 @@ +package org.apache.lucene.codecs.lucene40; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType; +import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.util.BytesRef; + +// nocommit: still a lie, but allows javadocs @links to work +// nocommit: make read-only and move to impersonator +public class Lucene40NormsFormat extends Lucene42NormsFormat { + + @Override + public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException { + final DocValuesConsumer delegate = super.normsConsumer(state); + return new DocValuesConsumer() { + + @Override + public void addNumericField(FieldInfo field, Iterable values) throws IOException { + // hack: here we would examine the numerics and simulate in the impersonator the best we can + // e.g. if they are all in byte/int range write fixed, otherwise write packed or whatever + field.putAttribute(Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY, LegacyDocValuesType.VAR_INTS.name()); + delegate.addNumericField(field, values); + } + + @Override + public void addBinaryField(FieldInfo field, Iterable values) throws IOException { + assert false; + } + + @Override + public void addSortedField(FieldInfo field, Iterable values, Iterable docToOrd) throws IOException { + assert false; + } + + @Override + public void close() throws IOException { + delegate.close(); + } + }; + } + + @Override + public DocValuesProducer normsProducer(SegmentReadState state) throws IOException { + return super.normsProducer(state); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java index ecf056c2014..8825ea2d472 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java @@ -32,8 +32,10 @@ import org.apache.lucene.codecs.StoredFieldsWriter; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat; import org.apache.lucene.codecs.compressing.CompressionMode; +import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat; import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat; import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; +import org.apache.lucene.codecs.lucene40.Lucene40NormsFormat; import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat; import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat; import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat; @@ -124,10 +126,10 @@ public class Lucene41Codec extends Codec { } private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41"); - // nocommit - private final DocValuesFormat dvFormat = DocValuesFormat.forName("Lucene42"); - // nocommit - private final NormsFormat normsFormat = new Lucene42NormsFormat(); + // nocommit need a read-only Lucene40DocValuesFormat / read-write in the impersonator + private final DocValuesFormat dvFormat = new Lucene40DocValuesFormat(); + // nocommit need a read-only Lucene40NormsFormat / read-write in the impersonator + private final NormsFormat normsFormat = new Lucene40NormsFormat(); @Override public final NormsFormat normsFormat() { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java index b65c2271cb6..b943b70a5b6 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FieldInfosWriter; +import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType; import org.apache.lucene.index.FieldInfo.DocValuesType; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; @@ -72,8 +73,8 @@ public class Lucene40FieldInfosWriter extends FieldInfosWriter { output.writeByte(bits); // pack the DV types in one byte - final byte dv = docValuesByteFake(fi.getDocValuesType()); - final byte nrm = docValuesByteFake(fi.getNormType()); + final byte dv = docValuesByte(fi.getDocValuesType(), fi.getAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY)); + final byte nrm = docValuesByte(fi.getNormType(), fi.getAttribute(Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY)); assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0; byte val = (byte) (0xff & ((nrm << 4) | dv)); output.writeByte(val); @@ -89,59 +90,14 @@ public class Lucene40FieldInfosWriter extends FieldInfosWriter { } } - /** this is not actually how 4.0 wrote this! */ - // nocommit: make a 4.0 fieldinfos writer - public byte docValuesByteFake(DocValuesType type) { - if (type == null) { - return 0; - } else if (type == DocValuesType.NUMERIC) { - return 1; - } else if (type == DocValuesType.BINARY) { - return 2; - } else if (type == DocValuesType.SORTED) { - return 3; - } else { - throw new AssertionError(); - } - } - - /** Returns the byte used to encode the {@link - * Type} for each field. - public byte docValuesByte(Type type) { + /** 4.0-style docvalues byte */ + public byte docValuesByte(DocValuesType type, String legacyTypeAtt) { if (type == null) { + assert legacyTypeAtt == null; return 0; } else { - switch(type) { - case VAR_INTS: - return 1; - case FLOAT_32: - return 2; - case FLOAT_64: - return 3; - case BYTES_FIXED_STRAIGHT: - return 4; - case BYTES_FIXED_DEREF: - return 5; - case BYTES_VAR_STRAIGHT: - return 6; - case BYTES_VAR_DEREF: - return 7; - case FIXED_INTS_16: - return 8; - case FIXED_INTS_32: - return 9; - case FIXED_INTS_64: - return 10; - case FIXED_INTS_8: - return 11; - case BYTES_FIXED_SORTED: - return 12; - case BYTES_VAR_SORTED: - return 13; - default: - throw new IllegalStateException("unhandled indexValues type " + type); - } + assert legacyTypeAtt != null; + return (byte) LegacyDocValuesType.valueOf(legacyTypeAtt).ordinal(); } - }*/ - + } }