add sophisticated(TM) back-compat fieldinfos flux capacitor

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1437231 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-01-22 23:16:26 +00:00
parent ab9fdf8cce
commit 1539c093f4
6 changed files with 199 additions and 112 deletions

View File

@ -87,17 +87,16 @@ public class Lucene40Codec extends Codec {
return infosFormat;
}
// nocommit need a read-only Lucene40DVFormat / read-write in the impersonator
private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Disk");
//nocommit need a read-only Lucene40DocValuesFormat / read-write in the impersonator
private final DocValuesFormat defaultDVFormat = new Lucene40DocValuesFormat();
@Override
public final DocValuesFormat docValuesFormat() {
// nocommit
return defaultDVFormat;
}
// nocommit need a read-only Lucene40NormsFormat / read-write in the impersonator
private final NormsFormat normsFormat = new Lucene42NormsFormat();
private final NormsFormat normsFormat = new Lucene40NormsFormat();
@Override
public final NormsFormat normsFormat() {

View File

@ -0,0 +1,71 @@
package org.apache.lucene.codecs.lucene40;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.BytesRef;
// nocommit: still a lie, but allows javadocs @links to work
// nocommit: make read-only and move to impersonator
public class Lucene40DocValuesFormat extends Lucene42DocValuesFormat {
@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
final DocValuesConsumer delegate = super.fieldsConsumer(state);
return new DocValuesConsumer() {
@Override
public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
// hack: here we would examine the numerics and simulate in the impersonator the best we can
// e.g. if they are all in byte/int range write fixed, otherwise write packed or whatever
field.putAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.VAR_INTS.name());
delegate.addNumericField(field, values);
}
@Override
public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
field.putAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.BYTES_VAR_STRAIGHT.name());
delegate.addBinaryField(field, values);
}
@Override
public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
field.putAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.BYTES_VAR_SORTED.name());
delegate.addSortedField(field, values, docToOrd);
}
@Override
public void close() throws IOException {
delegate.close();
}
};
}
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
return super.fieldsProducer(state);
}
}

View File

@ -91,11 +91,20 @@ public class Lucene40FieldInfosReader extends FieldInfosReader {
}
// DV Types are packed in one byte
byte val = input.readByte();
final DocValuesType docValuesType = getDocValuesTypeFake((byte) (val & 0x0F));
final DocValuesType normsType = getDocValuesTypeFake((byte) ((val >>> 4) & 0x0F));
final Map<String,String> attributes = input.readStringStringMap();
final LegacyDocValuesType oldValuesType = getDocValuesType((byte) (val & 0x0F));
final LegacyDocValuesType oldNormsType = getDocValuesType((byte) ((val >>> 4) & 0x0F));
final Map<String,String> attributes = input.readStringStringMap();;
if (oldValuesType.mapping != null) {
attributes.put(LEGACY_DV_TYPE_KEY, oldValuesType.name());
}
if (oldNormsType.mapping != null) {
if (oldNormsType.mapping != DocValuesType.NUMERIC) {
throw new CorruptIndexException("invalid norm type: " + oldNormsType);
}
attributes.put(LEGACY_NORM_TYPE_KEY, oldNormsType.name());
}
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes));
omitNorms, storePayloads, indexOptions, oldValuesType.mapping, oldNormsType.mapping, Collections.unmodifiableMap(attributes));
}
if (input.getFilePointer() != input.length()) {
@ -113,53 +122,34 @@ public class Lucene40FieldInfosReader extends FieldInfosReader {
}
}
// nocommit: this is not actually how 4.0 was encoded
private static DocValuesType getDocValuesTypeFake(byte b) {
if (b == 0) {
return null;
} else if (b == 1) {
return DocValuesType.NUMERIC;
} else if (b == 2) {
return DocValuesType.BINARY;
} else if (b == 3) {
return DocValuesType.SORTED;
} else {
throw new AssertionError();
static final String LEGACY_DV_TYPE_KEY = Lucene40FieldInfosReader.class.getSimpleName() + ".dvtype";
static final String LEGACY_NORM_TYPE_KEY = Lucene40FieldInfosReader.class.getSimpleName() + ".normtype";
// mapping of 4.0 types -> 4.2 types
static enum LegacyDocValuesType {
NONE(null),
VAR_INTS(DocValuesType.NUMERIC),
FLOAT_32(DocValuesType.NUMERIC),
FLOAT_64(DocValuesType.NUMERIC),
BYTES_FIXED_STRAIGHT(DocValuesType.BINARY),
BYTES_FIXED_DEREF(DocValuesType.BINARY),
BYTES_VAR_STRAIGHT(DocValuesType.BINARY),
BYTES_VAR_DEREF(DocValuesType.BINARY),
FIXED_INTS_16(DocValuesType.NUMERIC),
FIXED_INTS_32(DocValuesType.NUMERIC),
FIXED_INTS_64(DocValuesType.NUMERIC),
FIXED_INTS_8(DocValuesType.NUMERIC),
BYTES_FIXED_SORTED(DocValuesType.SORTED),
BYTES_VAR_SORTED(DocValuesType.SORTED);
final DocValuesType mapping;
LegacyDocValuesType(DocValuesType mapping) {
this.mapping = mapping;
}
}
/*
private static DocValues.Type getDocValuesType(final byte b) {
switch(b) {
case 0:
return null;
case 1:
return DocValues.Type.VAR_INTS;
case 2:
return DocValues.Type.FLOAT_32;
case 3:
return DocValues.Type.FLOAT_64;
case 4:
return DocValues.Type.BYTES_FIXED_STRAIGHT;
case 5:
return DocValues.Type.BYTES_FIXED_DEREF;
case 6:
return DocValues.Type.BYTES_VAR_STRAIGHT;
case 7:
return DocValues.Type.BYTES_VAR_DEREF;
case 8:
return DocValues.Type.FIXED_INTS_16;
case 9:
return DocValues.Type.FIXED_INTS_32;
case 10:
return DocValues.Type.FIXED_INTS_64;
case 11:
return DocValues.Type.FIXED_INTS_8;
case 12:
return DocValues.Type.BYTES_FIXED_SORTED;
case 13:
return DocValues.Type.BYTES_VAR_SORTED;
default:
throw new IllegalStateException("unhandled indexValues type " + b);
}
}*/
// decodes a 4.0 type
private static LegacyDocValuesType getDocValuesType(byte b) {
return LegacyDocValuesType.values()[b];
}
}

View File

@ -0,0 +1,69 @@
package org.apache.lucene.codecs.lucene40;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.BytesRef;
// nocommit: still a lie, but allows javadocs @links to work
// nocommit: make read-only and move to impersonator
public class Lucene40NormsFormat extends Lucene42NormsFormat {
@Override
public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
final DocValuesConsumer delegate = super.normsConsumer(state);
return new DocValuesConsumer() {
@Override
public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
// hack: here we would examine the numerics and simulate in the impersonator the best we can
// e.g. if they are all in byte/int range write fixed, otherwise write packed or whatever
field.putAttribute(Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY, LegacyDocValuesType.VAR_INTS.name());
delegate.addNumericField(field, values);
}
@Override
public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
assert false;
}
@Override
public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
assert false;
}
@Override
public void close() throws IOException {
delegate.close();
}
};
}
@Override
public DocValuesProducer normsProducer(SegmentReadState state) throws IOException {
return super.normsProducer(state);
}
}

View File

@ -32,8 +32,10 @@ import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat;
import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40NormsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat;
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
@ -124,10 +126,10 @@ public class Lucene41Codec extends Codec {
}
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
// nocommit
private final DocValuesFormat dvFormat = DocValuesFormat.forName("Lucene42");
// nocommit
private final NormsFormat normsFormat = new Lucene42NormsFormat();
// nocommit need a read-only Lucene40DocValuesFormat / read-write in the impersonator
private final DocValuesFormat dvFormat = new Lucene40DocValuesFormat();
// nocommit need a read-only Lucene40NormsFormat / read-write in the impersonator
private final NormsFormat normsFormat = new Lucene40NormsFormat();
@Override
public final NormsFormat normsFormat() {

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
@ -72,8 +73,8 @@ public class Lucene40FieldInfosWriter extends FieldInfosWriter {
output.writeByte(bits);
// pack the DV types in one byte
final byte dv = docValuesByteFake(fi.getDocValuesType());
final byte nrm = docValuesByteFake(fi.getNormType());
final byte dv = docValuesByte(fi.getDocValuesType(), fi.getAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY));
final byte nrm = docValuesByte(fi.getNormType(), fi.getAttribute(Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY));
assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
byte val = (byte) (0xff & ((nrm << 4) | dv));
output.writeByte(val);
@ -89,59 +90,14 @@ public class Lucene40FieldInfosWriter extends FieldInfosWriter {
}
}
/** this is not actually how 4.0 wrote this! */
// nocommit: make a 4.0 fieldinfos writer
public byte docValuesByteFake(DocValuesType type) {
if (type == null) {
return 0;
} else if (type == DocValuesType.NUMERIC) {
return 1;
} else if (type == DocValuesType.BINARY) {
return 2;
} else if (type == DocValuesType.SORTED) {
return 3;
} else {
throw new AssertionError();
}
}
/** Returns the byte used to encode the {@link
* Type} for each field.
public byte docValuesByte(Type type) {
/** 4.0-style docvalues byte */
public byte docValuesByte(DocValuesType type, String legacyTypeAtt) {
if (type == null) {
assert legacyTypeAtt == null;
return 0;
} else {
switch(type) {
case VAR_INTS:
return 1;
case FLOAT_32:
return 2;
case FLOAT_64:
return 3;
case BYTES_FIXED_STRAIGHT:
return 4;
case BYTES_FIXED_DEREF:
return 5;
case BYTES_VAR_STRAIGHT:
return 6;
case BYTES_VAR_DEREF:
return 7;
case FIXED_INTS_16:
return 8;
case FIXED_INTS_32:
return 9;
case FIXED_INTS_64:
return 10;
case FIXED_INTS_8:
return 11;
case BYTES_FIXED_SORTED:
return 12;
case BYTES_VAR_SORTED:
return 13;
default:
throw new IllegalStateException("unhandled indexValues type " + type);
}
assert legacyTypeAtt != null;
return (byte) LegacyDocValuesType.valueOf(legacyTypeAtt).ordinal();
}
}*/
}
}