LUCENE-4087: throw exceptions on illegal DocValue#Type changes in IW

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1348236 13f79535-47bb-0310-9956-ffa450edef68
Simon Willnauer, 2012-06-08 20:49:30 +00:00
parent 694d75773d
commit 75092c24ad
26 changed files with 792 additions and 129 deletions
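In short: DocFieldProcessor now records a TypeCompatibility per DocValues field and rejects incompatible additions inside IndexWriter instead of deferring the conflict to flush or merge time. A minimal sketch of the new behaviour, assuming the 4.0-era API that this commit's tests use (the demo class name, RAMDirectory and WhitespaceAnalyzer are just illustrative stand-ins, not part of the commit):

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntDocValuesField;
import org.apache.lucene.document.StraightBytesDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

// Hypothetical demo class, not part of the commit.
public class DocValuesTypeChangeDemo {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        Version.LUCENE_40, new WhitespaceAnalyzer(Version.LUCENE_40)));

    // The first value pins field "dv" to FIXED_INTS_32.
    Document d1 = new Document();
    d1.add(new IntDocValuesField("dv", 42));
    writer.addDocument(d1);

    // Same field, now as fixed-size bytes: with this commit the writer
    // throws IllegalArgumentException("Incompatible DocValues type: ...")
    // up front instead of waiting for flush/merge.
    Document d2 = new Document();
    d2.add(new StraightBytesDocValuesField("dv", new BytesRef("abc"), true));
    try {
      writer.addDocument(d2);
    } catch (IllegalArgumentException expected) {
      System.out.println(expected.getMessage());
    }

    writer.close();
    dir.close();
  }
}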


@@ -107,6 +107,10 @@ public abstract class DocValuesArraySource extends Source {
      return values;
    }
    public double getFloat(int docID) {
      return getInt(docID);
    }
    @Override
    public long getInt(int docID) {
      assert docID >= 0 && docID < values.length;
@@ -168,6 +172,10 @@ public abstract class DocValuesArraySource extends Source {
      return values;
    }
    public double getFloat(int docID) {
      return getInt(docID);
    }
    @Override
    public long getInt(int docID) {
      assert docID >= 0 && docID < values.length;
@@ -224,6 +232,10 @@ public abstract class DocValuesArraySource extends Source {
      return values;
    }
    public double getFloat(int docID) {
      return getInt(docID);
    }
    @Override
    public long getInt(int docID) {
      assert docID >= 0 && docID < values.length;


@@ -77,6 +77,19 @@ public abstract class DocValuesConsumer {
   */
  public abstract void finish(int docCount) throws IOException;
  /**
   * Returns the value size this consumer accepts or <tt>-1</tt> iff this
   * consumer is value size agnostic ie. accepts variable length values.
   * <p>
   * NOTE: the return value is undefined until the consumer has successfully
   * consumed at least one value.
   *
   * @return the value size this consumer accepts or <tt>-1</tt> iff this
   *         consumer is value size agnostic ie. accepts variable length values.
   */
  public abstract int getValueSize();
  /**
   * Merges the given {@link org.apache.lucene.index.MergeState} into
   * this {@link DocValuesConsumer}.


@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -473,6 +473,10 @@ public final class Bytes {
      }
    }
    public int getValueSize() {
      return size;
    }
    // Important that we get docCount, in case there were
    // some last docs that we didn't see
    @Override


@@ -43,6 +43,11 @@ abstract class DirectSource extends Source {
      toNumeric = new ShortToLong();
      break;
    case FLOAT_32:
      toNumeric = new BytesToFloat();
      break;
    case FLOAT_64:
      toNumeric = new BytesToDouble();
      break;
    case FIXED_INTS_32:
      toNumeric = new IntToLong();
      break;
@@ -103,7 +108,6 @@ abstract class DirectSource extends Source {
    long toLong(IndexInput input) throws IOException {
      return input.readByte();
    }
  }
  private static final class ShortToLong extends ToNumeric {
@@ -118,12 +122,31 @@ abstract class DirectSource extends Source {
    long toLong(IndexInput input) throws IOException {
      return input.readInt();
    }
  }
  private static final class BytesToFloat extends ToNumeric {
    @Override
    long toLong(IndexInput input) throws IOException {
      throw new UnsupportedOperationException("ints are not supported");
    }
    double toDouble(IndexInput input) throws IOException {
      return Float.intBitsToFloat(input.readInt());
    }
  }
  private static final class BytesToDouble extends ToNumeric {
    @Override
    long toLong(IndexInput input) throws IOException {
      throw new UnsupportedOperationException("ints are not supported");
    }
    double toDouble(IndexInput input) throws IOException {
      return Double.longBitsToDouble(input.readLong());
    }
  }
  private static final class LongToLong extends ToNumeric {
    @Override
    long toLong(IndexInput input) throws IOException {
@@ -131,7 +154,7 @@ abstract class DirectSource extends Source {
    }
    double toDouble(IndexInput input) throws IOException {
-     return Double.longBitsToDouble(input.readLong());
+     throw new UnsupportedOperationException("doubles are not supported");
    }
  }


@@ -67,6 +67,7 @@ class FixedDerefBytesImpl {
      idxOut.writeInt(numValues);
      writeIndex(idxOut, docCount, numValues, docToEntry);
    }
  }
  public static class FixedDerefReader extends BytesReaderBase {


@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -129,6 +129,11 @@ class FixedStraightBytesImpl {
        out.writeBytes(zeros, zeros.length);
      }
    }
    @Override
    public int getValueSize() {
      return size;
    }
  }
  static class Writer extends FixedBytesWriterBase {


@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -232,6 +232,11 @@ class VarStraightBytesImpl {
    public long ramBytesUsed() {
      return bytesUsed.get();
    }
    @Override
    public int getValueSize() {
      return -1;
    }
  }
  public static class VarStraightReader extends BytesReaderBase {


@@ -1,5 +1,5 @@
package org.apache.lucene.codecs.simpletext;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
@@ -55,7 +55,7 @@ public class SimpleTextDocValuesConsumer extends DocValuesConsumer {
  protected final Type type;
  protected final BytesRefHash hash;
  private int[] ords;
- private int fixedSize = Integer.MIN_VALUE;
+ private int valueSize = Integer.MIN_VALUE;
  private BytesRef zeroBytes;
  private final String segmentSuffix;
@@ -137,12 +137,12 @@ public class SimpleTextDocValuesConsumer extends DocValuesConsumer {
      throw new RuntimeException("should not reach this line");
    }
-   if (fixedSize == Integer.MIN_VALUE) {
+   if (valueSize == Integer.MIN_VALUE) {
      assert maxDocId == -1;
-     fixedSize = vSize;
+     valueSize = vSize;
    } else {
-     if (fixedSize != vSize) {
-       throw new IllegalArgumentException("value size must be " + fixedSize + " but was: " + vSize);
+     if (valueSize != vSize) {
+       throw new IllegalArgumentException("value size must be " + valueSize + " but was: " + vSize);
      }
    }
    maxDocId = Math.max(docID, maxDocId);
@@ -178,7 +178,7 @@ public class SimpleTextDocValuesConsumer extends DocValuesConsumer {
    SimpleTextUtil.write(output, getHeader());
    SimpleTextUtil.writeNewline(output);
    SimpleTextUtil.write(output, VALUE_SIZE);
-   SimpleTextUtil.write(output, Integer.toString(this.fixedSize), scratch);
+   SimpleTextUtil.write(output, Integer.toString(this.valueSize), scratch);
    SimpleTextUtil.writeNewline(output);
    prepareFlush(docCount);
    for (int i = 0; i < docCount; i++) {
@@ -253,8 +253,8 @@ public class SimpleTextDocValuesConsumer extends DocValuesConsumer {
      case BYTES_FIXED_SORTED:
      case BYTES_FIXED_STRAIGHT:
        if(zeroBytes == null) {
-         assert fixedSize > 0;
-         zeroBytes = new BytesRef(new byte[fixedSize]);
+         assert valueSize > 0;
+         zeroBytes = new BytesRef(new byte[valueSize]);
        }
        SimpleTextUtil.write(output, zeroBytes);
        break;
@@ -286,4 +286,9 @@ public class SimpleTextDocValuesConsumer extends DocValuesConsumer {
  protected Type getType() {
    return type;
  }
  @Override
  public int getValueSize() {
    return valueSize;
  }
}


@@ -1,6 +1,6 @@
package org.apache.lucene.document;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -32,6 +32,8 @@ import org.apache.lucene.index.DocValues;
 * <p>
 * If you also need to store the value, you should add a
 * separate {@link StoredField} instance.
 *
 * @see DocValues for further information
 * */
public class ByteDocValuesField extends Field {


@@ -1,6 +1,6 @@
package org.apache.lucene.document;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -37,6 +37,8 @@ import org.apache.lucene.util.BytesRef;
 * <p>
 * If you also need to store the value, you should add a
 * separate {@link StoredField} instance.
 *
 * @see DocValues for further information
 * */
public class DerefBytesDocValuesField extends Field {


@@ -1,6 +1,6 @@
package org.apache.lucene.document;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -32,6 +32,8 @@ import org.apache.lucene.index.DocValues;
 * <p>
 * If you also need to store the value, you should add a
 * separate {@link StoredField} instance.
 *
 * @see DocValues for further information
 * */
public class DoubleDocValuesField extends Field {


@@ -1,6 +1,6 @@
package org.apache.lucene.document;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -32,6 +32,7 @@ import org.apache.lucene.index.DocValues;
 * <p>
 * If you also need to store the value, you should add a
 * separate {@link StoredField} instance.
 * @see DocValues for further information
 * */
public class FloatDocValuesField extends Field {


@@ -1,6 +1,6 @@
package org.apache.lucene.document;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -32,6 +32,7 @@ import org.apache.lucene.index.DocValues;
 * <p>
 * If you also need to store the value, you should add a
 * separate {@link StoredField} instance.
 * @see DocValues for further information
 * */
public class IntDocValuesField extends Field {


@@ -1,6 +1,6 @@
package org.apache.lucene.document;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -32,6 +32,7 @@ import org.apache.lucene.index.DocValues;
 * <p>
 * If you also need to store the value, you should add a
 * separate {@link StoredField} instance.
 * @see DocValues for further information
 * */
public class LongDocValuesField extends Field {


@@ -1,6 +1,6 @@
package org.apache.lucene.document;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -35,6 +35,8 @@ import org.apache.lucene.index.AtomicReader; // javadocs
 * <p>
 * If you also need to store the value, you should add a
 * separate {@link StoredField} instance.
 *
 * @see DocValues for further information
 * */
public class PackedLongDocValuesField extends Field {


@@ -1,6 +1,6 @@
package org.apache.lucene.document;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -32,6 +32,8 @@ import org.apache.lucene.index.DocValues;
 * <p>
 * If you also need to store the value, you should add a
 * separate {@link StoredField} instance.
 *
 * @see DocValues for further information
 * */
public class ShortDocValuesField extends Field {


@@ -1,6 +1,6 @@
package org.apache.lucene.document;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -33,6 +33,8 @@ import org.apache.lucene.util.BytesRef;
 * <p>
 * If you also need to store the value, you should add a
 * separate {@link StoredField} instance.
 *
 * @see DocValues for further information
 * */
public class SortedBytesDocValuesField extends Field {


@@ -1,6 +1,6 @@
package org.apache.lucene.document;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -36,6 +36,8 @@ import org.apache.lucene.util.BytesRef;
 * <p>
 * If you also need to store the value, you should add a
 * separate {@link StoredField} instance.
 *
 * @see DocValues for further information
 * */
public class StraightBytesDocValuesField extends Field {


@@ -1,6 +1,6 @@
package org.apache.lucene.index;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -29,6 +29,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.PerDocConsumer;
import org.apache.lucene.index.DocumentsWriterPerThread.DocState;
import org.apache.lucene.index.TypePromoter.TypeCompatibility;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IOUtils;
@@ -80,7 +81,7 @@ final class DocFieldProcessor extends DocConsumer {
    fieldsWriter.flush(state);
    consumer.flush(childFields, state);
-   for (DocValuesConsumerAndDocID consumer : docValues.values()) {
+   for (DocValuesConsumerHolder consumer : docValues.values()) {
      consumer.docValuesConsumer.finish(state.segmentInfo.getDocCount());
    }
@@ -271,7 +272,26 @@
      }
      final DocValues.Type dvType = field.fieldType().docValueType();
      if (dvType != null) {
-       docValuesConsumer(dvType, docState, fp.fieldInfo).add(docState.docID, field);
+       DocValuesConsumerHolder docValuesConsumer = docValuesConsumer(dvType,
            docState, fp.fieldInfo);
        DocValuesConsumer consumer = docValuesConsumer.docValuesConsumer;
        if (docValuesConsumer.compatibility == null) {
          consumer.add(docState.docID, field);
          docValuesConsumer.compatibility = new TypeCompatibility(dvType,
              consumer.getValueSize());
        } else if (docValuesConsumer.compatibility.isCompatible(dvType,
            TypePromoter.getValueSize(dvType, field.binaryValue()))) {
          consumer.add(docState.docID, field);
        } else {
          docValuesConsumer.compatibility.isCompatible(dvType,
              TypePromoter.getValueSize(dvType, field.binaryValue()));
          TypeCompatibility compatibility = docValuesConsumer.compatibility;
          throw new IllegalArgumentException("Incompatible DocValues type: "
              + dvType.name() + " size: "
              + TypePromoter.getValueSize(dvType, field.binaryValue())
              + " expected: " + " type: " + compatibility.getBaseType()
              + " size: " + compatibility.getBaseSize());
        }
      }
    }
@@ -308,30 +328,31 @@
    }
  }
- private static class DocValuesConsumerAndDocID {
+ private static class DocValuesConsumerHolder {
    // Only used to enforce that same DV field name is never
    // added more than once per doc:
-   public int docID;
+   int docID;
    final DocValuesConsumer docValuesConsumer;
    TypeCompatibility compatibility;
-   public DocValuesConsumerAndDocID(DocValuesConsumer docValuesConsumer) {
+   public DocValuesConsumerHolder(DocValuesConsumer docValuesConsumer) {
      this.docValuesConsumer = docValuesConsumer;
    }
  }
- final private Map<String, DocValuesConsumerAndDocID> docValues = new HashMap<String, DocValuesConsumerAndDocID>();
+ final private Map<String, DocValuesConsumerHolder> docValues = new HashMap<String, DocValuesConsumerHolder>();
  private PerDocConsumer perDocConsumer;
- DocValuesConsumer docValuesConsumer(DocValues.Type valueType, DocState docState, FieldInfo fieldInfo)
+ DocValuesConsumerHolder docValuesConsumer(DocValues.Type valueType, DocState docState, FieldInfo fieldInfo)
      throws IOException {
-   DocValuesConsumerAndDocID docValuesConsumerAndDocID = docValues.get(fieldInfo.name);
+   DocValuesConsumerHolder docValuesConsumerAndDocID = docValues.get(fieldInfo.name);
    if (docValuesConsumerAndDocID != null) {
      if (docState.docID == docValuesConsumerAndDocID.docID) {
        throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" appears more than once in this document (only one value is allowed, per field)");
      }
      assert docValuesConsumerAndDocID.docID < docState.docID;
      docValuesConsumerAndDocID.docID = docState.docID;
-     return docValuesConsumerAndDocID.docValuesConsumer;
+     return docValuesConsumerAndDocID;
    }
    if (perDocConsumer == null) {
@@ -345,9 +366,10 @@
    assert fieldInfo.getDocValuesType() == null || fieldInfo.getDocValuesType() == valueType;
    fieldInfo.setDocValuesType(valueType);
-   docValuesConsumerAndDocID = new DocValuesConsumerAndDocID(docValuesConsumer);
+   docValuesConsumerAndDocID = new DocValuesConsumerHolder(docValuesConsumer);
    docValuesConsumerAndDocID.docID = docState.docID;
    docValues.put(fieldInfo.name, docValuesConsumerAndDocID);
-   return docValuesConsumer;
+   return docValuesConsumerAndDocID;
  }
}


@@ -1,6 +1,6 @@
package org.apache.lucene.index;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -48,6 +48,17 @@ import org.apache.lucene.util.packed.PackedInts;
 * IndexReader.
 * <p>
 * {@link DocValues} are fully integrated into the {@link DocValuesFormat} API.
 * <p>
 * NOTE: DocValues is a strongly typed per-field API. Type changes within an
 * indexing session can result in exceptions if the type has changed in a way that
 * the previously give type for a field can't promote the value without losing
 * information. For instance a field initially indexed with {@link Type#FIXED_INTS_32}
 * can promote a value with {@link Type#FIXED_INTS_8} but can't promote
 * {@link Type#FIXED_INTS_64}. During segment merging type-promotion exceptions are suppressed.
 * Fields will be promoted to their common denominator or automatically transformed
 * into a 3rd type like {@link Type#BYTES_VAR_STRAIGHT} to prevent data loss and merge exceptions.
 * This behavior is considered <i>best-effort</i> might change in future releases.
 * </p>
 *
 * @see Type for limitations and default implementation documentation
 * @see ByteDocValuesField for adding byte values to the index
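A hedged sketch of the rule described in the NOTE above, using the value field classes touched by this commit (the helper class is hypothetical and not part of the commit; the writer is assumed to be a plain single-threaded IndexWriter so all three documents go through the same DocFieldProcessor):

import java.io.IOException;
import org.apache.lucene.document.ByteDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntDocValuesField;
import org.apache.lucene.document.LongDocValuesField;
import org.apache.lucene.index.IndexWriter;

// Hypothetical helper, not part of the commit: shows which width changes the
// writer now accepts for a single int DocValues field within one session.
public class IntWidthPromotionDemo {
  static void addDocs(IndexWriter writer) throws IOException {
    Document d1 = new Document();
    d1.add(new IntDocValuesField("price", 42));        // pins "price" to FIXED_INTS_32
    writer.addDocument(d1);

    Document d2 = new Document();
    d2.add(new ByteDocValuesField("price", (byte) 7)); // FIXED_INTS_8 fits into 32 bits: accepted
    writer.addDocument(d2);

    Document d3 = new Document();
    d3.add(new LongDocValuesField("price", 7L));       // FIXED_INTS_64 could lose bits:
    writer.addDocument(d3);                            // throws IllegalArgumentException
  }
}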


@@ -185,13 +185,7 @@ final class SegmentMerger {
    if (previous == null) {
      previous = TypePromoter.getIdentityPromoter();
    }
-   TypePromoter promoted = previous.promote(incoming);
-   if (promoted == null) {
-     // type is incompatible: promote to BYTES_VAR_STRAIGHT
-     return TypePromoter.create(DocValues.Type.BYTES_VAR_STRAIGHT, TypePromoter.VAR_TYPE_VALUE_SIZE);
-   } else {
-     return promoted;
-   }
+   return previous.promote(incoming);
  }
  // NOTE: this is actually merging all the fieldinfos


@@ -1,6 +1,6 @@
package org.apache.lucene.index;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -20,14 +20,15 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.util.BytesRef;
// TODO: maybe we should not automagically promote
// types... and instead require a given field always has the
// same type?
/**
- * Type promoter that promotes {@link DocValues} during merge based on
- * their {@link Type} and {@link #getValueSize()}
+ * Type promoter that promotes {@link DocValues} during merge based on their
+ * {@link Type} and {@link #getValueSize()}
 *
 * @lucene.internal
 */
@@ -37,7 +38,7 @@ class TypePromoter {
  private static final TypePromoter IDENTITY_PROMOTER = new IdentityTypePromoter();
  public static final int VAR_TYPE_VALUE_SIZE = -1;
- private static final int IS_INT = 1 << 0;
+ private static final int IS_INT = 1 << 0 | 1 << 2;
  private static final int IS_BYTE = 1 << 1;
  private static final int IS_FLOAT = 1 << 2 ;
  /* VAR & FIXED == VAR */
@@ -50,14 +51,16 @@
  private static final int IS_DEREF = 1 << 5 | 1 << 6;
  private static final int IS_SORTED = 1 << 7;
  /* more bits wins (int16 & int32 == int32) */
- private static final int IS_8_BIT = 1 << 8 | 1 << 9 | 1 << 10 | 1 << 11;
+ private static final int IS_8_BIT = 1 << 8 | 1 << 9 | 1 << 10 | 1 << 11 | 1 << 12 | 1 << 13; // 8
- private static final int IS_16_BIT = 1 << 9 | 1 << 10 | 1 << 11;
+ private static final int IS_16_BIT = 1 << 9 | 1 << 10 | 1 << 11 | 1 << 12 | 1 << 13; // 9
- private static final int IS_32_BIT = 1 << 10 | 1 << 11;
+ private static final int IS_32_BIT = 1 << 10 | 1 << 11 | 1 << 13;
  private static final int IS_64_BIT = 1 << 11;
  private static final int IS_32_BIT_FLOAT = 1 << 12 | 1 << 13;
  private static final int IS_64_BIT_FLOAT = 1 << 13;
- private final Type type;
- private final int flags;
- private final int valueSize;
+ private Type type;
+ private int flags;
+ private int valueSize;
  /**
   * Returns a positive value size if this {@link TypePromoter} represents a
@@ -77,11 +80,18 @@
    }
  }
  /**
   * Creates a new {@link TypePromoter}
   *
   */
  protected TypePromoter() {}
  /**
   * Creates a new {@link TypePromoter}
   *
   * @param type
   *          the {@link Type} this promoter represents
   *
   * @param flags
   *          the promoters flags
   * @param valueSize
@@ -93,6 +103,24 @@
    this.valueSize = valueSize;
  }
  /**
   * Resets the {@link TypePromoter}
   *
   * @param type
   *          the {@link Type} this promoter represents
   *
   * @param flags
   *          the promoters flags
   * @param valueSize
   *          the value size if {@link #IS_FIXED} or <code>-1</code> otherwise.
   */
  protected TypePromoter set(Type type, int flags, int valueSize) {
    this.type = type;
    this.flags = flags;
    this.valueSize = valueSize;
    return this;
  }
  /**
   * Creates a new promoted {@link TypePromoter} based on this and the given
   * {@link TypePromoter} or <code>null</code> iff the {@link TypePromoter}
@@ -105,18 +133,24 @@
   * {@link TypePromoter} aren't compatible.
   */
  public TypePromoter promote(TypePromoter promoter) {
-   int promotedFlags = promoter.flags & this.flags;
-   TypePromoter promoted = create(FLAGS_MAP.get(promotedFlags), valueSize);
-   if (promoted == null) {
-     return promoted;
-   if ((promoted.flags & IS_BYTE) != 0 && (promoted.flags & IS_FIXED) == IS_FIXED) {
+   return promote(promoter, newPromoter());
  }
  private TypePromoter promote(TypePromoter promoter, TypePromoter spare) {
    int promotedFlags = promoter.flags & this.flags;
    TypePromoter promoted = reset(FLAGS_MAP.get(promotedFlags), valueSize,
        spare);
    if (promoted == null) {
      return TypePromoter.create(DocValues.Type.BYTES_VAR_STRAIGHT,
          TypePromoter.VAR_TYPE_VALUE_SIZE);
    }
    if ((promoted.flags & IS_BYTE) != 0
        && (promoted.flags & IS_FIXED) == IS_FIXED) {
      if (this.valueSize == promoter.valueSize) {
        return promoted;
      }
-     return create(FLAGS_MAP.get(promoted.flags & PROMOTE_TO_VAR_SIZE_MASK),
-         VAR_TYPE_VALUE_SIZE);
+     return reset(FLAGS_MAP.get(promoted.flags & PROMOTE_TO_VAR_SIZE_MASK),
+         VAR_TYPE_VALUE_SIZE, spare);
    }
    return promoted;
@@ -131,11 +165,50 @@
    return type;
  }
private boolean isTypeCompatible(TypePromoter promoter) {
int promotedFlags = promoter.flags & this.flags;
return (promotedFlags & 0x7) > 0;
}
private boolean isBytesCompatible(TypePromoter promoter) {
int promotedFlags = promoter.flags & this.flags;
return (promotedFlags & IS_BYTE) > 0
&& (promotedFlags & (IS_FIXED | IS_VAR)) > 0;
}
private boolean isNumericSizeCompatible(TypePromoter promoter) {
int promotedFlags = promoter.flags & this.flags;
return (promotedFlags & IS_BYTE) == 0
&& (((promotedFlags & IS_FIXED) > 0 && (promotedFlags & (IS_8_BIT)) > 0) || (promotedFlags & IS_VAR) > 0);
}
  @Override
  public String toString() {
    return "TypePromoter [type=" + type + ", sizeInBytes=" + valueSize + "]";
  }
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + flags;
result = prime * result + ((type == null) ? 0 : type.hashCode());
result = prime * result + valueSize;
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
TypePromoter other = (TypePromoter) obj;
if (flags != other.flags) return false;
if (type != other.type) return false;
if (valueSize != other.valueSize) return false;
return true;
}
  /**
   * Creates a new {@link TypePromoter} for the given type and size per value.
   *
@@ -147,47 +220,84 @@
   * @return a new {@link TypePromoter}
   */
  public static TypePromoter create(Type type, int valueSize) {
    return reset(type, valueSize, new TypePromoter());
  }
  private static TypePromoter reset(Type type, int valueSize,
      TypePromoter promoter) {
    if (type == null) {
      return null;
    }
    switch (type) {
    case BYTES_FIXED_DEREF:
-     return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_DEREF, valueSize);
+     return promoter.set(type, IS_BYTE | IS_FIXED | IS_DEREF, valueSize);
    case BYTES_FIXED_SORTED:
-     return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_SORTED, valueSize);
+     return promoter.set(type, IS_BYTE | IS_FIXED | IS_SORTED, valueSize);
    case BYTES_FIXED_STRAIGHT:
-     return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_STRAIGHT, valueSize);
+     return promoter.set(type, IS_BYTE | IS_FIXED | IS_STRAIGHT, valueSize);
    case BYTES_VAR_DEREF:
-     return new TypePromoter(type, IS_BYTE | IS_VAR | IS_DEREF, VAR_TYPE_VALUE_SIZE);
+     return promoter.set(type, IS_BYTE | IS_VAR | IS_DEREF,
+         VAR_TYPE_VALUE_SIZE);
    case BYTES_VAR_SORTED:
-     return new TypePromoter(type, IS_BYTE | IS_VAR | IS_SORTED, VAR_TYPE_VALUE_SIZE);
+     return promoter.set(type, IS_BYTE | IS_VAR | IS_SORTED,
+         VAR_TYPE_VALUE_SIZE);
    case BYTES_VAR_STRAIGHT:
-     return new TypePromoter(type, IS_BYTE | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
+     return promoter.set(type, IS_BYTE | IS_VAR | IS_STRAIGHT,
+         VAR_TYPE_VALUE_SIZE);
    case FIXED_INTS_16:
-     return new TypePromoter(type,
-         IS_INT | IS_FIXED | IS_STRAIGHT | IS_16_BIT, valueSize);
+     return promoter.set(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_16_BIT,
+         valueSize);
    case FIXED_INTS_32:
-     return new TypePromoter(type,
-         IS_INT | IS_FIXED | IS_STRAIGHT | IS_32_BIT, valueSize);
+     return promoter.set(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_32_BIT,
+         valueSize);
    case FIXED_INTS_64:
-     return new TypePromoter(type,
-         IS_INT | IS_FIXED | IS_STRAIGHT | IS_64_BIT, valueSize);
+     return promoter.set(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_64_BIT,
+         valueSize);
    case FIXED_INTS_8:
-     return new TypePromoter(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_8_BIT,
-         valueSize);
+     return promoter.set(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_8_BIT,
+         valueSize);
    case FLOAT_32:
-     return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
-         | IS_32_BIT, valueSize);
+     return promoter.set(type,
+         IS_FLOAT | IS_FIXED | IS_STRAIGHT | IS_32_BIT_FLOAT, valueSize);
    case FLOAT_64:
-     return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
-         | IS_64_BIT, valueSize);
+     return promoter.set(type,
+         IS_FLOAT | IS_FIXED | IS_STRAIGHT | IS_64_BIT_FLOAT, valueSize);
    case VAR_INTS:
-     return new TypePromoter(type, IS_INT | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
+     return promoter.set(type, IS_INT | IS_VAR | IS_STRAIGHT,
+         VAR_TYPE_VALUE_SIZE);
    default:
      throw new IllegalStateException();
    }
  }
public static int getValueSize(DocValues.Type type, BytesRef ref) {
switch (type) {
case VAR_INTS:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
return -1;
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
assert ref != null;
return ref.length;
case FIXED_INTS_16:
return 2;
case FLOAT_32:
case FIXED_INTS_32:
return 4;
case FLOAT_64:
case FIXED_INTS_64:
return 8;
case FIXED_INTS_8:
return 1;
default:
throw new IllegalArgumentException("unknonw docvalues type: "
+ type.name());
}
}
  /**
   * Returns a {@link TypePromoter} that always promotes to the type provided to
   * {@link #promote(TypePromoter)}
@@ -196,15 +306,57 @@
    return IDENTITY_PROMOTER;
  }
private static TypePromoter newPromoter() {
return new TypePromoter(null, 0, -1);
}
  private static class IdentityTypePromoter extends TypePromoter {
    public IdentityTypePromoter() {
      super(null, 0, -1);
    }
@Override
protected TypePromoter set(Type type, int flags, int valueSize) {
throw new UnsupportedOperationException("can not reset IdendityPromotoer");
}
    @Override
    public TypePromoter promote(TypePromoter promoter) {
      return promoter;
    }
  }
static class TypeCompatibility {
private final TypePromoter base;
private final TypePromoter spare;
TypeCompatibility(Type type, int valueSize) {
this.base = create(type, valueSize);
spare = newPromoter();
}
boolean isCompatible(Type type, int valueSize) {
TypePromoter reset = reset(type, valueSize, spare);
if (base.isTypeCompatible(reset)) {
if (base.isBytesCompatible(reset)) {
return base.valueSize == -1 || base.valueSize == valueSize;
} else if (base.flags == reset.flags) {
return true;
} else if (base.isNumericSizeCompatible(reset)) {
return base.valueSize == -1
|| (base.valueSize > valueSize && valueSize > 0);
}
}
return false;
}
Type getBaseType() {
return base.type();
}
int getBaseSize() {
return base.valueSize;
}
}
}
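For reference, a sketch of how the new TypeCompatibility answers the writer-side question, mirroring the DocFieldProcessor change above. TypePromoter and TypeCompatibility are package-private, so this hypothetical scratch class only compiles inside org.apache.lucene.index; the expected results follow from the flag masks and size checks added in this file:

package org.apache.lucene.index;

import org.apache.lucene.index.DocValues.Type;

// Hypothetical scratch class, not part of the commit.
public class TypeCompatibilityDemo {
  public static void main(String[] args) {
    // The first value seen for a field fixes the baseline: FIXED_INTS_32, 4 bytes.
    TypePromoter.TypeCompatibility compat =
        new TypePromoter.TypeCompatibility(Type.FIXED_INTS_32, 4);

    // Narrower ints fit without losing information.
    System.out.println(compat.isCompatible(Type.FIXED_INTS_8, 1));          // true
    // Wider ints would lose bits, so the writer rejects them.
    System.out.println(compat.isCompatible(Type.FIXED_INTS_64, 8));         // false
    // Bytes against ints share no type bits at all.
    System.out.println(compat.isCompatible(Type.BYTES_FIXED_STRAIGHT, 4));  // false
  }
}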


@@ -47,8 +47,6 @@ import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
-import org.junit.Before;
/**
 *


@@ -0,0 +1,317 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.ByteDocValuesField;
import org.apache.lucene.document.DerefBytesDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.IntDocValuesField;
import org.apache.lucene.document.LongDocValuesField;
import org.apache.lucene.document.PackedLongDocValuesField;
import org.apache.lucene.document.ShortDocValuesField;
import org.apache.lucene.document.SortedBytesDocValuesField;
import org.apache.lucene.document.StraightBytesDocValuesField;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
/**
* Tests compatibility of {@link DocValues.Type} during indexing
*/
public class TestDocValuesTypeCompatibility extends LuceneTestCase {
public void testAddCompatibleIntTypes() throws CorruptIndexException,
LockObtainFailedException, IOException {
int numIter = atLeast(10);
for (int i = 0; i < numIter; i++) {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random()));
int numDocs = atLeast(100);
iwc.setMaxBufferedDocs(2 * numDocs); // make sure we hit the same DWPT
// here
iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
iwc.setRAMPerThreadHardLimitMB(2000);
IndexWriter writer = new IndexWriter(dir, iwc);
Type[] types = new Type[] {Type.VAR_INTS, Type.FIXED_INTS_16,
Type.FIXED_INTS_64, Type.FIXED_INTS_16, Type.FIXED_INTS_8};
Type maxType = types[random().nextInt(types.length)];
for (int j = 0; j < numDocs; j++) {
addDoc(writer, getRandomIntsField(maxType, j == 0));
}
writer.close();
dir.close();
}
}
@SuppressWarnings("fallthrough")
public IndexableField getRandomIntsField(Type maxType, boolean force) {
switch (maxType) {
case VAR_INTS:
if (random().nextInt(5) == 0 || force) {
return new PackedLongDocValuesField("f", 1);
}
case FIXED_INTS_64:
if (random().nextInt(4) == 0 || force) {
return new LongDocValuesField("f", 1);
}
case FIXED_INTS_32:
if (random().nextInt(3) == 0 || force) {
return new IntDocValuesField("f", 1);
}
case FIXED_INTS_16:
if (random().nextInt(2) == 0 || force) {
return new ShortDocValuesField("f", (short) 1);
}
case FIXED_INTS_8:
return new ByteDocValuesField("f", (byte) 1);
default:
throw new IllegalArgumentException();
}
}
public void testAddCompatibleDoubleTypes() throws CorruptIndexException,
LockObtainFailedException, IOException {
int numIter = atLeast(10);
for (int i = 0; i < numIter; i++) {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random()));
int numDocs = atLeast(100);
iwc.setMaxBufferedDocs(2 * numDocs); // make sure we hit the same DWPT
// here
iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
iwc.setRAMPerThreadHardLimitMB(2000);
IndexWriter writer = new IndexWriter(dir, iwc);
Type[] types = new Type[] {Type.FLOAT_64, Type.FLOAT_32};
Type maxType = types[random().nextInt(types.length)];
for (int j = 0; j < numDocs; j++) {
addDoc(writer, getRandomFloatField(maxType, j == 0));
}
writer.close();
dir.close();
}
}
@SuppressWarnings("fallthrough")
public IndexableField getRandomFloatField(Type maxType, boolean force) {
switch (maxType) {
case FLOAT_64:
if (random().nextInt(5) == 0 || force) {
return new PackedLongDocValuesField("f", 1);
}
case FIXED_INTS_32:
if (random().nextInt(4) == 0 || force) {
return new LongDocValuesField("f", 1);
}
case FLOAT_32:
if (random().nextInt(3) == 0 || force) {
return new IntDocValuesField("f", 1);
}
case FIXED_INTS_16:
if (random().nextInt(2) == 0 || force) {
return new ShortDocValuesField("f", (short) 1);
}
case FIXED_INTS_8:
return new ByteDocValuesField("f", (byte) 1);
default:
throw new IllegalArgumentException();
}
}
public void testAddCompatibleDoubleTypes2() throws CorruptIndexException,
LockObtainFailedException, IOException {
int numIter = atLeast(10);
for (int i = 0; i < numIter; i++) {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random()));
int numDocs = atLeast(100);
iwc.setMaxBufferedDocs(2 * numDocs); // make sure we hit the same DWPT
// here
iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
iwc.setRAMPerThreadHardLimitMB(2000);
IndexWriter writer = new IndexWriter(dir, iwc);
IndexableField[] fields = new IndexableField[] {
new DoubleDocValuesField("f", 1.0), new IntDocValuesField("f", 1),
new ShortDocValuesField("f", (short) 1),
new ByteDocValuesField("f", (byte) 1)};
int base = random().nextInt(fields.length - 1);
addDoc(writer, fields[base]);
for (int j = 0; j < numDocs; j++) {
int f = base + random().nextInt(fields.length - base);
addDoc(writer, fields[f]);
}
writer.close();
dir.close();
}
}
public void testAddCompatibleByteTypes() throws CorruptIndexException,
LockObtainFailedException, IOException {
int numIter = atLeast(10);
for (int i = 0; i < numIter; i++) {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random()));
int numDocs = atLeast(100);
iwc.setMaxBufferedDocs(2 * numDocs); // make sure we hit the same DWPT
// here
iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
iwc.setRAMPerThreadHardLimitMB(2000);
IndexWriter writer = new IndexWriter(dir, iwc);
boolean mustBeFixed = random().nextBoolean();
int maxSize = 2 + random().nextInt(15);
IndexableField bytesField = getRandomBytesField(mustBeFixed, maxSize,
true);
addDoc(writer, bytesField);
for (int j = 0; j < numDocs; j++) {
bytesField = getRandomBytesField(mustBeFixed, maxSize, false);
addDoc(writer, bytesField);
}
writer.close();
dir.close();
}
}
public IndexableField getRandomBytesField(boolean mustBeFixed, int maxSize,
boolean mustBeVariableIfNotFixed) {
int size = mustBeFixed ? maxSize : random().nextInt(maxSize) + 1;
StringBuilder s = new StringBuilder();
for (int i = 0; i < size; i++) {
s.append("a");
}
BytesRef bytesRef = new BytesRef(s);
boolean fixed = mustBeFixed ? true : mustBeVariableIfNotFixed ? false
: random().nextBoolean();
switch (random().nextInt(3)) {
case 0:
return new SortedBytesDocValuesField("f", bytesRef, fixed);
case 1:
return new DerefBytesDocValuesField("f", bytesRef, fixed);
default:
return new StraightBytesDocValuesField("f", bytesRef, fixed);
}
}
public void testIncompatibleTypesBytes() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random()));
int numDocs = atLeast(100);
iwc.setMaxBufferedDocs(numDocs); // make sure we hit the same DWPT
iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
iwc.setRAMPerThreadHardLimitMB(2000);
IndexWriter writer = new IndexWriter(dir, iwc);
int numDocsIndexed = 0;
for (int j = 1; j < numDocs; j++) {
try {
addDoc(writer, getRandomIndexableDVField());
numDocsIndexed++;
} catch (IllegalArgumentException e) {
assertTrue(e.getMessage().startsWith("Incompatible DocValues type:"));
}
}
writer.commit();
DirectoryReader open = DirectoryReader.open(dir);
assertEquals(numDocsIndexed, open.numDocs());
open.close();
writer.close();
dir.close();
}
private void addDoc(IndexWriter writer, IndexableField... fields)
throws CorruptIndexException, IOException {
Document doc = new Document();
for (IndexableField indexableField : fields) {
doc.add(indexableField);
}
writer.addDocument(doc);
}
public IndexableField getRandomIndexableDVField() {
int size = random().nextInt(100) + 1;
StringBuilder s = new StringBuilder();
for (int i = 0; i < size; i++) {
s.append("a");
}
BytesRef bytesRef = new BytesRef(s);
Type[] values = Type.values();
Type t = values[random().nextInt(values.length)];
switch (t) {
case BYTES_FIXED_DEREF:
return new DerefBytesDocValuesField("f", bytesRef, true);
case BYTES_FIXED_SORTED:
return new SortedBytesDocValuesField("f", bytesRef, true);
case BYTES_FIXED_STRAIGHT:
return new StraightBytesDocValuesField("f", bytesRef, true);
case BYTES_VAR_DEREF:
return new DerefBytesDocValuesField("f", bytesRef, false);
case BYTES_VAR_SORTED:
return new SortedBytesDocValuesField("f", bytesRef, false);
case BYTES_VAR_STRAIGHT:
return new StraightBytesDocValuesField("f", bytesRef, false);
case FIXED_INTS_16:
return new ShortDocValuesField("f", (short) 1);
case FIXED_INTS_32:
return new IntDocValuesField("f", 1);
case FIXED_INTS_64:
return new LongDocValuesField("f", 1);
case FIXED_INTS_8:
return new ByteDocValuesField("f", (byte) 1);
case FLOAT_32:
return new FloatDocValuesField("f", 1.0f);
case FLOAT_64:
return new DoubleDocValuesField("f", 1.0f);
case VAR_INTS:
return new PackedLongDocValuesField("f", 1);
default:
throw new IllegalArgumentException();
}
}
}


@@ -30,6 +30,7 @@ import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
@@ -175,6 +176,57 @@ public class TestNorms extends LuceneTestCase {
    otherDir.close();
  }
public void testIllegalCustomEncoder() throws Exception {
Directory dir = newDirectory();
IllegalCustomEncodingSimilarity similarity = new IllegalCustomEncodingSimilarity();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
config.setSimilarity(similarity);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
Document doc = new Document();
Field foo = newField("foo", "", TextField.TYPE_UNSTORED);
Field bar = newField("bar", "", TextField.TYPE_UNSTORED);
doc.add(foo);
doc.add(bar);
int numAdded = 0;
for (int i = 0; i < 100; i++) {
try {
bar.setStringValue("singleton");
similarity.useByte = random().nextBoolean();
writer.addDocument(doc);
numAdded++;
} catch (IllegalArgumentException e) {}
}
IndexReader reader = writer.getReader();
writer.close();
assertEquals(numAdded, reader.numDocs());
IndexReaderContext topReaderContext = reader.getTopReaderContext();
AtomicReaderContext[] leaves = topReaderContext.leaves();
for (int j = 0; j < leaves.length; j++) {
AtomicReader atomicReader = leaves[j].reader();
Source source = random().nextBoolean() ? atomicReader.normValues("foo").getSource() : atomicReader.normValues("foo").getDirectSource();
Bits liveDocs = atomicReader.getLiveDocs();
Type t = source.getType();
for (int i = 0; i < atomicReader.maxDoc(); i++) {
assertEquals(0, source.getFloat(i), 0.000f);
}
source = random().nextBoolean() ? atomicReader.normValues("bar").getSource() : atomicReader.normValues("bar").getDirectSource();
for (int i = 0; i < atomicReader.maxDoc(); i++) {
if (liveDocs == null || liveDocs.get(i)) {
assertEquals("type: " + t, 1, source.getFloat(i), 0.000f);
} else {
assertEquals("type: " + t, 0, source.getFloat(i), 0.000f);
}
}
}
reader.close();
dir.close();
}
  public void buildIndex(Directory dir, boolean writeNorms) throws IOException,
      CorruptIndexException {
    Random random = random();
@@ -249,4 +301,28 @@ public class TestNorms extends LuceneTestCase {
    }
  }
class IllegalCustomEncodingSimilarity extends DefaultSimilarity {
public boolean useByte = false;
@Override
public byte encodeNormValue(float f) {
return (byte) f;
}
@Override
public float decodeNormValue(byte b) {
return (float) b;
}
@Override
public void computeNorm(FieldInvertState state, Norm norm) {
if (useByte) {
norm.setByte(encodeNormValue((float) state.getLength()));
} else {
norm.setFloat((float)state.getLength());
}
}
}
}


@@ -1,6 +1,6 @@
package org.apache.lucene.index;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
@@ -41,7 +41,6 @@ import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
public class TestTypePromotion extends LuceneTestCase {
@@ -50,7 +49,7 @@ public class TestTypePromotion extends LuceneTestCase {
      Type.FIXED_INTS_64, Type.FIXED_INTS_8);
  private static EnumSet<Type> FLOATS = EnumSet.of(Type.FLOAT_32,
-     Type.FLOAT_64);
+     Type.FLOAT_64, Type.FIXED_INTS_8);
  private static EnumSet<Type> UNSORTED_BYTES = EnumSet.of(
      Type.BYTES_FIXED_DEREF, Type.BYTES_FIXED_STRAIGHT,
@@ -77,12 +76,13 @@ public class TestTypePromotion extends LuceneTestCase {
    int num_2 = atLeast(200);
    int num_3 = atLeast(200);
    long[] values = new long[num_1 + num_2 + num_3];
    Type[] sourceType = new Type[num_1 + num_2 + num_3];
    index(writer,
-       randomValueType(types, random()), values, 0, num_1);
+       randomValueType(types, random()), values, sourceType, 0, num_1);
    writer.commit();
    index(writer,
-       randomValueType(types, random()), values, num_1, num_2);
+       randomValueType(types, random()), values, sourceType, num_1, num_2);
    writer.commit();
    if (random().nextInt(4) == 0) {
@@ -93,7 +93,7 @@ public class TestTypePromotion extends LuceneTestCase {
      IndexWriter writer_2 = new IndexWriter(dir_2,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
      index(writer_2,
-         randomValueType(types, random()), values, num_1 + num_2, num_3);
+         randomValueType(types, random()), values, sourceType, num_1 + num_2, num_3);
      writer_2.commit();
      writer_2.close();
      if (rarely()) {
@@ -107,17 +107,17 @@ public class TestTypePromotion extends LuceneTestCase {
      dir_2.close();
    } else {
      index(writer,
-         randomValueType(types, random()), values, num_1 + num_2, num_3);
+         randomValueType(types, random()), values, sourceType, num_1 + num_2, num_3);
    }
    writer.forceMerge(1);
    writer.close();
-   assertValues(type, dir, values);
+   assertValues(type, dir, values, sourceType);
    dir.close();
  }
- private void assertValues(TestType type, Directory dir, long[] values)
+ private void assertValues(TestType type, Directory dir, long[] values, Type[] sourceType)
      throws CorruptIndexException, IOException {
    DirectoryReader reader = DirectoryReader.open(dir);
    assertEquals(1, reader.getSequentialSubReaders().length);
@@ -158,7 +158,13 @@ public class TestTypePromotion extends LuceneTestCase {
          assertEquals(msg + " byteSize: " + bytes.length, values[id], value);
          break;
        case Float:
-         assertEquals(msg, values[id], Double.doubleToRawLongBits(directSource.getFloat(i)));
+         if (sourceType[id] == Type.FLOAT_32
|| sourceType[id] == Type.FLOAT_64) {
assertEquals(msg, values[id],
Double.doubleToRawLongBits(directSource.getFloat(i)));
} else {
assertEquals(msg, values[id], directSource.getFloat(i), 0.0d);
}
          break;
        case Int:
          assertEquals(msg, values[id], directSource.getInt(i));
@@ -173,7 +179,7 @@ public class TestTypePromotion extends LuceneTestCase {
  }
  public void index(IndexWriter writer,
-     Type valueType, long[] values, int offset, int num)
+     Type valueType, long[] values, Type[] sourceTypes, int offset, int num)
      throws CorruptIndexException, IOException {
    final Field valField;
@@ -228,6 +234,7 @@ public class TestTypePromotion extends LuceneTestCase {
    for (int i = offset; i < offset + num; i++) {
      Document doc = new Document();
      doc.add(new Field("id", i + "", TextField.TYPE_STORED));
      sourceTypes[i] = valueType;
      switch (valueType) {
      case VAR_INTS:
        // TODO: can we do nextLong()?
@@ -327,8 +334,9 @@ public class TestTypePromotion extends LuceneTestCase {
    int num_1 = atLeast(200);
    int num_2 = atLeast(200);
    long[] values = new long[num_1 + num_2];
    Type[] sourceType = new Type[num_1 + num_2];
    index(writer,
-       randomValueType(INTEGERS, random()), values, 0, num_1);
+       randomValueType(INTEGERS, random()), values, sourceType, 0, num_1);
    writer.commit();
    if (random().nextInt(4) == 0) {
@@ -337,7 +345,7 @@ public class TestTypePromotion extends LuceneTestCase {
      IndexWriter writer_2 = new IndexWriter(dir_2,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
      index(writer_2,
-         randomValueType(random().nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random()), values, num_1, num_2);
+         randomValueType(random().nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random()), values, sourceType, num_1, num_2);
      writer_2.commit();
      writer_2.close();
      if (random().nextBoolean()) {
@@ -351,7 +359,7 @@ public class TestTypePromotion extends LuceneTestCase {
      dir_2.close();
    } else {
      index(writer,
-         randomValueType(random().nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random()), values, num_1, num_2);
+         randomValueType(random().nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random()), values, sourceType, num_1, num_2);
      writer.commit();
    }
    writer.close();
@@ -369,7 +377,7 @@ public class TestTypePromotion extends LuceneTestCase {
    AtomicReaderContext[] children = topReaderContext.leaves();
    DocValues docValues = children[0].reader().docValues("promote");
    assertNotNull(docValues);
-   assertValues(TestType.Byte, dir, values);
+   assertValues(TestType.Byte, dir, values, sourceType);
    assertEquals(Type.BYTES_VAR_STRAIGHT, docValues.getType());
    reader.close();
    dir.close();