LUCENE-3186: svn add

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1181104 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-10-10 18:05:18 +00:00
parent 9e027695c8
commit c95d4fd9a2
2 changed files with 517 additions and 0 deletions

View File

@ -0,0 +1,204 @@
package org.apache.lucene.index.values;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.HashMap;
import java.util.Map;
/**
* Type promoter that promotes {@link IndexDocValues} during merge based on
* their {@link ValueType} and {@link #getValueSize()}
*
* @lucene.internal
*/
public class TypePromoter {
private final static Map<Integer, ValueType> FLAGS_MAP = new HashMap<Integer, ValueType>();
private static final TypePromoter IDENTITY_PROMOTER = new IdentityTypePromoter();
public static final int VAR_TYPE_VALUE_SIZE = -1;
private static final int IS_INT = 1 << 0;
private static final int IS_BYTE = 1 << 1;
private static final int IS_FLOAT = 1 << 2;
/* VAR & FIXED == VAR */
private static final int IS_VAR = 1 << 3;
private static final int IS_FIXED = 1 << 3 | 1 << 4;
/* if we have FIXED & FIXED with different size we promote to VAR */
private static final int PROMOTE_TO_VAR_SIZE_MASK = ~(1 << 3);
/* STRAIGHT & DEREF == STRAIGHT (dense values win) */
private static final int IS_STRAIGHT = 1 << 5;
private static final int IS_DEREF = 1 << 5 | 1 << 6;
private static final int IS_SORTED = 1 << 7;
/* more bits wins (int16 & int32 == int32) */
private static final int IS_8_BIT = 1 << 8 | 1 << 9 | 1 << 10 | 1 << 11;
private static final int IS_16_BIT = 1 << 9 | 1 << 10 | 1 << 11;
private static final int IS_32_BIT = 1 << 10 | 1 << 11;
private static final int IS_64_BIT = 1 << 11;
private final ValueType type;
private final int flags;
private final int valueSize;
/**
* Returns a positive value size if this {@link TypePromoter} represents a
* fixed variant, otherwise <code>-1</code>
*
* @return a positive value size if this {@link TypePromoter} represents a
* fixed variant, otherwise <code>-1</code>
*/
public int getValueSize() {
return valueSize;
}
static {
for (ValueType type : ValueType.values()) {
TypePromoter create = create(type, VAR_TYPE_VALUE_SIZE);
FLAGS_MAP.put(create.flags, type);
}
}
/**
* Creates a new {@link TypePromoter}
*
* @param type
* the {@link ValueType} this promoter represents
* @param flags
* the promoters flags
* @param valueSize
* the value size if {@link #IS_FIXED} or <code>-1</code> otherwise.
*/
protected TypePromoter(ValueType type, int flags, int valueSize) {
this.type = type;
this.flags = flags;
this.valueSize = valueSize;
}
/**
* Creates a new promoted {@link TypePromoter} based on this and the given
* {@link TypePromoter} or <code>null</code> iff the {@link TypePromoter}
* aren't compatible.
*
* @param promoter
* the incoming promoter
* @return a new promoted {@link TypePromoter} based on this and the given
* {@link TypePromoter} or <code>null</code> iff the
* {@link TypePromoter} aren't compatible.
*/
public TypePromoter promote(TypePromoter promoter) {
int promotedFlags = promoter.flags & this.flags;
TypePromoter promoted = create(FLAGS_MAP.get(promotedFlags), valueSize);
if (promoted == null) {
return promoted;
}
if ((promoted.flags & IS_BYTE) != 0 && (promoted.flags & IS_FIXED) == IS_FIXED) {
if (this.valueSize == promoter.valueSize) {
return promoted;
}
return create(FLAGS_MAP.get(promoted.flags & PROMOTE_TO_VAR_SIZE_MASK),
VAR_TYPE_VALUE_SIZE);
}
return promoted;
}
/**
* Returns the {@link ValueType} of this {@link TypePromoter}
*
* @return the {@link ValueType} of this {@link TypePromoter}
*/
public ValueType type() {
return type;
}
@Override
public String toString() {
return "TypePromoter [type=" + type + ", sizeInBytes=" + valueSize + "]";
}
/**
* Creates a new {@link TypePromoter} for the given type and size per value.
*
* @param type
* the {@link ValueType} to create the promoter for
* @param valueSize
* the size per value in bytes or <code>-1</code> iff the types have
* variable length.
* @return a new {@link TypePromoter}
*/
public static TypePromoter create(ValueType type, int valueSize) {
if (type == null) {
return null;
}
switch (type) {
case BYTES_FIXED_DEREF:
return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_DEREF, valueSize);
case BYTES_FIXED_SORTED:
return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_SORTED, valueSize);
case BYTES_FIXED_STRAIGHT:
return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_STRAIGHT, valueSize);
case BYTES_VAR_DEREF:
return new TypePromoter(type, IS_BYTE | IS_VAR | IS_DEREF, VAR_TYPE_VALUE_SIZE);
case BYTES_VAR_SORTED:
return new TypePromoter(type, IS_BYTE | IS_VAR | IS_SORTED, VAR_TYPE_VALUE_SIZE);
case BYTES_VAR_STRAIGHT:
return new TypePromoter(type, IS_BYTE | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
case FIXED_INTS_16:
return new TypePromoter(type,
IS_INT | IS_FIXED | IS_STRAIGHT | IS_16_BIT, valueSize);
case FIXED_INTS_32:
return new TypePromoter(type,
IS_INT | IS_FIXED | IS_STRAIGHT | IS_32_BIT, valueSize);
case FIXED_INTS_64:
return new TypePromoter(type,
IS_INT | IS_FIXED | IS_STRAIGHT | IS_64_BIT, valueSize);
case FIXED_INTS_8:
return new TypePromoter(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_8_BIT,
valueSize);
case FLOAT_32:
return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
| IS_32_BIT, valueSize);
case FLOAT_64:
return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
| IS_64_BIT, valueSize);
case VAR_INTS:
return new TypePromoter(type, IS_INT | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
default:
throw new IllegalStateException();
}
}
/**
* Returns a {@link TypePromoter} that always promotes to the type provided to
* {@link #promote(TypePromoter)}
*/
public static TypePromoter getIdentityPromoter() {
return IDENTITY_PROMOTER;
}
private static class IdentityTypePromoter extends TypePromoter {
public IdentityTypePromoter() {
super(null, 0, -1);
}
@Override
public TypePromoter promote(TypePromoter promoter) {
return promoter;
}
}
}

View File

@ -0,0 +1,313 @@
package org.apache.lucene.index.values;
import java.io.IOException;
import java.util.EnumSet;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IndexDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Before;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
public class TestTypePromotion extends LuceneTestCase {
@Before
public void setUp() throws Exception {
super.setUp();
assumeFalse("cannot work with preflex codec", CodecProvider.getDefault()
.getDefaultFieldCodec().equals("PreFlex"));
}
private static EnumSet<ValueType> INTEGERS = EnumSet.of(ValueType.VAR_INTS,
ValueType.FIXED_INTS_16, ValueType.FIXED_INTS_32,
ValueType.FIXED_INTS_64, ValueType.FIXED_INTS_8);
private static EnumSet<ValueType> FLOATS = EnumSet.of(ValueType.FLOAT_32,
ValueType.FLOAT_64);
private static EnumSet<ValueType> UNSORTED_BYTES = EnumSet.of(
ValueType.BYTES_FIXED_DEREF, ValueType.BYTES_FIXED_STRAIGHT,
ValueType.BYTES_VAR_STRAIGHT, ValueType.BYTES_VAR_DEREF);
private static EnumSet<ValueType> SORTED_BYTES = EnumSet.of(
ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_VAR_SORTED);
public ValueType randomValueType(EnumSet<ValueType> typeEnum, Random random) {
ValueType[] array = typeEnum.toArray(new ValueType[0]);
return array[random.nextInt(array.length)];
}
private static enum TestType {
Int, Float, Byte
}
private void runTest(EnumSet<ValueType> types, TestType type)
throws CorruptIndexException, IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
int num_1 = atLeast(200);
int num_2 = atLeast(200);
int num_3 = atLeast(200);
long[] values = new long[num_1 + num_2 + num_3];
index(writer, new IndexDocValuesField("promote"),
randomValueType(types, random), values, 0, num_1);
writer.commit();
index(writer, new IndexDocValuesField("promote"),
randomValueType(types, random), values, num_1, num_2);
writer.commit();
if (random.nextInt(4) == 0) {
// once in a while use addIndexes
writer.optimize();
Directory dir_2 = newDirectory() ;
IndexWriter writer_2 = new IndexWriter(dir_2,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
index(writer_2, new IndexDocValuesField("promote"),
randomValueType(types, random), values, num_1 + num_2, num_3);
writer_2.commit();
writer_2.close();
if (random.nextBoolean()) {
writer.addIndexes(dir_2);
} else {
// do a real merge here
IndexReader open = IndexReader.open(dir_2);
writer.addIndexes(open);
open.close();
}
dir_2.close();
} else {
index(writer, new IndexDocValuesField("promote"),
randomValueType(types, random), values, num_1 + num_2, num_3);
}
writer.optimize();
writer.close();
assertValues(type, dir, values);
dir.close();
}
private void assertValues(TestType type, Directory dir, long[] values)
throws CorruptIndexException, IOException {
IndexReader reader = IndexReader.open(dir);
assertTrue(reader.isOptimized());
ReaderContext topReaderContext = reader.getTopReaderContext();
ReaderContext[] children = topReaderContext.children();
IndexDocValues docValues = children[0].reader.docValues("promote");
assertEquals(1, children.length);
Source directSource = docValues.getDirectSource();
for (int i = 0; i < values.length; i++) {
int id = Integer.parseInt(reader.document(i).get("id"));
String msg = "id: " + id + " doc: " + i;
switch (type) {
case Byte:
BytesRef bytes = directSource.getBytes(i, new BytesRef());
long value = 0;
switch(bytes.length) {
case 1:
value = bytes.bytes[bytes.offset];
break;
case 2:
value = bytes.asShort();
break;
case 4:
value = bytes.asInt();
break;
case 8:
value = bytes.asLong();
break;
default:
fail(msg + " bytessize: " + bytes.length);
}
assertEquals(msg + " byteSize: " + bytes.length, values[id], value);
break;
case Float:
assertEquals(msg, values[id], Double.doubleToRawLongBits(directSource.getFloat(i)));
break;
case Int:
assertEquals(msg, values[id], directSource.getInt(i));
default:
break;
}
}
docValues.close();
reader.close();
}
public void index(IndexWriter writer, IndexDocValuesField valField,
ValueType valueType, long[] values, int offset, int num)
throws CorruptIndexException, IOException {
BytesRef ref = new BytesRef(new byte[] { 1, 2, 3, 4 });
for (int i = offset; i < offset + num; i++) {
Document doc = new Document();
doc.add(new Field("id", i + "", TextField.TYPE_STORED));
switch (valueType) {
case VAR_INTS:
values[i] = random.nextInt();
valField.setInt(values[i]);
break;
case FIXED_INTS_16:
values[i] = random.nextInt(Short.MAX_VALUE);
valField.setInt((short) values[i], true);
break;
case FIXED_INTS_32:
values[i] = random.nextInt();
valField.setInt((int) values[i], true);
break;
case FIXED_INTS_64:
values[i] = random.nextLong();
valField.setInt(values[i], true);
break;
case FLOAT_64:
double nextDouble = random.nextDouble();
values[i] = Double.doubleToRawLongBits(nextDouble);
valField.setFloat(nextDouble);
break;
case FLOAT_32:
final float nextFloat = random.nextFloat();
values[i] = Double.doubleToRawLongBits(nextFloat);
valField.setFloat(nextFloat);
break;
case FIXED_INTS_8:
values[i] = (byte) i;
valField.setInt((byte)values[i], true);
break;
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
values[i] = random.nextLong();
ref.copy(values[i]);
valField.setBytes(ref, valueType);
break;
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
if (random.nextBoolean()) {
ref.copy(random.nextInt());
values[i] = ref.asInt();
} else {
ref.copy(random.nextLong());
values[i] = ref.asLong();
}
valField.setBytes(ref, valueType);
break;
default:
fail("unexpected value " + valueType);
}
doc.add(valField);
writer.addDocument(doc);
if (random.nextInt(10) == 0) {
writer.commit();
}
}
}
public void testPromoteBytes() throws IOException {
runTest(UNSORTED_BYTES, TestType.Byte);
}
public void testSortedPromoteBytes() throws IOException {
runTest(SORTED_BYTES, TestType.Byte);
}
public void testPromotInteger() throws IOException {
runTest(INTEGERS, TestType.Int);
}
public void testPromotFloatingPoint() throws CorruptIndexException,
IOException {
runTest(FLOATS, TestType.Float);
}
public void testMergeIncompatibleTypes() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
writerConfig.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES); // no merges until we are done with adding values
IndexWriter writer = new IndexWriter(dir, writerConfig);
int num_1 = atLeast(200);
int num_2 = atLeast(200);
long[] values = new long[num_1 + num_2];
index(writer, new IndexDocValuesField("promote"),
randomValueType(INTEGERS, random), values, 0, num_1);
writer.commit();
if (random.nextInt(4) == 0) {
// once in a while use addIndexes
Directory dir_2 = newDirectory() ;
IndexWriter writer_2 = new IndexWriter(dir_2,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
index(writer_2, new IndexDocValuesField("promote"),
randomValueType(random.nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2);
writer_2.commit();
writer_2.close();
if (random.nextBoolean()) {
writer.addIndexes(dir_2);
} else {
// do a real merge here
IndexReader open = IndexReader.open(dir_2);
writer.addIndexes(open);
open.close();
}
dir_2.close();
} else {
index(writer, new IndexDocValuesField("promote"),
randomValueType(random.nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2);
writer.commit();
}
writer.close();
writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
if (writerConfig.getMergePolicy() instanceof NoMergePolicy) {
writerConfig.setMergePolicy(newLogMergePolicy()); // make sure we optimize to one segment (merge everything together)
}
writer = new IndexWriter(dir, writerConfig);
// now optimize
writer.optimize();
writer.close();
IndexReader reader = IndexReader.open(dir);
assertTrue(reader.isOptimized());
ReaderContext topReaderContext = reader.getTopReaderContext();
ReaderContext[] children = topReaderContext.children();
IndexDocValues docValues = children[0].reader.docValues("promote");
assertNotNull(docValues);
assertValues(TestType.Byte, dir, values);
assertEquals(ValueType.BYTES_VAR_STRAIGHT, docValues.type());
reader.close();
dir.close();
}
}