LUCENE-3070: Added UnsupportedOperationException (UOE) to the PreFlex codec's per-doc methods, added random DocValues injection to RandomIndexWriter, added basic DocValues verification to CheckIndex

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1103699 13f79535-47bb-0310-9956-ffa450edef68
Simon Willnauer 2011-05-16 11:43:51 +00:00
parent 54a2d7aab4
commit d63f39c17d
9 changed files with 182 additions and 18 deletions
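
For orientation, a minimal sketch (not part of this commit) of how the new per-segment DocValues status can be read back through the existing CheckIndex API; checkIndex() and Status.segmentInfos already exist, only docValuesStatus is added here:

    CheckIndex checker = new CheckIndex(dir);
    CheckIndex.Status checkStatus = checker.checkIndex();
    for (CheckIndex.Status.SegmentInfoStatus segment : checkStatus.segmentInfos) {
      // docValuesStatus is the field this commit adds; null means the check could not run
      if (segment.docValuesStatus != null && segment.docValuesStatus.error == null) {
        System.out.println(segment.name + ": " + segment.docValuesStatus.totalValueFields
            + " DocValues fields checked");
      }
    }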

View File: DocValuesField.java

@@ -73,7 +73,6 @@ import org.apache.lucene.util.BytesRef;
* </pre>
*
* */
@SuppressWarnings("serial")
public class DocValuesField extends AbstractField implements PerDocFieldValues {
protected BytesRef bytes;

View File: CheckIndex.java

@@ -27,6 +27,9 @@ import org.apache.lucene.document.AbstractField; // for javadocs
import org.apache.lucene.document.Document;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.DocValuesEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -195,6 +198,9 @@ public class CheckIndex {
/** Status for testing of term vectors (null if term vectors could not be tested). */
public TermVectorStatus termVectorStatus;
/** Status for testing of DocValues (null if DocValues could not be tested). */
public DocValuesStatus docValuesStatus;
}
/**
@@ -254,6 +260,15 @@ public class CheckIndex {
/** Exception thrown during term vector test (null on success) */
public Throwable error = null;
}
public static final class DocValuesStatus {
/** Number of documents tested. */
public int docCount;
/** Total number of DocValues fields tested. */
public long totalValueFields;
/** Exception thrown during doc values test (null on success) */
public Throwable error = null;
}
}
/** Create a new CheckIndex on the directory. */
@@ -499,6 +514,8 @@ public class CheckIndex {
// Test Term Vectors
segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);
segInfoStat.docValuesStatus = testDocValues(info, reader);
// Rethrow the first exception we encountered
// This will cause stats for failed segments to be incremented properly
@@ -510,6 +527,8 @@ public class CheckIndex {
throw new RuntimeException("Stored Field test failed");
} else if (segInfoStat.termVectorStatus.error != null) {
throw new RuntimeException("Term Vector test failed");
} else if (segInfoStat.docValuesStatus.error != null) {
throw new RuntimeException("DocValues test failed");
}
msg("");
@@ -920,6 +939,60 @@ public class CheckIndex {
return status;
}
private Status.DocValuesStatus testDocValues(SegmentInfo info,
SegmentReader reader) {
final Status.DocValuesStatus status = new Status.DocValuesStatus();
try {
if (infoStream != null) {
infoStream.print(" test: DocValues........");
}
final FieldInfos fieldInfos = info.getFieldInfos();
status.docCount = reader.numDocs(); // record the tested doc count; without this the summary below always reports 0
for (FieldInfo fieldInfo : fieldInfos) {
if (fieldInfo.hasDocValues()) {
status.totalValueFields++;
final PerDocValues perDocValues = reader.perDocValues();
final DocValues docValues = perDocValues.docValues(fieldInfo.name);
if (docValues == null) {
continue;
}
final DocValuesEnum values = docValues.getEnum();
while (values.nextDoc() != DocValuesEnum.NO_MORE_DOCS) {
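// for each document, pull the value through the accessor matching the field's declared type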
switch (fieldInfo.docValues) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
values.bytes();
break;
case FLOAT_32:
case FLOAT_64:
values.getFloat();
break;
case INTS:
values.getInt();
break;
default:
throw new IllegalArgumentException("Field: " + fieldInfo.name
+ " - no such DocValues type: " + fieldInfo.docValues);
}
}
}
}
msg("OK [" + status.docCount + " total doc count; " + status.totalValueFields
+ " DocValues fields]");
} catch (Throwable e) {
msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
status.error = e;
if (infoStream != null) {
e.printStackTrace(infoStream);
}
}
return status;
}
/**
* Test term vectors for a segment.

View File: DocFieldProcessor.java

@@ -251,7 +251,7 @@ final class DocFieldProcessor extends DocConsumer {
fieldsWriter.addField(field, fp.fieldInfo);
}
if (field.hasDocValues()) {
final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo, fieldInfos);
final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo);
docValuesConsumer.add(docState.docID, field.getDocValues());
}
}
@@ -292,7 +292,7 @@ final class DocFieldProcessor extends DocConsumer {
final private Map<String, DocValuesConsumer> docValues = new HashMap<String, DocValuesConsumer>();
final private Map<Integer, PerDocConsumer> perDocConsumers = new HashMap<Integer, PerDocConsumer>();
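// one DocValuesConsumer is cached per field name, and one PerDocConsumer per codec id, so they are reused across documents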
DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo, FieldInfos infos)
DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo)
throws IOException {
DocValuesConsumer docValuesConsumer = docValues.get(fieldInfo.name);
if (docValuesConsumer != null) {
@@ -303,12 +303,12 @@ final class DocFieldProcessor extends DocConsumer {
PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(fieldInfo.getCodecId());
SegmentCodecs codecs = perDocWriteState.segmentCodecs;
assert codecs.codecs.length > fieldInfo.getCodecId();
Codec codec = codecs.codecs[fieldInfo.getCodecId()];
perDocConsumer = codec.docsConsumer(perDocWriteState);
perDocConsumers.put(Integer.valueOf(fieldInfo.getCodecId()), perDocConsumer);
}
docValuesConsumer = perDocConsumer.addValuesField(fieldInfo);
fieldInfo.commitDocValues();
docValues.put(fieldInfo.name, docValuesConsumer);
return docValuesConsumer;
}

View File: FieldInfo.java

@@ -127,6 +127,7 @@ public final class FieldInfo {
}
private boolean vectorsCommitted;
private boolean docValuesCommitted;
/**
* Reverts all uncommitted changes on this {@link FieldInfo}
@@ -138,6 +139,10 @@ public final class FieldInfo {
storePositionWithTermVector = false;
storeTermVector = false;
}
if (docValues != null && !docValuesCommitted) {
docValues = null;
}
}
/**
@@ -150,4 +155,9 @@ public final class FieldInfo {
assert storeTermVector;
vectorsCommitted = true;
}
void commitDocValues() {
assert hasDocValues();
docValuesCommitted = true;
}
}

View File: FieldInfos.java

@@ -750,5 +750,5 @@ public final class FieldInfos implements Iterable<FieldInfo> {
}
return roFis;
}
}

View File: PerFieldCodecWrapper.java

@@ -317,22 +317,22 @@ final class PerFieldCodecWrapper extends Codec {
}
private final class PerDocConsumers extends PerDocConsumer {
private final ArrayList<PerDocConsumer> consumers = new ArrayList<PerDocConsumer>();
private final PerDocConsumer[] consumers;
private final Codec[] codecs;
private final PerDocWriteState state;
public PerDocConsumers(PerDocWriteState state) throws IOException {
assert segmentCodecs == state.segmentCodecs;
final Codec[] codecs = segmentCodecs.codecs;
for (int i = 0; i < codecs.length; i++) {
consumers.add(codecs[i].docsConsumer(new PerDocWriteState(state, i)));
}
this.state = state;
codecs = segmentCodecs.codecs;
consumers = new PerDocConsumer[codecs.length];
}
public void close() throws IOException {
Iterator<PerDocConsumer> it = consumers.iterator();
IOException err = null;
while (it.hasNext()) {
for (int i = 0; i < consumers.length; i++) {
try {
PerDocConsumer next = it.next();
final PerDocConsumer next = consumers[i];
if (next != null) {
next.close();
}
@@ -351,10 +351,13 @@ final class PerFieldCodecWrapper extends Codec {
@Override
public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
assert field.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
final PerDocConsumer perDoc = consumers.get(field.getCodecId());
final int codecId = field.getCodecId();
assert codecId != FieldInfo.UNASSIGNED_CODEC_ID;
PerDocConsumer perDoc = consumers[codecId];
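// the consumer for this codec id is now created lazily on first use, instead of one per codec up front in the constructor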
if (perDoc == null) {
return null;
perDoc = codecs[codecId].docsConsumer(new PerDocWriteState(state, codecId));
assert perDoc != null;
consumers[codecId] = perDoc;
}
return perDoc.addValuesField(field);
}

View File: DefaultDocValuesConsumer.java

@@ -77,6 +77,7 @@ public class DefaultDocValuesConsumer extends PerDocConsumer {
Writer.INDEX_EXTENSION));
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
Writer.INDEX_EXTENSION));
// all types up to this point use an index file in addition to the data file
case BYTES_FIXED_STRAIGHT:
case FLOAT_32:
case FLOAT_64:

View File: PreFlexCodec.java

@@ -84,11 +84,11 @@ public class PreFlexCodec extends Codec {
@Override
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
return null;
throw new UnsupportedOperationException("PerDocConsumer is not supported by the PreFlex codec");
}
@Override
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
return null;
throw new UnsupportedOperationException("PerDocValues is not supported by the PreFlex codec");
}
}
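
Since both methods now throw instead of returning null, callers that may touch per-document values have to check the codec first. A minimal sketch of such a guard (codecProvider and fieldName are placeholders), mirroring the check RandomIndexWriter gains below:

    if (!"PreFlex".equals(codecProvider.getFieldCodec(fieldName))) {
      // only non-PreFlex fields can carry DocValues, so only then add or read them
    }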

View File: RandomIndexWriter.java

@@ -23,9 +23,13 @@ import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter; // javadoc
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.values.Type;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
import org.apache.lucene.util._TestUtil;
@@ -44,6 +48,10 @@ public class RandomIndexWriter implements Closeable {
int flushAt;
private double flushAtFactor = 1.0;
private boolean getReaderCalled;
private final int fixedBytesLength;
private final long docValuesFieldPrefix;
private volatile boolean doDocValues;
private CodecProvider codecProvider;
// Randomly calls Thread.yield so we mixup thread scheduling
private static final class MockIndexWriter extends IndexWriter {
@@ -91,16 +99,79 @@ public class RandomIndexWriter implements Closeable {
System.out.println("codec default=" + w.getConfig().getCodecProvider().getDefaultFieldCodec());
w.setInfoStream(System.out);
}
/* TODO: find some way to make this random...
* The length must be identical across all fixed-bytes
* fields in one index, so if another writer is opened on
* the same index the value would change if we used r.nextInt(x).
* Maybe we can peek at the existing files here?
*/
fixedBytesLength = 37;
docValuesFieldPrefix = r.nextLong();
codecProvider = w.getConfig().getCodecProvider();
switchDoDocValues();
}
private void switchDoDocValues() {
// randomly enable / disable docValues (enabled roughly 90% of the time)
doDocValues = r.nextInt(10) != 0;
}
/**
* Adds a Document.
* @see IndexWriter#addDocument(Document)
*/
public void addDocument(Document doc) throws IOException {
if (doDocValues) {
randomPerDocFieldValues(r, doc);
}
w.addDocument(doc);
maybeCommit();
}
private void randomPerDocFieldValues(Random random, Document doc) {
Type[] values = Type.values();
Type type = values[random.nextInt(values.length)];
String name = "random_" + type.name() + docValuesFieldPrefix;
if ("PreFlex".equals(codecProvider.getFieldCodec(name)) || doc.getFieldable(name) != null) {
return;
}
DocValuesField docValuesField = new DocValuesField(name);
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
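// fixed-width variants: the value must be exactly fixedBytesLength bytes, so the random string's UTF-8 form is truncated or padded below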
final String randomUnicodeString = _TestUtil.randomUnicodeString(random, fixedBytesLength);
BytesRef fixedRef = new BytesRef(randomUnicodeString);
if (fixedRef.length > fixedBytesLength) {
fixedRef = new BytesRef(fixedRef.bytes, 0, fixedBytesLength);
} else {
fixedRef.grow(fixedBytesLength);
fixedRef.length = fixedBytesLength;
}
docValuesField.setBytes(fixedRef, type);
break;
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
BytesRef ref = new BytesRef(_TestUtil.randomUnicodeString(random, 200));
docValuesField.setBytes(ref, type);
break;
case FLOAT_32:
docValuesField.setFloat(random.nextFloat());
break;
case FLOAT_64:
docValuesField.setFloat(random.nextDouble());
break;
case INTS:
docValuesField.setInt(random.nextInt());
break;
default:
throw new IllegalArgumentException("no such type: " + type);
}
doc.add(docValuesField);
}
private void maybeCommit() throws IOException {
if (docCount++ == flushAt) {
@@ -113,6 +184,7 @@ public class RandomIndexWriter implements Closeable {
// gradually but exponentially increase time b/w flushes
flushAtFactor *= 1.05;
}
switchDoDocValues();
}
}
@@ -121,6 +193,9 @@ public class RandomIndexWriter implements Closeable {
* @see IndexWriter#updateDocument(Term, Document)
*/
public void updateDocument(Term t, Document doc) throws IOException {
if (doDocValues) {
randomPerDocFieldValues(r, doc);
}
w.updateDocument(t, doc);
maybeCommit();
}
@@ -135,6 +210,7 @@ public class RandomIndexWriter implements Closeable {
public void commit() throws CorruptIndexException, IOException {
w.commit();
switchDoDocValues();
}
public int numDocs() throws IOException {
@@ -164,6 +240,7 @@ public class RandomIndexWriter implements Closeable {
w.optimize(limit);
assert w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount();
}
switchDoDocValues();
}
public IndexReader getReader(boolean applyDeletions) throws IOException {
@@ -184,6 +261,7 @@ public class RandomIndexWriter implements Closeable {
System.out.println("RIW.getReader: open new reader");
}
w.commit();
switchDoDocValues();
return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10), w.getConfig().getCodecProvider());
}
}
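
With this change, any test writing through RandomIndexWriter also exercises the DocValues write and verification paths. A rough sketch of a typical caller (newDirectory and newField are the usual LuceneTestCase helpers; nothing below is added by this commit):

    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    Document doc = new Document();
    doc.add(newField("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.addDocument(doc); // may also inject a random_<Type><prefix> DocValues field
    IndexReader reader = writer.getReader(); // commits and re-randomizes doDocValues
    reader.close();
    writer.close();
    dir.close();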