Mirror of https://github.com/apache/lucene.git

LUCENE-3070: Added an UnsupportedOperationException (UOE) to the PreFlex codec, added random DocValues injection to RandomIndexWriter, and added basic DocValues verification to CheckIndex.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1103699 13f79535-47bb-0310-9956-ffa450edef68

commit d63f39c17d (parent 54a2d7aab4)
DocValuesField.java
@@ -73,7 +73,6 @@ import org.apache.lucene.util.BytesRef;
  * </pre>
  *
  * */
 @SuppressWarnings("serial")
 public class DocValuesField extends AbstractField implements PerDocFieldValues {

   protected BytesRef bytes;
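For orientation, a DocValuesField is attached to a document like any other field. A minimal sketch using the setters this patch exercises elsewhere (the field names and the already-open writer are made-up assumptions, not from the patch):

    // Sketch: adding per-document values next to regular fields.
    Document doc = new Document();

    DocValuesField price = new DocValuesField("price"); // "price" is a made-up name
    price.setFloat(9.99f);     // a float value, as in the FLOAT_32 case used below
    doc.add(price);

    DocValuesField id = new DocValuesField("id");       // "id" is a made-up name
    id.setInt(42);             // an int value, as in the INTS case used below
    doc.add(id);

    writer.addDocument(doc);   // writer: an already-open IndexWriter (assumed)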
CheckIndex.java
@@ -27,6 +27,9 @@ import org.apache.lucene.document.AbstractField; // for javadocs
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
+import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.values.DocValuesEnum;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -195,6 +198,9 @@ public class CheckIndex {

     /** Status for testing of term vectors (null if term vectors could not be tested). */
     public TermVectorStatus termVectorStatus;
+
+    /** Status for testing of DocValues (null if DocValues could not be tested). */
+    public DocValuesStatus docValuesStatus;
   }

   /**
@@ -254,6 +260,15 @@ public class CheckIndex {
     /** Exception thrown during term vector test (null on success) */
     public Throwable error = null;
   }
+
+  public static final class DocValuesStatus {
+    /** Number of documents tested. */
+    public int docCount;
+    /** Total number of DocValues fields tested. */
+    public long totalValueFields;
+    /** Exception thrown during doc values test (null on success) */
+    public Throwable error = null;
+  }
 }

 /** Create a new CheckIndex on the directory. */
@@ -499,6 +514,8 @@ public class CheckIndex {

       // Test Term Vectors
       segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);
+
+      segInfoStat.docValuesStatus = testDocValues(info, reader);

       // Rethrow the first exception we encountered
       // This will cause stats for failed segments to be incremented properly
@@ -510,6 +527,8 @@ public class CheckIndex {
         throw new RuntimeException("Stored Field test failed");
       } else if (segInfoStat.termVectorStatus.error != null) {
         throw new RuntimeException("Term Vector test failed");
+      } else if (segInfoStat.docValuesStatus.error != null) {
+        throw new RuntimeException("DocValues test failed");
       }

       msg("");
@@ -920,6 +939,60 @@ public class CheckIndex {

     return status;
   }
+
+  private Status.DocValuesStatus testDocValues(SegmentInfo info,
+      SegmentReader reader) {
+    final Status.DocValuesStatus status = new Status.DocValuesStatus();
+    try {
+      if (infoStream != null) {
+        infoStream.print("    test: DocValues........");
+      }
+      final FieldInfos fieldInfos = info.getFieldInfos();
+      for (FieldInfo fieldInfo : fieldInfos) {
+        if (fieldInfo.hasDocValues()) {
+          status.totalValueFields++;
+          final PerDocValues perDocValues = reader.perDocValues();
+          final DocValues docValues = perDocValues.docValues(fieldInfo.name);
+          if (docValues == null) {
+            continue;
+          }
+          final DocValuesEnum values = docValues.getEnum();
+          while (values.nextDoc() != DocValuesEnum.NO_MORE_DOCS) {
+            switch (fieldInfo.docValues) {
+            case BYTES_FIXED_DEREF:
+            case BYTES_FIXED_SORTED:
+            case BYTES_FIXED_STRAIGHT:
+            case BYTES_VAR_DEREF:
+            case BYTES_VAR_SORTED:
+            case BYTES_VAR_STRAIGHT:
+              values.bytes();
+              break;
+            case FLOAT_32:
+            case FLOAT_64:
+              values.getFloat();
+              break;
+            case INTS:
+              values.getInt();
+              break;
+            default:
+              throw new IllegalArgumentException("Field: " + fieldInfo.name
+                  + " - no such DocValues type: " + fieldInfo.docValues);
+            }
+          }
+        }
+      }
+
+      msg("OK [" + status.docCount + " total doc count; num DocValues fields "
+          + status.totalValueFields + "]");
+    } catch (Throwable e) {
+      msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+      status.error = e;
+      if (infoStream != null) {
+        e.printStackTrace(infoStream);
+      }
+    }
+    return status;
+  }

   /**
    * Test term vectors for a segment.
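Because testDocValues is wired into the per-segment loop (see the @@ -499 hunk above), the new verification runs through the normal CheckIndex entry point. A minimal sketch of exercising it (the index path is hypothetical):

    // Sketch: running CheckIndex so the new DocValues pass executes per segment.
    Directory dir = FSDirectory.open(new File("/path/to/index")); // hypothetical path
    CheckIndex checker = new CheckIndex(dir);
    checker.setInfoStream(System.out);   // prints "test: DocValues........" per segment
    CheckIndex.Status status = checker.checkIndex();
    System.out.println(status.clean ? "index is clean" : "index has problems");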
DocFieldProcessor.java
@@ -251,7 +251,7 @@ final class DocFieldProcessor extends DocConsumer {
         fieldsWriter.addField(field, fp.fieldInfo);
       }
       if (field.hasDocValues()) {
-        final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo, fieldInfos);
+        final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo);
         docValuesConsumer.add(docState.docID, field.getDocValues());
       }
     }
@@ -292,7 +292,7 @@ final class DocFieldProcessor extends DocConsumer {
   final private Map<String, DocValuesConsumer> docValues = new HashMap<String, DocValuesConsumer>();
   final private Map<Integer, PerDocConsumer> perDocConsumers = new HashMap<Integer, PerDocConsumer>();

-  DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo, FieldInfos infos)
+  DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo)
       throws IOException {
     DocValuesConsumer docValuesConsumer = docValues.get(fieldInfo.name);
     if (docValuesConsumer != null) {
@@ -303,12 +303,12 @@ final class DocFieldProcessor extends DocConsumer {
       PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(fieldInfo.getCodecId());
       SegmentCodecs codecs = perDocWriteState.segmentCodecs;
       assert codecs.codecs.length > fieldInfo.getCodecId();

       Codec codec = codecs.codecs[fieldInfo.getCodecId()];
       perDocConsumer = codec.docsConsumer(perDocWriteState);
       perDocConsumers.put(Integer.valueOf(fieldInfo.getCodecId()), perDocConsumer);
     }
     docValuesConsumer = perDocConsumer.addValuesField(fieldInfo);
+    fieldInfo.commitDocValues();
     docValues.put(fieldInfo.name, docValuesConsumer);
     return docValuesConsumer;
   }
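The method above caches one DocValuesConsumer per field name and one PerDocConsumer per codec id, creating each at most once. The get-or-create shape, reduced to its essentials (illustrative names, not the actual Lucene types):

    import java.util.HashMap;
    import java.util.Map;

    // Sketch of the get-or-create caching pattern used by docValuesConsumer().
    class ConsumerCache {
      static class Consumer {
        final String field;
        Consumer(String field) { this.field = field; }
      }

      private final Map<String, Consumer> byField = new HashMap<String, Consumer>();

      Consumer consumerFor(String field) {
        Consumer c = byField.get(field);
        if (c == null) {
          c = new Consumer(field);  // expensive work happens at most once per field
          byField.put(field, c);
        }
        return c;                   // later calls reuse the same instance
      }
    }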
FieldInfo.java
@@ -127,6 +127,7 @@ public final class FieldInfo {
   }

   private boolean vectorsCommitted;
+  private boolean docValuesCommitted;

   /**
    * Reverts all uncommitted changes on this {@link FieldInfo}
@@ -138,6 +139,10 @@ public final class FieldInfo {
       storePositionWithTermVector = false;
       storeTermVector = false;
     }
+
+    if (docValues != null && !docValuesCommitted) {
+      docValues = null;
+    }
   }

   /**
@@ -150,4 +155,9 @@ public final class FieldInfo {
     assert storeTermVector;
     vectorsCommitted = true;
   }
+
+  void commitDocValues() {
+    assert hasDocValues();
+    docValuesCommitted = true;
+  }
 }
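commitDocValues() pairs with the revert logic above: a field's docValues setting survives the revert only once a consumer has actually been created for it (see the DocFieldProcessor hunk). The commit-or-revert idea in isolation (illustrative class, not the Lucene FieldInfo API):

    // Sketch: speculative state that a rollback discards unless committed.
    class SpeculativeSetting {
      private String value;        // stands in for FieldInfo.docValues
      private boolean committed;

      void set(String v) { value = v; }        // tentative, set while indexing a doc
      void commit()      { committed = true; } // once the consumer really exists
      void revertUncommitted() {
        if (value != null && !committed) {
          value = null;                        // roll back never-committed settings
        }
      }
      String get() { return value; }
    }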
FieldInfos.java
@@ -750,5 +750,5 @@ public final class FieldInfos implements Iterable<FieldInfo> {
     }
     return roFis;
   }

 }
PerFieldCodecWrapper.java
@@ -317,22 +317,22 @@ final class PerFieldCodecWrapper extends Codec {
   }

   private final class PerDocConsumers extends PerDocConsumer {
-    private final ArrayList<PerDocConsumer> consumers = new ArrayList<PerDocConsumer>();
+    private final PerDocConsumer[] consumers;
+    private final Codec[] codecs;
+    private final PerDocWriteState state;

     public PerDocConsumers(PerDocWriteState state) throws IOException {
       assert segmentCodecs == state.segmentCodecs;
-      final Codec[] codecs = segmentCodecs.codecs;
-      for (int i = 0; i < codecs.length; i++) {
-        consumers.add(codecs[i].docsConsumer(new PerDocWriteState(state, i)));
-      }
+      this.state = state;
+      codecs = segmentCodecs.codecs;
+      consumers = new PerDocConsumer[codecs.length];
     }

     public void close() throws IOException {
-      Iterator<PerDocConsumer> it = consumers.iterator();
       IOException err = null;
-      while (it.hasNext()) {
+      for (int i = 0; i < consumers.length; i++) {
         try {
-          PerDocConsumer next = it.next();
+          final PerDocConsumer next = consumers[i];
           if (next != null) {
             next.close();
           }
@@ -351,10 +351,13 @@ final class PerFieldCodecWrapper extends Codec {

     @Override
     public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
-      assert field.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
-      final PerDocConsumer perDoc = consumers.get(field.getCodecId());
+      final int codecId = field.getCodecId();
+      assert codecId != FieldInfo.UNASSIGNED_CODEC_ID;
+      PerDocConsumer perDoc = consumers[codecId];
       if (perDoc == null) {
-        return null;
+        perDoc = codecs[codecId].docsConsumer(new PerDocWriteState(state, codecId));
+        assert perDoc != null;
+        consumers[codecId] = perDoc;
       }
       return perDoc.addValuesField(field);
     }
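The two hunks above replace eager construction of every codec's consumer with a fixed-size array filled on first use, so codecs whose fields never carry DocValues never pay for a consumer, and close() must tolerate null slots. The idiom in isolation (illustrative types; the real code stores one PerDocConsumer per codec id):

    // Sketch: lazy per-slot initialization with null-tolerant cleanup.
    class LazySlots {
      static class Worker {
        void close() { /* release resources */ }
      }

      private final Worker[] slots;

      LazySlots(int n) { slots = new Worker[n]; }

      Worker get(int i) {
        Worker w = slots[i];
        if (w == null) {
          w = new Worker();    // created only when slot i is first needed
          slots[i] = w;
        }
        return w;
      }

      void close() {
        for (Worker w : slots) {
          if (w != null) {     // slots that were never used stay null
            w.close();
          }
        }
      }
    }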
DefaultDocValuesConsumer.java
@@ -77,6 +77,7 @@ public class DefaultDocValuesConsumer extends PerDocConsumer {
           Writer.INDEX_EXTENSION));
       assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
           Writer.INDEX_EXTENSION));
+      // until here all types use an index
     case BYTES_FIXED_STRAIGHT:
     case FLOAT_32:
     case FLOAT_64:
PreFlexCodec.java
@@ -84,11 +84,11 @@ public class PreFlexCodec extends Codec {

   @Override
   public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
-    return null;
+    throw new UnsupportedOperationException("PerDocConsumer is not supported by Preflex codec");
   }

   @Override
   public PerDocValues docsProducer(SegmentReadState state) throws IOException {
-    return null;
+    throw new UnsupportedOperationException("PerDocValues is not supported by Preflex codec");
   }
 }
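This change turns a silent null into fail-fast behavior: anything that tries to write or read per-document values through the pre-4.0 codec now trips immediately. A test-style sketch (JUnit-style fail() assumed; null stands in for a real PerDocWriteState):

    // Sketch: the PreFlex codec cannot handle DocValues, and now says so loudly.
    Codec preFlex = new PreFlexCodec();
    try {
      preFlex.docsConsumer(null);   // null stands in for a real PerDocWriteState
      fail("expected UnsupportedOperationException");
    } catch (UnsupportedOperationException expected) {
      // expected: DocValues are not supported on pre-4.0 (PreFlex) indexes
    }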
RandomIndexWriter.java
@@ -23,9 +23,13 @@ import java.util.Random;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.DocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexWriter; // javadoc
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.values.Type;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util._TestUtil;
@@ -44,6 +48,10 @@ public class RandomIndexWriter implements Closeable {
   int flushAt;
   private double flushAtFactor = 1.0;
   private boolean getReaderCalled;
+  private final int fixedBytesLength;
+  private final long docValuesFieldPrefix;
+  private volatile boolean doDocValues;
+  private CodecProvider codecProvider;

   // Randomly calls Thread.yield so we mixup thread scheduling
   private static final class MockIndexWriter extends IndexWriter {
@@ -91,16 +99,79 @@ public class RandomIndexWriter implements Closeable {
       System.out.println("codec default=" + w.getConfig().getCodecProvider().getDefaultFieldCodec());
       w.setInfoStream(System.out);
     }
+    /* TODO: find some way to make this random...
+     * This must be fixed across all fixed-bytes fields in one index,
+     * so if you open another writer this might change if I use r.nextInt(x).
+     * Maybe we can peek at the existing files here?
+     */
+    fixedBytesLength = 37;
+    docValuesFieldPrefix = r.nextLong();
+    codecProvider = w.getConfig().getCodecProvider();
+    switchDoDocValues();
   }
+
+  private void switchDoDocValues() {
+    // randomly enable / disable docValues
+    doDocValues = r.nextInt(10) != 0;
+  }

   /**
    * Adds a Document.
    * @see IndexWriter#addDocument(Document)
    */
   public void addDocument(Document doc) throws IOException {
+    if (doDocValues) {
+      randomPerDocFieldValues(r, doc);
+    }
     w.addDocument(doc);

     maybeCommit();
   }

+  private void randomPerDocFieldValues(Random random, Document doc) {
+
+    Type[] values = Type.values();
+    Type type = values[random.nextInt(values.length)];
+    String name = "random_" + type.name() + "" + docValuesFieldPrefix;
+    if ("PreFlex".equals(codecProvider.getFieldCodec(name)) || doc.getFieldable(name) != null)
+      return;
+    DocValuesField docValuesField = new DocValuesField(name);
+    switch (type) {
+    case BYTES_FIXED_DEREF:
+    case BYTES_FIXED_SORTED:
+    case BYTES_FIXED_STRAIGHT:
+      final String randomUnicodeString = _TestUtil.randomUnicodeString(random, fixedBytesLength);
+      BytesRef fixedRef = new BytesRef(randomUnicodeString);
+      if (fixedRef.length > fixedBytesLength) {
+        fixedRef = new BytesRef(fixedRef.bytes, 0, fixedBytesLength);
+      } else {
+        fixedRef.grow(fixedBytesLength);
+        fixedRef.length = fixedBytesLength;
+      }
+      docValuesField.setBytes(fixedRef, type);
+      break;
+    case BYTES_VAR_DEREF:
+    case BYTES_VAR_SORTED:
+    case BYTES_VAR_STRAIGHT:
+      BytesRef ref = new BytesRef(_TestUtil.randomUnicodeString(random, 200));
+      docValuesField.setBytes(ref, type);
+      break;
+    case FLOAT_32:
+      docValuesField.setFloat(random.nextFloat());
+      break;
+    case FLOAT_64:
+      docValuesField.setFloat(random.nextDouble());
+      break;
+    case INTS:
+      docValuesField.setInt(random.nextInt());
+      break;
+    default:
+      throw new IllegalArgumentException("no such type: " + type);
+    }
+
+    doc.add(docValuesField);
+  }

   private void maybeCommit() throws IOException {
     if (docCount++ == flushAt) {
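From a test's perspective nothing changes: documents added through RandomIndexWriter may now simply carry an extra random_* DocValues field, so existing tests exercise the DocValues code paths for free. Typical usage, sketched with the standard LuceneTestCase helpers (newDirectory, newField, and the shared random are assumed from that base class):

    // Sketch: a test indexing through RandomIndexWriter; with this patch each
    // addDocument()/updateDocument() call may also inject a random DocValues field.
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, dir);

    Document doc = new Document();
    doc.add(newField("body", "some test text", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);            // DocValues may be injected here

    IndexReader reader = writer.getReader();
    // ... run the actual assertions against reader ...
    reader.close();
    writer.close();
    dir.close();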
@@ -113,6 +184,7 @@ public class RandomIndexWriter implements Closeable {
         // gradually but exponentially increase time b/w flushes
         flushAtFactor *= 1.05;
       }
+      switchDoDocValues();
     }
   }
@@ -121,6 +193,9 @@ public class RandomIndexWriter implements Closeable {
    * @see IndexWriter#updateDocument(Term, Document)
    */
   public void updateDocument(Term t, Document doc) throws IOException {
+    if (doDocValues) {
+      randomPerDocFieldValues(r, doc);
+    }
     w.updateDocument(t, doc);
     maybeCommit();
   }
@@ -135,6 +210,7 @@ public class RandomIndexWriter implements Closeable {

   public void commit() throws CorruptIndexException, IOException {
     w.commit();
+    switchDoDocValues();
   }

   public int numDocs() throws IOException {
@@ -164,6 +240,7 @@ public class RandomIndexWriter implements Closeable {
       w.optimize(limit);
       assert w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount();
     }
+    switchDoDocValues();
   }

   public IndexReader getReader(boolean applyDeletions) throws IOException {
@@ -184,6 +261,7 @@ public class RandomIndexWriter implements Closeable {
         System.out.println("RIW.getReader: open new reader");
       }
       w.commit();
+      switchDoDocValues();
       return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10), w.getConfig().getCodecProvider());
     }
   }