From d63f39c17d8257670f7b926e45295bbb63434d9b Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 16 May 2011 11:43:51 +0000 Subject: [PATCH] LUCENE-3070: Added UOE to PreFlex Codec, Added Random DocValues injection to RandomIndexWriter, Added basic DocValues verification to CheckIndex git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1103699 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/document/DocValuesField.java | 1 - .../org/apache/lucene/index/CheckIndex.java | 73 +++++++++++++++++ .../lucene/index/DocFieldProcessor.java | 6 +- .../org/apache/lucene/index/FieldInfo.java | 10 +++ .../org/apache/lucene/index/FieldInfos.java | 2 +- .../lucene/index/PerFieldCodecWrapper.java | 25 +++--- .../codecs/DefaultDocValuesConsumer.java | 1 + .../index/codecs/preflex/PreFlexCodec.java | 4 +- .../lucene/index/RandomIndexWriter.java | 78 +++++++++++++++++++ 9 files changed, 182 insertions(+), 18 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/document/DocValuesField.java b/lucene/src/java/org/apache/lucene/document/DocValuesField.java index c5f13e17159..0b14860dc44 100644 --- a/lucene/src/java/org/apache/lucene/document/DocValuesField.java +++ b/lucene/src/java/org/apache/lucene/document/DocValuesField.java @@ -73,7 +73,6 @@ import org.apache.lucene.util.BytesRef; * * * */ -@SuppressWarnings("serial") public class DocValuesField extends AbstractField implements PerDocFieldValues { protected BytesRef bytes; diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java index 61b3fc07da0..145a43801ae 100644 --- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java @@ -27,6 +27,9 @@ import org.apache.lucene.document.AbstractField; // for javadocs import org.apache.lucene.document.Document; import org.apache.lucene.index.codecs.CodecProvider; import 
org.apache.lucene.index.codecs.DefaultSegmentInfosWriter; +import org.apache.lucene.index.codecs.PerDocValues; +import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.DocValuesEnum; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -195,6 +198,9 @@ public class CheckIndex { /** Status for testing of term vectors (null if term vectors could not be tested). */ public TermVectorStatus termVectorStatus; + + /** Status for testing of DocValues (null if DocValues could not be tested). */ + public DocValuesStatus docValuesStatus; } /** @@ -254,6 +260,15 @@ public class CheckIndex { /** Exception thrown during term vector test (null on success) */ public Throwable error = null; } + + public static final class DocValuesStatus { + /** Number of documents tested. */ + public int docCount; + /** Total number of docValues tested. */ + public long totalValueFields; + /** Exception thrown during doc values test (null on success) */ + public Throwable error = null; + } } /** Create a new CheckIndex on the directory. 
*/ @@ -499,6 +514,8 @@ public class CheckIndex { // Test Term Vectors segInfoStat.termVectorStatus = testTermVectors(info, reader, nf); + + segInfoStat.docValuesStatus = testDocValues(info, reader); // Rethrow the first exception we encountered // This will cause stats for failed segments to be incremented properly @@ -510,6 +527,8 @@ public class CheckIndex { throw new RuntimeException("Stored Field test failed"); } else if (segInfoStat.termVectorStatus.error != null) { throw new RuntimeException("Term Vector test failed"); + } else if (segInfoStat.docValuesStatus.error != null) { + throw new RuntimeException("DocValues test failed"); } msg(""); @@ -920,6 +939,60 @@ public class CheckIndex { return status; } + + private Status.DocValuesStatus testDocValues(SegmentInfo info, + SegmentReader reader) { + final Status.DocValuesStatus status = new Status.DocValuesStatus(); + try { + if (infoStream != null) { + infoStream.print(" test: DocValues........"); + } + final FieldInfos fieldInfos = info.getFieldInfos(); + for (FieldInfo fieldInfo : fieldInfos) { + if (fieldInfo.hasDocValues()) { + status.totalValueFields++; + final PerDocValues perDocValues = reader.perDocValues(); + final DocValues docValues = perDocValues.docValues(fieldInfo.name); + if (docValues == null) { + continue; + } + final DocValuesEnum values = docValues.getEnum(); + while (values.nextDoc() != DocValuesEnum.NO_MORE_DOCS) { + switch (fieldInfo.docValues) { + case BYTES_FIXED_DEREF: + case BYTES_FIXED_SORTED: + case BYTES_FIXED_STRAIGHT: + case BYTES_VAR_DEREF: + case BYTES_VAR_SORTED: + case BYTES_VAR_STRAIGHT: + values.bytes(); + break; + case FLOAT_32: + case FLOAT_64: + values.getFloat(); + break; + case INTS: + values.getInt(); + break; + default: + throw new IllegalArgumentException("Field: " + fieldInfo.name + + " - no such DocValues type: " + fieldInfo.docValues); + } + } + } + } + + msg("OK [" + status.docCount + " total doc Count; Num DocValues Fields " + + status.totalValueFields); + } 
catch (Throwable e) { + msg("ERROR [" + String.valueOf(e.getMessage()) + "]"); + status.error = e; + if (infoStream != null) { + e.printStackTrace(infoStream); + } + } + return status; + } /** * Test term vectors for a segment. diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index 53765f84f1d..003bcdeafd8 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -251,7 +251,7 @@ final class DocFieldProcessor extends DocConsumer { fieldsWriter.addField(field, fp.fieldInfo); } if (field.hasDocValues()) { - final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo, fieldInfos); + final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo); docValuesConsumer.add(docState.docID, field.getDocValues()); } } @@ -292,7 +292,7 @@ final class DocFieldProcessor extends DocConsumer { final private Map docValues = new HashMap(); final private Map perDocConsumers = new HashMap(); - DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo, FieldInfos infos) + DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo) throws IOException { DocValuesConsumer docValuesConsumer = docValues.get(fieldInfo.name); if (docValuesConsumer != null) { @@ -303,12 +303,12 @@ final class DocFieldProcessor extends DocConsumer { PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(fieldInfo.getCodecId()); SegmentCodecs codecs = perDocWriteState.segmentCodecs; assert codecs.codecs.length > fieldInfo.getCodecId(); - Codec codec = codecs.codecs[fieldInfo.getCodecId()]; perDocConsumer = codec.docsConsumer(perDocWriteState); perDocConsumers.put(Integer.valueOf(fieldInfo.getCodecId()), perDocConsumer); } docValuesConsumer = perDocConsumer.addValuesField(fieldInfo); + fieldInfo.commitDocValues(); 
docValues.put(fieldInfo.name, docValuesConsumer); return docValuesConsumer; } diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java index 26b8d30a3ea..472c48619d8 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfo.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java @@ -127,6 +127,7 @@ public final class FieldInfo { } private boolean vectorsCommitted; + private boolean docValuesCommitted; /** * Reverts all uncommitted changes on this {@link FieldInfo} @@ -138,6 +139,10 @@ public final class FieldInfo { storePositionWithTermVector = false; storeTermVector = false; } + + if (docValues != null && !docValuesCommitted) { + docValues = null; + } } /** @@ -150,4 +155,9 @@ public final class FieldInfo { assert storeTermVector; vectorsCommitted = true; } + + void commitDocValues() { + assert hasDocValues(); + docValuesCommitted = true; + } } diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java index 422560ea057..569597963f7 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java @@ -750,5 +750,5 @@ public final class FieldInfos implements Iterable { } return roFis; } - + } diff --git a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java index f3cb616877c..06d8a0339ff 100644 --- a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java @@ -317,22 +317,22 @@ final class PerFieldCodecWrapper extends Codec { } private final class PerDocConsumers extends PerDocConsumer { - private final ArrayList consumers = new ArrayList(); + private final PerDocConsumer[] consumers; + private final Codec[] codecs; + private final PerDocWriteState state; public PerDocConsumers(PerDocWriteState 
state) throws IOException { assert segmentCodecs == state.segmentCodecs; - final Codec[] codecs = segmentCodecs.codecs; - for (int i = 0; i < codecs.length; i++) { - consumers.add(codecs[i].docsConsumer(new PerDocWriteState(state, i))); - } + this.state = state; + codecs = segmentCodecs.codecs; + consumers = new PerDocConsumer[codecs.length]; } public void close() throws IOException { - Iterator it = consumers.iterator(); IOException err = null; - while (it.hasNext()) { + for (int i = 0; i < consumers.length; i++) { try { - PerDocConsumer next = it.next(); + final PerDocConsumer next = consumers[i]; if (next != null) { next.close(); } @@ -351,10 +351,13 @@ final class PerFieldCodecWrapper extends Codec { @Override public DocValuesConsumer addValuesField(FieldInfo field) throws IOException { - assert field.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID; - final PerDocConsumer perDoc = consumers.get(field.getCodecId()); + final int codecId = field.getCodecId(); + assert codecId != FieldInfo.UNASSIGNED_CODEC_ID; + PerDocConsumer perDoc = consumers[codecId]; if (perDoc == null) { - return null; + perDoc = codecs[codecId].docsConsumer(new PerDocWriteState(state, codecId)); + assert perDoc != null; + consumers[codecId] = perDoc; } return perDoc.addValuesField(field); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java index b95dc748810..0c1d1a1da1a 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java @@ -77,6 +77,7 @@ public class DefaultDocValuesConsumer extends PerDocConsumer { Writer.INDEX_EXTENSION)); assert dir.fileExists(IndexFileNames.segmentFileName(filename, "", Writer.INDEX_EXTENSION)); + // until here all types use an index case BYTES_FIXED_STRAIGHT: case FLOAT_32: case FLOAT_64: diff --git 
a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java index c3860d8ca3d..763457fa71f 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java @@ -84,11 +84,11 @@ public class PreFlexCodec extends Codec { @Override public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { - return null; + throw new UnsupportedOperationException("PerDocConsumer is not supported by Preflex codec"); } @Override public PerDocValues docsProducer(SegmentReadState state) throws IOException { - return null; + throw new UnsupportedOperationException("PerDocValues is not supported by Preflex codec"); } } diff --git a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java index 0712e4104c4..4aa5f7c339a 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java +++ b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java @@ -23,9 +23,13 @@ import java.util.Random; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; // javadoc +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.index.values.Type; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.Version; import org.apache.lucene.util._TestUtil; @@ -44,6 +48,10 @@ public class RandomIndexWriter implements Closeable { int flushAt; private double flushAtFactor = 1.0; private boolean getReaderCalled; + private final int fixedBytesLength; + private final long docValuesFieldPrefix; + 
 private volatile boolean doDocValues; + private CodecProvider codecProvider; // Randomly calls Thread.yield so we mixup thread scheduling private static final class MockIndexWriter extends IndexWriter { @@ -91,16 +99,79 @@ public class RandomIndexWriter implements Closeable { System.out.println("codec default=" + w.getConfig().getCodecProvider().getDefaultFieldCodec()); w.setInfoStream(System.out); } + /* TODO: find some way to make that random... + * This must be fixed across all fixed bytes + * fields in one index. so if you open another writer + * this might change if I use r.nextInt(x) + * maybe we can peek at the existing files here? + */ + fixedBytesLength = 37; + docValuesFieldPrefix = r.nextLong(); + codecProvider = w.getConfig().getCodecProvider(); + switchDoDocValues(); } + private void switchDoDocValues() { + // randomly enable / disable docValues + doDocValues = r.nextInt(10) != 0; + } + /** * Adds a Document. * @see IndexWriter#addDocument(Document) */ public void addDocument(Document doc) throws IOException { + if (doDocValues) { + randomPerDocFieldValues(r, doc); + } w.addDocument(doc); + maybeCommit(); } + + private void randomPerDocFieldValues(Random random, Document doc) { + + Type[] values = Type.values(); + Type type = values[random.nextInt(values.length)]; + String name = "random_" + type.name() + "" + docValuesFieldPrefix; + if ("PreFlex".equals(codecProvider.getFieldCodec(name)) || doc.getFieldable(name) != null) + return; + DocValuesField docValuesField = new DocValuesField(name); + switch (type) { + case BYTES_FIXED_DEREF: + case BYTES_FIXED_SORTED: + case BYTES_FIXED_STRAIGHT: + final String randomUnicodeString = _TestUtil.randomUnicodeString(random, fixedBytesLength); + BytesRef fixedRef = new BytesRef(randomUnicodeString); + if (fixedRef.length > fixedBytesLength) { + fixedRef = new BytesRef(fixedRef.bytes, 0, fixedBytesLength); + } else { + fixedRef.grow(fixedBytesLength); + fixedRef.length = fixedBytesLength; + } + 
docValuesField.setBytes(fixedRef, type); + break; + case BYTES_VAR_DEREF: + case BYTES_VAR_SORTED: + case BYTES_VAR_STRAIGHT: + BytesRef ref = new BytesRef(_TestUtil.randomUnicodeString(random, 200)); + docValuesField.setBytes(ref, type); + break; + case FLOAT_32: + docValuesField.setFloat(random.nextFloat()); + break; + case FLOAT_64: + docValuesField.setFloat(random.nextDouble()); + break; + case INTS: + docValuesField.setInt(random.nextInt()); + break; + default: + throw new IllegalArgumentException("no such type: " + type); + } + + doc.add(docValuesField); + } private void maybeCommit() throws IOException { if (docCount++ == flushAt) { @@ -113,6 +184,7 @@ public class RandomIndexWriter implements Closeable { // gradually but exponentially increase time b/w flushes flushAtFactor *= 1.05; } + switchDoDocValues(); } } @@ -121,6 +193,9 @@ public class RandomIndexWriter implements Closeable { * @see IndexWriter#updateDocument(Term, Document) */ public void updateDocument(Term t, Document doc) throws IOException { + if (doDocValues) { + randomPerDocFieldValues(r, doc); + } w.updateDocument(t, doc); maybeCommit(); } @@ -135,6 +210,7 @@ public class RandomIndexWriter implements Closeable { public void commit() throws CorruptIndexException, IOException { w.commit(); + switchDoDocValues(); } public int numDocs() throws IOException { @@ -164,6 +240,7 @@ public class RandomIndexWriter implements Closeable { w.optimize(limit); assert w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount(); } + switchDoDocValues(); } public IndexReader getReader(boolean applyDeletions) throws IOException { @@ -184,6 +261,7 @@ public class RandomIndexWriter implements Closeable { System.out.println("RIW.getReader: open new reader"); } w.commit(); + switchDoDocValues(); return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10), w.getConfig().getCodecProvider()); } }