From e1d1d34e56fff67d9db037d535fbd0ab36e56281 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 26 Sep 2011 16:20:10 +0000 Subject: [PATCH] LUCENE-3461: catch same doc-values field added twice in one doc git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1175935 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/DocFieldProcessor.java | 34 +++++++--- .../index/values/TestDocValuesIndexing.java | 65 ++++++++++++++++++- 2 files changed, 89 insertions(+), 10 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index 96005a394e6..02a4a737f11 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -83,9 +83,9 @@ final class DocFieldProcessor extends DocConsumer { // FieldInfo.storePayload. final String fileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.FIELD_INFOS_EXTENSION); state.fieldInfos.write(state.directory, fileName); - for (DocValuesConsumer consumers : docValues.values()) { - consumers.finish(state.numDocs); - }; + for (DocValuesConsumerAndDocID consumers : docValues.values()) { + consumers.docValuesConsumer.finish(state.numDocs); + } // close perDocConsumer during flush to ensure all files are flushed due to PerCodec CFS IOUtils.close(perDocConsumers.values()); } @@ -297,14 +297,28 @@ final class DocFieldProcessor extends DocConsumer { } } - final private Map docValues = new HashMap(); + private static class DocValuesConsumerAndDocID { + public int docID; + final DocValuesConsumer docValuesConsumer; + + public DocValuesConsumerAndDocID(DocValuesConsumer docValuesConsumer) { + this.docValuesConsumer = docValuesConsumer; + } + } + + final private Map docValues = new HashMap(); final private Map perDocConsumers = new HashMap(); DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo) throws IOException { - DocValuesConsumer docValuesConsumer = docValues.get(fieldInfo.name); - if (docValuesConsumer != null) { - return docValuesConsumer; + DocValuesConsumerAndDocID docValuesConsumerAndDocID = docValues.get(fieldInfo.name); + if (docValuesConsumerAndDocID != null) { + if (docState.docID == docValuesConsumerAndDocID.docID) { + throw new IllegalArgumentException("IndexDocValuesField \"" + fieldInfo.name + "\" appears more than once in this document (only one value is allowed, per field)"); + } + assert docValuesConsumerAndDocID.docID < docState.docID; + docValuesConsumerAndDocID.docID = docState.docID; + return docValuesConsumerAndDocID.docValuesConsumer; } PerDocConsumer perDocConsumer = perDocConsumers.get(fieldInfo.getCodecId()); if (perDocConsumer == null) { @@ -316,6 +330,7 @@ final class DocFieldProcessor extends DocConsumer { perDocConsumers.put(Integer.valueOf(fieldInfo.getCodecId()), perDocConsumer); } boolean success = false; + DocValuesConsumer docValuesConsumer = null; try { docValuesConsumer = perDocConsumer.addValuesField(fieldInfo); fieldInfo.commitDocValues(); @@ -325,7 +340,10 @@ final class DocFieldProcessor extends DocConsumer { fieldInfo.revertUncommitted(); } } - docValues.put(fieldInfo.name, docValuesConsumer); + + docValuesConsumerAndDocID = new DocValuesConsumerAndDocID(docValuesConsumer); + docValuesConsumerAndDocID.docID = docState.docID; + docValues.put(fieldInfo.name, docValuesConsumerAndDocID); return docValuesConsumer; } } diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index f127de9acc6..4d314eb3f24 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -25,9 +25,9 @@ import java.util.EnumSet; import java.util.List; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.IndexDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.IndexDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.CorruptIndexException; @@ -37,6 +37,7 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.MultiPerDocValues; +import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.PerDocValues; @@ -45,10 +46,10 @@ import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FloatsRef; import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util._TestUtil; import org.junit.Before; @@ -562,4 +563,64 @@ public class TestDocValuesIndexing extends LuceneTestCase { } return deleted; } + + public void testMultiValuedIndexDocValuesField() throws Exception { + assumeFalse("cannot work with preflex codec", CodecProvider.getDefault().getDefaultFieldCodec().equals("PreFlex")); + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, d); + Document doc = new Document(); + IndexDocValuesField f = new IndexDocValuesField("field"); + f.setInt(17); + // Index doc values are single-valued so we should not + // be able to add same field more than once: + doc.add(f); + doc.add(f); + try { + w.addDocument(doc); + fail("didn't hit expected exception"); + } catch (IllegalArgumentException iae) { + // expected + } + + doc = new Document(); + doc.add(f); + w.addDocument(doc); + w.optimize(); + IndexReader r = w.getReader(); + w.close(); + assertEquals(17, r.getSequentialSubReaders()[0].perDocValues().docValues("field").load().getInt(0)); + r.close(); + d.close(); + } + + public void testDifferentTypedDocValuesField() throws Exception { + assumeFalse("cannot work with preflex codec", CodecProvider.getDefault().getDefaultFieldCodec().equals("PreFlex")); + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, d); + Document doc = new Document(); + IndexDocValuesField f = new IndexDocValuesField("field"); + f.setInt(17); + // Index doc values are single-valued so we should not + // be able to add same field more than once: + doc.add(f); + IndexDocValuesField f2 = new IndexDocValuesField("field"); + f2.setFloat(22.0); + doc.add(f2); + try { + w.addDocument(doc); + fail("didn't hit expected exception"); + } catch (IllegalArgumentException iae) { + // expected + } + + doc = new Document(); + doc.add(f); + w.addDocument(doc); + w.optimize(); + IndexReader r = w.getReader(); + w.close(); + assertEquals(17, r.getSequentialSubReaders()[0].perDocValues().docValues("field").load().getInt(0)); + r.close(); + d.close(); + } }