From 366cd8c2ca2aa98a1b4bc617e2c766ccbdfc860f Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 15 Nov 2012 18:49:55 +0000 Subject: [PATCH] merging 2 sorted bytes fields works git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1409925 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/SimpleDVConsumer.java | 2 - .../codecs/SortedDocValuesConsumer.java | 4 +- .../apache/lucene/index/SegmentMerger.java | 7 +++ .../org/apache/lucene/TestDemoDocValue.java | 43 +++++++++++++++++++ 4 files changed, 52 insertions(+), 4 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java index 5e1f800481a..f51fd85cbd3 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java @@ -44,7 +44,6 @@ public abstract class SimpleDVConsumer implements Closeable { for (FieldInfo field : mergeState.fieldInfos) { if (field.hasDocValues()) { mergeState.fieldInfo = field; - // nocommit: switch on 3 types: NUMBER, BYTES, SORTED DocValues.Type type = field.getDocValuesType(); switch(type) { case VAR_INTS: @@ -138,7 +137,6 @@ public abstract class SimpleDVConsumer implements Closeable { } protected void mergeSortedField(MergeState mergeState) throws IOException { - SortedDocValuesConsumer.Merger merger = new SortedDocValuesConsumer.Merger(); merger.merge(mergeState); SortedDocValuesConsumer consumer = addSortedField(mergeState.fieldInfo, merger.numMergedTerms, merger.fixedLength >= 0, merger.maxLength); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java index f0b3befe88d..21c2b019a8d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java @@ -65,7 +65,7 @@ public abstract class SortedDocValuesConsumer { int[] segOrdToMergedOrd; public BytesRef nextTerm() { - while (ord < source.getValueCount()) { + while (ord < source.getValueCount()-1) { ord++; if (liveTerms == null || liveTerms.get(ord)) { source.getByOrd(ord, scratch); @@ -159,7 +159,7 @@ public abstract class SortedDocValuesConsumer { maxLength = Math.max(maxLength, lastTerm.length); } - top.segOrdToMergedOrd[top.ord] = ord; + top.segOrdToMergedOrd[top.ord] = ord-1; if (top.nextTerm() == null) { q.pop(); } else { diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java index cd1a616f385..525786eb5ca 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java @@ -27,6 +27,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FieldInfosWriter; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.PerDocConsumer; +import org.apache.lucene.codecs.SimpleDVConsumer; import org.apache.lucene.codecs.StoredFieldsWriter; import org.apache.lucene.codecs.TermVectorsWriter; import org.apache.lucene.store.Directory; @@ -111,6 +112,12 @@ final class SegmentMerger { mergeNorms(segmentWriteState); } + if (mergeState.fieldInfos.hasDocValues()) { + SimpleDVConsumer consumer = codec.simpleDocValuesFormat().fieldsConsumer(segmentWriteState); + consumer.merge(mergeState); + consumer.close(); + } + if (mergeState.fieldInfos.hasVectors()) { numMerged = mergeVectors(); assert numMerged == mergeState.segmentInfo.getDocCount(); diff --git a/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java b/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java index 96c564ad504..1049daaa93a 100644 --- a/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java +++ b/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java @@ -26,6 +26,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.PackedLongDocValuesField; import org.apache.lucene.document.SortedBytesDocValuesField; import org.apache.lucene.document.StraightBytesDocValuesField; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexReader; @@ -273,4 +274,46 @@ public class TestDemoDocValue extends LuceneTestCase { directory.close(); } + public void testSortedBytesTwoDocumentsMerged() throws IOException { + Analyzer analyzer = new MockAnalyzer(random()); + + // Store the index in memory: + Directory directory = newDirectory(); + // To store an index on disk, use this instead: + // Directory directory = FSDirectory.open(new File("/tmp/testindex")); + // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); + iwc.setMergePolicy(newLogMergePolicy()); + IndexWriter iwriter = new IndexWriter(directory, iwc); + Document doc = new Document(); + doc.add(newField("id", "0", StringField.TYPE_STORED)); + doc.add(new SortedBytesDocValuesField("dv", new BytesRef("hello world 1"))); + iwriter.addDocument(doc); + iwriter.commit(); + doc = new Document(); + doc.add(newField("id", "1", StringField.TYPE_STORED)); + doc.add(new SortedBytesDocValuesField("dv", new BytesRef("hello world 2"))); + iwriter.addDocument(doc); + iwriter.forceMerge(1); + iwriter.close(); + + // Now search the index: + IndexReader ireader = DirectoryReader.open(directory); // read-only=true + assert ireader.leaves().size() == 1; + DocValues dv = ireader.leaves().get(0).reader().docValues("dv"); + for(int i=0;i<2;i++) { + StoredDocument doc2 = ireader.leaves().get(0).reader().document(i); + String expected; + if (doc2.get("id").equals("0")) { + expected = "hello world 1"; + } else { + expected = "hello world 2"; + } + assertEquals(expected, dv.getSource().getBytes(i, new BytesRef()).utf8ToString()); + } + + ireader.close(); + directory.close(); + } + }