merging 2 sorted bytes fields works

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1409925 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-11-15 18:49:55 +00:00
parent 138d8f12e8
commit 366cd8c2ca
4 changed files with 52 additions and 4 deletions

View File

@ -44,7 +44,6 @@ public abstract class SimpleDVConsumer implements Closeable {
for (FieldInfo field : mergeState.fieldInfos) {
if (field.hasDocValues()) {
mergeState.fieldInfo = field;
// nocommit: switch on 3 types: NUMBER, BYTES, SORTED
DocValues.Type type = field.getDocValuesType();
switch(type) {
case VAR_INTS:
@ -138,7 +137,6 @@ public abstract class SimpleDVConsumer implements Closeable {
}
protected void mergeSortedField(MergeState mergeState) throws IOException {
SortedDocValuesConsumer.Merger merger = new SortedDocValuesConsumer.Merger();
merger.merge(mergeState);
SortedDocValuesConsumer consumer = addSortedField(mergeState.fieldInfo, merger.numMergedTerms, merger.fixedLength >= 0, merger.maxLength);

View File

@ -65,7 +65,7 @@ public abstract class SortedDocValuesConsumer {
int[] segOrdToMergedOrd;
public BytesRef nextTerm() {
while (ord < source.getValueCount()) {
while (ord < source.getValueCount()-1) {
ord++;
if (liveTerms == null || liveTerms.get(ord)) {
source.getByOrd(ord, scratch);
@ -159,7 +159,7 @@ public abstract class SortedDocValuesConsumer {
maxLength = Math.max(maxLength, lastTerm.length);
}
top.segOrdToMergedOrd[top.ord] = ord;
top.segOrdToMergedOrd[top.ord] = ord-1;
if (top.nextTerm() == null) {
q.pop();
} else {

View File

@ -27,6 +27,7 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.PerDocConsumer;
import org.apache.lucene.codecs.SimpleDVConsumer;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.store.Directory;
@ -111,6 +112,12 @@ final class SegmentMerger {
mergeNorms(segmentWriteState);
}
if (mergeState.fieldInfos.hasDocValues()) {
SimpleDVConsumer consumer = codec.simpleDocValuesFormat().fieldsConsumer(segmentWriteState);
consumer.merge(mergeState);
consumer.close();
}
if (mergeState.fieldInfos.hasVectors()) {
numMerged = mergeVectors();
assert numMerged == mergeState.segmentInfo.getDocCount();

View File

@ -26,6 +26,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.PackedLongDocValuesField;
import org.apache.lucene.document.SortedBytesDocValuesField;
import org.apache.lucene.document.StraightBytesDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
@ -273,4 +274,46 @@ public class TestDemoDocValue extends LuceneTestCase {
directory.close();
}
public void testSortedBytesTwoDocumentsMerged() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
IndexWriter iwriter = new IndexWriter(directory, iwc);
Document doc = new Document();
doc.add(newField("id", "0", StringField.TYPE_STORED));
doc.add(new SortedBytesDocValuesField("dv", new BytesRef("hello world 1")));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(newField("id", "1", StringField.TYPE_STORED));
doc.add(new SortedBytesDocValuesField("dv", new BytesRef("hello world 2")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
DocValues dv = ireader.leaves().get(0).reader().docValues("dv");
for(int i=0;i<2;i++) {
StoredDocument doc2 = ireader.leaves().get(0).reader().document(i);
String expected;
if (doc2.get("id").equals("0")) {
expected = "hello world 1";
} else {
expected = "hello world 2";
}
assertEquals(expected, dv.getSource().getBytes(i, new BytesRef()).utf8ToString());
}
ireader.close();
directory.close();
}
}