mirror of https://github.com/apache/lucene.git
add test, cleanup a bit
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1442321 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
15803eece1
commit
9e62bd9282
|
@ -214,7 +214,7 @@ public class MultiDocValues {
|
|||
if (!anyReal) {
|
||||
return null;
|
||||
} else {
|
||||
OrdinalMapping mapping = new OrdinalMapping(values);
|
||||
OrdinalMap mapping = new OrdinalMap(values);
|
||||
return new MultiSortedDocValues(values, starts, mapping);
|
||||
}
|
||||
}
|
||||
|
@ -222,7 +222,7 @@ public class MultiDocValues {
|
|||
/** maps per-segment ordinals to/from global ordinal space */
|
||||
// TODO: use more efficient packed ints structures (these are all positive values!)
|
||||
// nocommit: cache this in SlowWrapper, it can create MultiSortedDV with it directly.
|
||||
static class OrdinalMapping {
|
||||
static class OrdinalMap {
|
||||
// globalOrd -> (globalOrd - segmentOrd)
|
||||
final AppendingLongBuffer globalOrdDeltas;
|
||||
// globalOrd -> sub index
|
||||
|
@ -230,7 +230,7 @@ public class MultiDocValues {
|
|||
// segmentOrd -> (globalOrd - segmentOrd)
|
||||
final AppendingLongBuffer ordDeltas[];
|
||||
|
||||
OrdinalMapping(SortedDocValues subs[]) throws IOException {
|
||||
OrdinalMap(SortedDocValues subs[]) throws IOException {
|
||||
// create the ordinal mappings by pulling a termsenum over each sub's
|
||||
// unique terms, and walking a multitermsenum over those
|
||||
globalOrdDeltas = new AppendingLongBuffer();
|
||||
|
@ -253,13 +253,15 @@ public class MultiDocValues {
|
|||
TermsEnumWithSlice matches[] = mte.getMatchArray();
|
||||
for (int i = 0; i < mte.getMatchCount(); i++) {
|
||||
int subIndex = matches[i].index;
|
||||
int delta = globalOrd - segmentOrds[subIndex];
|
||||
assert delta >= 0;
|
||||
// for each unique term, just mark the first subindex/delta where it occurs
|
||||
if (i == 0) {
|
||||
subIndexes.add(subIndex);
|
||||
globalOrdDeltas.add(globalOrd - segmentOrds[subIndex]);
|
||||
globalOrdDeltas.add(delta);
|
||||
}
|
||||
// for each per-segment ord, map it back to the global term.
|
||||
ordDeltas[subIndex].add(globalOrd - segmentOrds[subIndex]);
|
||||
ordDeltas[subIndex].add(delta);
|
||||
segmentOrds[subIndex]++;
|
||||
}
|
||||
globalOrd++;
|
||||
|
@ -267,13 +269,13 @@ public class MultiDocValues {
|
|||
}
|
||||
}
|
||||
|
||||
/** implements SortedDocValues over n subs, using a SortedBytesMapping */
|
||||
/** implements SortedDocValues over n subs, using an OrdinalMap */
|
||||
static class MultiSortedDocValues extends SortedDocValues {
|
||||
final int docStarts[];
|
||||
final SortedDocValues values[];
|
||||
final OrdinalMapping mapping;
|
||||
final OrdinalMap mapping;
|
||||
|
||||
MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMapping mapping) throws IOException {
|
||||
MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException {
|
||||
this.values = values;
|
||||
this.docStarts = docStarts;
|
||||
this.mapping = mapping;
|
||||
|
|
|
@ -0,0 +1,192 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/** Tests MultiDocValues versus ordinary segment merging */
|
||||
public class TestMultiDocValues extends LuceneTestCase {
|
||||
|
||||
public void testNumerics() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Document doc = new Document();
|
||||
Field field = new NumericDocValuesField("numbers", 0);
|
||||
doc.add(field);
|
||||
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(random(), TEST_VERSION_CURRENT, null);
|
||||
iwc.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
int numDocs = atLeast(500);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
field.setLongValue(random().nextLong());
|
||||
iw.addDocument(doc);
|
||||
if (random().nextInt(17) == 0) {
|
||||
iw.commit();
|
||||
}
|
||||
}
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.forceMerge(1);
|
||||
DirectoryReader ir2 = iw.getReader();
|
||||
AtomicReader merged = getOnlySegmentReader(ir2);
|
||||
iw.close();
|
||||
|
||||
NumericDocValues multi = MultiDocValues.getNumericValues(ir, "numbers");
|
||||
NumericDocValues single = merged.getNumericDocValues("numbers");
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
assertEquals(single.get(i), multi.get(i));
|
||||
}
|
||||
ir.close();
|
||||
ir2.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testBinary() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Document doc = new Document();
|
||||
BytesRef ref = new BytesRef();
|
||||
Field field = new BinaryDocValuesField("bytes", ref);
|
||||
doc.add(field);
|
||||
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(random(), TEST_VERSION_CURRENT, null);
|
||||
iwc.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
int numDocs = atLeast(500);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
ref.copyChars(_TestUtil.randomUnicodeString(random()));
|
||||
iw.addDocument(doc);
|
||||
if (random().nextInt(17) == 0) {
|
||||
iw.commit();
|
||||
}
|
||||
}
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.forceMerge(1);
|
||||
DirectoryReader ir2 = iw.getReader();
|
||||
AtomicReader merged = getOnlySegmentReader(ir2);
|
||||
iw.close();
|
||||
|
||||
BinaryDocValues multi = MultiDocValues.getBinaryValues(ir, "bytes");
|
||||
BinaryDocValues single = merged.getBinaryDocValues("bytes");
|
||||
BytesRef actual = new BytesRef();
|
||||
BytesRef expected = new BytesRef();
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
single.get(i, expected);
|
||||
multi.get(i, actual);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
ir.close();
|
||||
ir2.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSorted() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Document doc = new Document();
|
||||
BytesRef ref = new BytesRef();
|
||||
Field field = new SortedDocValuesField("bytes", ref);
|
||||
doc.add(field);
|
||||
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(random(), TEST_VERSION_CURRENT, null);
|
||||
iwc.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
int numDocs = atLeast(500);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
ref.copyChars(_TestUtil.randomUnicodeString(random()));
|
||||
iw.addDocument(doc);
|
||||
if (random().nextInt(17) == 0) {
|
||||
iw.commit();
|
||||
}
|
||||
}
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.forceMerge(1);
|
||||
DirectoryReader ir2 = iw.getReader();
|
||||
AtomicReader merged = getOnlySegmentReader(ir2);
|
||||
iw.close();
|
||||
|
||||
SortedDocValues multi = MultiDocValues.getSortedValues(ir, "bytes");
|
||||
SortedDocValues single = merged.getSortedDocValues("bytes");
|
||||
assertEquals(single.getValueCount(), multi.getValueCount());
|
||||
BytesRef actual = new BytesRef();
|
||||
BytesRef expected = new BytesRef();
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
// check ord
|
||||
assertEquals(single.getOrd(i), multi.getOrd(i));
|
||||
// check ord value
|
||||
single.get(i, expected);
|
||||
multi.get(i, actual);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
ir.close();
|
||||
ir2.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// tries to make more dups than testSorted
|
||||
public void testSortedWithLotsOfDups() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Document doc = new Document();
|
||||
BytesRef ref = new BytesRef();
|
||||
Field field = new SortedDocValuesField("bytes", ref);
|
||||
doc.add(field);
|
||||
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(random(), TEST_VERSION_CURRENT, null);
|
||||
iwc.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
int numDocs = atLeast(500);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
ref.copyChars(_TestUtil.randomSimpleString(random(), 2));
|
||||
iw.addDocument(doc);
|
||||
if (random().nextInt(17) == 0) {
|
||||
iw.commit();
|
||||
}
|
||||
}
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.forceMerge(1);
|
||||
DirectoryReader ir2 = iw.getReader();
|
||||
AtomicReader merged = getOnlySegmentReader(ir2);
|
||||
iw.close();
|
||||
|
||||
SortedDocValues multi = MultiDocValues.getSortedValues(ir, "bytes");
|
||||
SortedDocValues single = merged.getSortedDocValues("bytes");
|
||||
assertEquals(single.getValueCount(), multi.getValueCount());
|
||||
BytesRef actual = new BytesRef();
|
||||
BytesRef expected = new BytesRef();
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
// check ord
|
||||
assertEquals(single.getOrd(i), multi.getOrd(i));
|
||||
// check ord value
|
||||
single.get(i, expected);
|
||||
multi.get(i, actual);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
ir.close();
|
||||
ir2.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue