mirror of https://github.com/apache/lucene.git
ensure values are sent to codec in sorted order: beef up assertingcodec and add more simple tests
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4765@1444653 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2dcf80718c
commit
7bd948623e
|
@ -46,6 +46,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
private int currentDoc;
|
||||
private int currentValues[] = new int[8];
|
||||
private int currentUpto = 0;
|
||||
private int maxCount = 0;
|
||||
|
||||
public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
|
||||
this.fieldInfo = fieldInfo;
|
||||
|
@ -83,7 +84,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
updateBytesUsed();
|
||||
}
|
||||
|
||||
// finalize currentDoc
|
||||
// finalize currentDoc: this deduplicates the current term ids
|
||||
private void finishCurrentDoc() {
|
||||
Arrays.sort(currentValues, 0, currentUpto);
|
||||
int lastValue = -1;
|
||||
|
@ -99,6 +100,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
}
|
||||
// record the number of unique ords for this doc
|
||||
pendingCounts.add(count);
|
||||
maxCount = Math.max(maxCount, count);
|
||||
currentUpto = 0;
|
||||
currentDoc++;
|
||||
}
|
||||
|
@ -127,7 +129,9 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
|
||||
if (currentUpto == currentValues.length) {
|
||||
currentValues = ArrayUtil.grow(currentValues, currentValues.length+1);
|
||||
iwBytesUsed.addAndGet((currentValues.length - currentUpto) * RamUsageEstimator.NUM_BYTES_INT);
|
||||
// reserve additional space for max # values per-doc
|
||||
// when flushing, we need an int[] to sort the mapped-ords within the doc
|
||||
iwBytesUsed.addAndGet((currentValues.length - currentUpto) * 2 * RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
|
||||
currentValues[currentUpto] = ord;
|
||||
|
@ -143,7 +147,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
@Override
|
||||
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
|
||||
final int maxDoc = state.segmentInfo.getDocCount();
|
||||
|
||||
final int maxCountPerDoc = maxCount;
|
||||
assert pendingCounts.size() == maxDoc;
|
||||
final int valueCount = hash.size();
|
||||
|
||||
|
@ -176,7 +180,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
new Iterable<Number>() {
|
||||
@Override
|
||||
public Iterator<Number> iterator() {
|
||||
return new OrdsIterator(ordMap);
|
||||
return new OrdsIterator(ordMap, maxCountPerDoc);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -221,11 +225,17 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
// iterates over the ords for each doc we have in ram
|
||||
private class OrdsIterator implements Iterator<Number> {
|
||||
final AppendingLongBuffer.Iterator iter = pending.iterator();
|
||||
final AppendingLongBuffer.Iterator counts = pendingCounts.iterator();
|
||||
final int ordMap[];
|
||||
final long numOrds;
|
||||
long ordUpto;
|
||||
|
||||
OrdsIterator(int ordMap[]) {
|
||||
final int currentDoc[];
|
||||
int currentUpto;
|
||||
int currentLength;
|
||||
|
||||
OrdsIterator(int ordMap[], int maxCount) {
|
||||
this.currentDoc = new int[maxCount];
|
||||
this.ordMap = ordMap;
|
||||
this.numOrds = pending.size();
|
||||
}
|
||||
|
@ -240,10 +250,20 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
if (!hasNext()) {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
int ord = (int) iter.next();
|
||||
if (currentUpto == currentLength) {
|
||||
// refill next doc, and sort remapped ords within the doc.
|
||||
currentUpto = 0;
|
||||
currentLength = (int) counts.next();
|
||||
for (int i = 0; i < currentLength; i++) {
|
||||
currentDoc[i] = ordMap[(int) iter.next()];
|
||||
}
|
||||
Arrays.sort(currentDoc, 0, currentLength);
|
||||
}
|
||||
int ord = currentDoc[currentUpto];
|
||||
currentUpto++;
|
||||
ordUpto++;
|
||||
// TODO: make reusable Number
|
||||
return ordMap[ord];
|
||||
return ord;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -163,6 +163,39 @@ public class TestDemoDocValue extends LuceneTestCase {
|
|||
directory.close();
|
||||
}
|
||||
|
||||
public void testTwoValuesUnordered() throws IOException {
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
|
||||
// Store the index in memory:
|
||||
Directory directory = newDirectory();
|
||||
// To store an index on disk, use this instead:
|
||||
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
|
||||
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, analyzer);
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
|
||||
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.close();
|
||||
|
||||
// Now search the index:
|
||||
DirectoryReader ireader = DirectoryReader.open(directory); // read-only=true
|
||||
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
|
||||
OrdIterator oi = dv.getOrds(0, null);
|
||||
assertEquals(0, oi.nextOrd());
|
||||
assertEquals(1, oi.nextOrd());
|
||||
assertEquals(OrdIterator.NO_MORE_ORDS, oi.nextOrd());
|
||||
|
||||
BytesRef bytes = new BytesRef();
|
||||
dv.lookupOrd(0, bytes);
|
||||
assertEquals(new BytesRef("hello"), bytes);
|
||||
|
||||
dv.lookupOrd(1, bytes);
|
||||
assertEquals(new BytesRef("world"), bytes);
|
||||
|
||||
ireader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testThreeValuesTwoDocs() throws IOException {
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
|
||||
|
@ -213,4 +246,146 @@ public class TestDemoDocValue extends LuceneTestCase {
|
|||
ireader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testTwoDocumentsLastMissing() throws IOException {
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
|
||||
// Store the index in memory:
|
||||
Directory directory = newDirectory();
|
||||
// To store an index on disk, use this instead:
|
||||
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
|
||||
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
|
||||
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
|
||||
iwriter.addDocument(doc);
|
||||
doc = new Document();
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.forceMerge(1);
|
||||
iwriter.close();
|
||||
|
||||
// Now search the index:
|
||||
DirectoryReader ireader = DirectoryReader.open(directory); // read-only=true
|
||||
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
|
||||
OrdIterator oi = dv.getOrds(0, null);
|
||||
assertEquals(0, oi.nextOrd());
|
||||
assertEquals(OrdIterator.NO_MORE_ORDS, oi.nextOrd());
|
||||
|
||||
BytesRef bytes = new BytesRef();
|
||||
dv.lookupOrd(0, bytes);
|
||||
assertEquals(new BytesRef("hello"), bytes);
|
||||
|
||||
assertEquals(1, dv.getValueCount());
|
||||
|
||||
ireader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testTwoDocumentsLastMissingMerge() throws IOException {
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
|
||||
// Store the index in memory:
|
||||
Directory directory = newDirectory();
|
||||
// To store an index on disk, use this instead:
|
||||
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
|
||||
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
|
||||
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.commit();
|
||||
doc = new Document();
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.forceMerge(1);
|
||||
iwriter.close();
|
||||
|
||||
// Now search the index:
|
||||
DirectoryReader ireader = DirectoryReader.open(directory); // read-only=true
|
||||
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
|
||||
OrdIterator oi = dv.getOrds(0, null);
|
||||
assertEquals(0, oi.nextOrd());
|
||||
assertEquals(OrdIterator.NO_MORE_ORDS, oi.nextOrd());
|
||||
|
||||
BytesRef bytes = new BytesRef();
|
||||
dv.lookupOrd(0, bytes);
|
||||
assertEquals(new BytesRef("hello"), bytes);
|
||||
|
||||
assertEquals(1, dv.getValueCount());
|
||||
|
||||
ireader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testTwoDocumentsFirstMissing() throws IOException {
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
|
||||
// Store the index in memory:
|
||||
Directory directory = newDirectory();
|
||||
// To store an index on disk, use this instead:
|
||||
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
|
||||
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
|
||||
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||
Document doc = new Document();
|
||||
iwriter.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.forceMerge(1);
|
||||
iwriter.close();
|
||||
|
||||
// Now search the index:
|
||||
DirectoryReader ireader = DirectoryReader.open(directory); // read-only=true
|
||||
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
|
||||
OrdIterator oi = dv.getOrds(1, null);
|
||||
assertEquals(0, oi.nextOrd());
|
||||
assertEquals(OrdIterator.NO_MORE_ORDS, oi.nextOrd());
|
||||
|
||||
BytesRef bytes = new BytesRef();
|
||||
dv.lookupOrd(0, bytes);
|
||||
assertEquals(new BytesRef("hello"), bytes);
|
||||
|
||||
assertEquals(1, dv.getValueCount());
|
||||
|
||||
ireader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testTwoDocumentsFirstMissingMerge() throws IOException {
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
|
||||
// Store the index in memory:
|
||||
Directory directory = newDirectory();
|
||||
// To store an index on disk, use this instead:
|
||||
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
|
||||
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
|
||||
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||
Document doc = new Document();
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.commit();
|
||||
doc = new Document();
|
||||
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.forceMerge(1);
|
||||
iwriter.close();
|
||||
|
||||
// Now search the index:
|
||||
DirectoryReader ireader = DirectoryReader.open(directory); // read-only=true
|
||||
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
|
||||
OrdIterator oi = dv.getOrds(1, null);
|
||||
assertEquals(0, oi.nextOrd());
|
||||
assertEquals(OrdIterator.NO_MORE_ORDS, oi.nextOrd());
|
||||
|
||||
BytesRef bytes = new BytesRef();
|
||||
dv.lookupOrd(0, bytes);
|
||||
assertEquals(new BytesRef("hello"), bytes);
|
||||
|
||||
assertEquals(1, dv.getValueCount());
|
||||
|
||||
ireader.close();
|
||||
directory.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -130,12 +130,53 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
|
|||
|
||||
@Override
|
||||
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
|
||||
// nocommit: add checks
|
||||
long valueCount = 0;
|
||||
BytesRef lastValue = null;
|
||||
for (BytesRef b : values) {
|
||||
assert b != null;
|
||||
assert b.isValid();
|
||||
if (valueCount > 0) {
|
||||
assert b.compareTo(lastValue) > 0;
|
||||
}
|
||||
lastValue = BytesRef.deepCopyOf(b);
|
||||
valueCount++;
|
||||
}
|
||||
|
||||
int docCount = 0;
|
||||
long ordCount = 0;
|
||||
// nocommit
|
||||
FixedBitSet seenOrds = new FixedBitSet((int)valueCount);
|
||||
Iterator<Number> ordIterator = ords.iterator();
|
||||
for (Number v : docToOrdCount) {
|
||||
assert v != null;
|
||||
int count = v.intValue();
|
||||
assert count >= 0;
|
||||
docCount++;
|
||||
ordCount += count;
|
||||
|
||||
long lastOrd = -1;
|
||||
for (int i = 0; i < count; i++) {
|
||||
Number o = ordIterator.next();
|
||||
assert o != null;
|
||||
long ord = o.longValue();
|
||||
assert ord >= 0 && ord < valueCount;
|
||||
assert ord > lastOrd : "ord=" + ord + ",lastOrd=" + lastOrd;
|
||||
seenOrds.set((int)ord); // nocommit
|
||||
lastOrd = ord;
|
||||
}
|
||||
}
|
||||
assert ordIterator.hasNext() == false;
|
||||
|
||||
assert docCount == maxDoc;
|
||||
assert seenOrds.cardinality() == valueCount;
|
||||
checkIterator(values.iterator(), valueCount);
|
||||
checkIterator(docToOrdCount.iterator(), maxDoc);
|
||||
checkIterator(ords.iterator(), ordCount);
|
||||
in.addSortedSetField(field, values, docToOrdCount, ords);
|
||||
}
|
||||
|
||||
private <T> void checkIterator(Iterator<T> iterator, int expectedSize) {
|
||||
for (int i = 0; i < expectedSize; i++) {
|
||||
private <T> void checkIterator(Iterator<T> iterator, long expectedSize) {
|
||||
for (long i = 0; i < expectedSize; i++) {
|
||||
boolean hasNext = iterator.hasNext();
|
||||
assert hasNext;
|
||||
T v = iterator.next();
|
||||
|
|
Loading…
Reference in New Issue